1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
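/* Ring memory helpers: a be_queue_info ring is backed by a single
 * DMA-coherent buffer of len * entry_size bytes; the routines below
 * free and allocate that buffer.
 */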
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
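/* Toggle the HOSTINTR bit in the MEMBAR control register via PCI config
 * space; be_intr_set() falls back to this when the FW INTR_SET cmd fails.
 */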
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
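/* Doorbell helpers: the notify routines below compose a doorbell word
 * (ring id plus posted/popped count and flags) and write it to the
 * adapter's doorbell BAR; they bail out once a HW error has been flagged.
 */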
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK, only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or PF didn't pre-provision.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
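/* Accumulate a 16-bit HW counter into a 32-bit SW counter, adding 65536
 * whenever the HW value wraps past the previously seen low 16 bits.
 */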
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* the erx HW counter below can actually wrap around after
618                  * 65535; the driver accumulates it into a 32-bit value
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
650 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
651                                                 struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715         return stats;
716 }
717
718 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
719 {
720         struct net_device *netdev = adapter->netdev;
721
722         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
723                 netif_carrier_off(netdev);
724                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
725         }
726
727         if (link_status)
728                 netif_carrier_on(netdev);
729         else
730                 netif_carrier_off(netdev);
731
732         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
733 }
734
735 static int be_gso_hdr_len(struct sk_buff *skb)
736 {
737         if (skb->encapsulation)
738                 return skb_inner_transport_offset(skb) +
739                        inner_tcp_hdrlen(skb);
740         return skb_transport_offset(skb) + tcp_hdrlen(skb);
741 }
742
743 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
744 {
745         struct be_tx_stats *stats = tx_stats(txo);
746         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
747         /* Account for headers which get duplicated in TSO pkt */
748         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
749
750         u64_stats_update_begin(&stats->sync);
751         stats->tx_reqs++;
752         stats->tx_bytes += skb->len + dup_hdr_len;
753         stats->tx_pkts += tx_pkts;
754         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
755                 stats->tx_vxlan_offload_pkts += tx_pkts;
756         u64_stats_update_end(&stats->sync);
757 }
758
759 /* Returns number of WRBs needed for the skb */
760 static u32 skb_wrb_cnt(struct sk_buff *skb)
761 {
762         /* +1 for the header wrb */
763         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
764 }
765
766 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
767 {
768         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
769         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
770         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
771         wrb->rsvd0 = 0;
772 }
773
774 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
775  * to avoid the swap and shift/mask operations in wrb_fill().
776  */
777 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
778 {
779         wrb->frag_pa_hi = 0;
780         wrb->frag_pa_lo = 0;
781         wrb->frag_len = 0;
782         wrb->rsvd0 = 0;
783 }
784
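/* Returns the VLAN tag to program into the TX WRB. If the skb's priority
 * bits are not in the adapter's allowed priority bitmap, they are replaced
 * with the recommended priority bits.
 */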
785 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
786                                      struct sk_buff *skb)
787 {
788         u8 vlan_prio;
789         u16 vlan_tag;
790
791         vlan_tag = skb_vlan_tag_get(skb);
792         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
793         /* If vlan priority provided by OS is NOT in available bmap */
794         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
795                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
796                                 adapter->recommended_prio_bits;
797
798         return vlan_tag;
799 }
800
801 /* Used only for IP tunnel packets */
802 static u16 skb_inner_ip_proto(struct sk_buff *skb)
803 {
804         return (inner_ip_hdr(skb)->version == 4) ?
805                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
806 }
807
808 static u16 skb_ip_proto(struct sk_buff *skb)
809 {
810         return (ip_hdr(skb)->version == 4) ?
811                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
812 }
813
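/* TX queue occupancy helpers: the queue is "full" when it can no longer
 * hold a maximally fragmented skb, may be woken once it drains below half
 * its length, and has completions pending while more WRBs are in use than
 * are still waiting to be notified to HW.
 */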
814 static inline bool be_is_txq_full(struct be_tx_obj *txo)
815 {
816         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
817 }
818
819 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
820 {
821         return atomic_read(&txo->q.used) < txo->q.len / 2;
822 }
823
824 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
825 {
826         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
827 }
828
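/* Translate the skb's offload state (GSO, checksum, VLAN) into the feature
 * flags and fields of be_wrb_params, which wrb_fill_hdr() later encodes
 * into the TX header WRB.
 */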
829 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
830                                        struct sk_buff *skb,
831                                        struct be_wrb_params *wrb_params)
832 {
833         u16 proto;
834
835         if (skb_is_gso(skb)) {
836                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
837                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
838                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
839                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
840         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
841                 if (skb->encapsulation) {
842                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
843                         proto = skb_inner_ip_proto(skb);
844                 } else {
845                         proto = skb_ip_proto(skb);
846                 }
847                 if (proto == IPPROTO_TCP)
848                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
849                 else if (proto == IPPROTO_UDP)
850                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
851         }
852
853         if (skb_vlan_tag_present(skb)) {
854                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
855                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
856         }
857
858         BE_WRB_F_SET(wrb_params->features, CRC, 1);
859 }
860
861 static void wrb_fill_hdr(struct be_adapter *adapter,
862                          struct be_eth_hdr_wrb *hdr,
863                          struct be_wrb_params *wrb_params,
864                          struct sk_buff *skb)
865 {
866         memset(hdr, 0, sizeof(*hdr));
867
868         SET_TX_WRB_HDR_BITS(crc, hdr,
869                             BE_WRB_F_GET(wrb_params->features, CRC));
870         SET_TX_WRB_HDR_BITS(ipcs, hdr,
871                             BE_WRB_F_GET(wrb_params->features, IPCS));
872         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, TCPCS));
874         SET_TX_WRB_HDR_BITS(udpcs, hdr,
875                             BE_WRB_F_GET(wrb_params->features, UDPCS));
876
877         SET_TX_WRB_HDR_BITS(lso, hdr,
878                             BE_WRB_F_GET(wrb_params->features, LSO));
879         SET_TX_WRB_HDR_BITS(lso6, hdr,
880                             BE_WRB_F_GET(wrb_params->features, LSO6));
881         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
882
883         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
884          * this hack is not needed, the evt bit is set while ringing the DB.
885          */
886         SET_TX_WRB_HDR_BITS(event, hdr,
887                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
888         SET_TX_WRB_HDR_BITS(vlan, hdr,
889                             BE_WRB_F_GET(wrb_params->features, VLAN));
890         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
891
892         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
893         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
894         SET_TX_WRB_HDR_BITS(mgmt, hdr,
895                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
896 }
897
898 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
899                           bool unmap_single)
900 {
901         dma_addr_t dma;
902         u32 frag_len = le32_to_cpu(wrb->frag_len);
903
904
905         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
906                 (u64)le32_to_cpu(wrb->frag_pa_lo);
907         if (frag_len) {
908                 if (unmap_single)
909                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
910                 else
911                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
912         }
913 }
914
915 /* Grab a WRB header for xmit */
916 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
917 {
918         u32 head = txo->q.head;
919
920         queue_head_inc(&txo->q);
921         return head;
922 }
923
924 /* Set up the WRB header for xmit */
925 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
926                                 struct be_tx_obj *txo,
927                                 struct be_wrb_params *wrb_params,
928                                 struct sk_buff *skb, u16 head)
929 {
930         u32 num_frags = skb_wrb_cnt(skb);
931         struct be_queue_info *txq = &txo->q;
932         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
933
934         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
935         be_dws_cpu_to_le(hdr, sizeof(*hdr));
936
937         BUG_ON(txo->sent_skb_list[head]);
938         txo->sent_skb_list[head] = skb;
939         txo->last_req_hdr = head;
940         atomic_add(num_frags, &txq->used);
941         txo->last_req_wrb_cnt = num_frags;
942         txo->pend_wrb_cnt += num_frags;
943 }
944
945 /* Setup a WRB fragment (buffer descriptor) for xmit */
946 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
947                                  int len)
948 {
949         struct be_eth_wrb *wrb;
950         struct be_queue_info *txq = &txo->q;
951
952         wrb = queue_head_node(txq);
953         wrb_fill(wrb, busaddr, len);
954         queue_head_inc(txq);
955 }
956
957 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
958  * was invoked. The producer index is restored to the previous packet and the
959  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
960  */
961 static void be_xmit_restore(struct be_adapter *adapter,
962                             struct be_tx_obj *txo, u32 head, bool map_single,
963                             u32 copied)
964 {
965         struct device *dev;
966         struct be_eth_wrb *wrb;
967         struct be_queue_info *txq = &txo->q;
968
969         dev = &adapter->pdev->dev;
970         txq->head = head;
971
972         /* skip the first wrb (hdr); it's not mapped */
973         queue_head_inc(txq);
974         while (copied) {
975                 wrb = queue_head_node(txq);
976                 unmap_tx_frag(dev, wrb, map_single);
977                 map_single = false;
978                 copied -= le32_to_cpu(wrb->frag_len);
979                 queue_head_inc(txq);
980         }
981
982         txq->head = head;
983 }
984
985 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
986  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
987  * of WRBs used up by the packet.
988  */
989 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
990                            struct sk_buff *skb,
991                            struct be_wrb_params *wrb_params)
992 {
993         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
994         struct device *dev = &adapter->pdev->dev;
995         struct be_queue_info *txq = &txo->q;
996         bool map_single = false;
997         u32 head = txq->head;
998         dma_addr_t busaddr;
999         int len;
1000
1001         head = be_tx_get_wrb_hdr(txo);
1002
1003         if (skb->len > skb->data_len) {
1004                 len = skb_headlen(skb);
1005
1006                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007                 if (dma_mapping_error(dev, busaddr))
1008                         goto dma_err;
1009                 map_single = true;
1010                 be_tx_setup_wrb_frag(txo, busaddr, len);
1011                 copied += len;
1012         }
1013
1014         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1016                 len = skb_frag_size(frag);
1017
1018                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019                 if (dma_mapping_error(dev, busaddr))
1020                         goto dma_err;
1021                 be_tx_setup_wrb_frag(txo, busaddr, len);
1022                 copied += len;
1023         }
1024
1025         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027         be_tx_stats_update(txo, skb);
1028         return wrb_cnt;
1029
1030 dma_err:
1031         adapter->drv_stats.dma_map_errors++;
1032         be_xmit_restore(adapter, txo, head, map_single, copied);
1033         return 0;
1034 }
1035
1036 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037 {
1038         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039 }
1040
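/* Insert the VLAN tag(s) directly into the packet data instead of relying
 * on HW tagging; used by the QnQ/pvid workarounds below and for OS2BMC
 * packets.
 */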
1041 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042                                              struct sk_buff *skb,
1043                                              struct be_wrb_params
1044                                              *wrb_params)
1045 {
1046         u16 vlan_tag = 0;
1047
1048         skb = skb_share_check(skb, GFP_ATOMIC);
1049         if (unlikely(!skb))
1050                 return skb;
1051
1052         if (skb_vlan_tag_present(skb))
1053                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1054
1055         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1056                 if (!vlan_tag)
1057                         vlan_tag = adapter->pvid;
1058                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1059                  * to skip VLAN insertion
1060                  */
1061                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062         }
1063
1064         if (vlan_tag) {
1065                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1066                                                 vlan_tag);
1067                 if (unlikely(!skb))
1068                         return skb;
1069                 skb->vlan_tci = 0;
1070         }
1071
1072         /* Insert the outer VLAN, if any */
1073         if (adapter->qnq_vid) {
1074                 vlan_tag = adapter->qnq_vid;
1075                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1076                                                 vlan_tag);
1077                 if (unlikely(!skb))
1078                         return skb;
1079                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1080         }
1081
1082         return skb;
1083 }
1084
1085 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1086 {
1087         struct ethhdr *eh = (struct ethhdr *)skb->data;
1088         u16 offset = ETH_HLEN;
1089
1090         if (eh->h_proto == htons(ETH_P_IPV6)) {
1091                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1092
1093                 offset += sizeof(struct ipv6hdr);
1094                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1095                     ip6h->nexthdr != NEXTHDR_UDP) {
1096                         struct ipv6_opt_hdr *ehdr =
1097                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1098
1099                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1100                         if (ehdr->hdrlen == 0xff)
1101                                 return true;
1102                 }
1103         }
1104         return false;
1105 }
1106
1107 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1108 {
1109         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1110 }
1111
1112 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113 {
1114         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1115 }
1116
1117 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1118                                                   struct sk_buff *skb,
1119                                                   struct be_wrb_params
1120                                                   *wrb_params)
1121 {
1122         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1123         unsigned int eth_hdr_len;
1124         struct iphdr *ip;
1125
1126         /* For padded packets, BE HW modifies tot_len field in IP header
1127          * incorrectly when VLAN tag is inserted by HW.
1128          * For padded packets, Lancer computes incorrect checksum.
1129          */
1130         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1131                                                 VLAN_ETH_HLEN : ETH_HLEN;
1132         if (skb->len <= 60 &&
1133             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1134             is_ipv4_pkt(skb)) {
1135                 ip = (struct iphdr *)ip_hdr(skb);
1136                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1137         }
1138
1139         /* If vlan tag is already inlined in the packet, skip HW VLAN
1140          * tagging in pvid-tagging mode
1141          */
1142         if (be_pvid_tagging_enabled(adapter) &&
1143             veh->h_vlan_proto == htons(ETH_P_8021Q))
1144                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1145
1146         /* HW has a bug wherein it will calculate CSUM for VLAN
1147          * pkts even though csum offload is disabled.
1148          * Manually insert the VLAN in the pkt.
1149          */
1150         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1151             skb_vlan_tag_present(skb)) {
1152                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1153                 if (unlikely(!skb))
1154                         goto err;
1155         }
1156
1157         /* HW may lockup when VLAN HW tagging is requested on
1158          * certain ipv6 packets. Drop such pkts if the HW workaround to
1159          * skip HW tagging is not enabled by FW.
1160          */
1161         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1162                      (adapter->pvid || adapter->qnq_vid) &&
1163                      !qnq_async_evt_rcvd(adapter)))
1164                 goto tx_drop;
1165
1166         /* Manual VLAN tag insertion to prevent:
1167          * ASIC lockup when the ASIC inserts VLAN tag into
1168          * certain ipv6 packets. Insert VLAN tags in driver,
1169          * and set event, completion, vlan bits accordingly
1170          * in the Tx WRB.
1171          */
1172         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1173             be_vlan_tag_tx_chk(adapter, skb)) {
1174                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1175                 if (unlikely(!skb))
1176                         goto err;
1177         }
1178
1179         return skb;
1180 tx_drop:
1181         dev_kfree_skb_any(skb);
1182 err:
1183         return NULL;
1184 }
1185
1186 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1187                                            struct sk_buff *skb,
1188                                            struct be_wrb_params *wrb_params)
1189 {
1190         int err;
1191
1192         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1193          * packets that are 32 bytes or less may cause a transmit stall
1194          * on that port. The workaround is to pad such packets
1195          * (len <= 32 bytes) to a minimum length of 36 bytes.
1196          */
1197         if (skb->len <= 32) {
1198                 if (skb_put_padto(skb, 36))
1199                         return NULL;
1200         }
1201
1202         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1203                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1204                 if (!skb)
1205                         return NULL;
1206         }
1207
1208         /* The stack can send us skbs with length greater than
1209          * what the HW can handle. Trim the extra bytes.
1210          */
1211         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1212         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1213         WARN_ON(err);
1214
1215         return skb;
1216 }
1217
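/* Mark the last pending request eventable, pad the pending WRBs to an even
 * count with a dummy WRB on non-Lancer chips, and ring the TX doorbell for
 * everything queued since the last flush.
 */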
1218 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1219 {
1220         struct be_queue_info *txq = &txo->q;
1221         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1222
1223         /* Mark the last request eventable if it hasn't been marked already */
1224         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1225                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1226
1227         /* compose a dummy wrb if there are odd set of wrbs to notify */
1228         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1229                 wrb_fill_dummy(queue_head_node(txq));
1230                 queue_head_inc(txq);
1231                 atomic_inc(&txq->used);
1232                 txo->pend_wrb_cnt++;
1233                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1234                                            TX_HDR_WRB_NUM_SHIFT);
1235                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1236                                           TX_HDR_WRB_NUM_SHIFT);
1237         }
1238         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1239         txo->pend_wrb_cnt = 0;
1240 }
1241
1242 /* OS2BMC related */
1243
1244 #define DHCP_CLIENT_PORT        68
1245 #define DHCP_SERVER_PORT        67
1246 #define NET_BIOS_PORT1          137
1247 #define NET_BIOS_PORT2          138
1248 #define DHCPV6_RAS_PORT         547
1249
1250 #define is_mc_allowed_on_bmc(adapter, eh)       \
1251         (!is_multicast_filt_enabled(adapter) && \
1252          is_multicast_ether_addr(eh->h_dest) && \
1253          !is_broadcast_ether_addr(eh->h_dest))
1254
1255 #define is_bc_allowed_on_bmc(adapter, eh)       \
1256         (!is_broadcast_filt_enabled(adapter) && \
1257          is_broadcast_ether_addr(eh->h_dest))
1258
1259 #define is_arp_allowed_on_bmc(adapter, skb)     \
1260         (is_arp(skb) && is_arp_filt_enabled(adapter))
1261
1262 #define is_broadcast_packet(eh, adapter)        \
1263                 (is_multicast_ether_addr(eh->h_dest) && \
1264                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1265
1266 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1267
1268 #define is_arp_filt_enabled(adapter)    \
1269                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1270
1271 #define is_dhcp_client_filt_enabled(adapter)    \
1272                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1273
1274 #define is_dhcp_srvr_filt_enabled(adapter)      \
1275                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1276
1277 #define is_nbios_filt_enabled(adapter)  \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1279
1280 #define is_ipv6_na_filt_enabled(adapter)        \
1281                 (adapter->bmc_filt_mask &       \
1282                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1283
1284 #define is_ipv6_ra_filt_enabled(adapter)        \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1286
1287 #define is_ipv6_ras_filt_enabled(adapter)       \
1288                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1289
1290 #define is_broadcast_filt_enabled(adapter)      \
1291                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1292
1293 #define is_multicast_filt_enabled(adapter)      \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1295
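/* Decide whether this TX packet must also be sent to the BMC, based on the
 * BMC filtering bitmap (broadcast, multicast, ARP, DHCP, NetBIOS and IPv6
 * ND/RA types).
 */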
1296 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1297                                struct sk_buff **skb)
1298 {
1299         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1300         bool os2bmc = false;
1301
1302         if (!be_is_os2bmc_enabled(adapter))
1303                 goto done;
1304
1305         if (!is_multicast_ether_addr(eh->h_dest))
1306                 goto done;
1307
1308         if (is_mc_allowed_on_bmc(adapter, eh) ||
1309             is_bc_allowed_on_bmc(adapter, eh) ||
1310             is_arp_allowed_on_bmc(adapter, (*skb))) {
1311                 os2bmc = true;
1312                 goto done;
1313         }
1314
1315         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1316                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1317                 u8 nexthdr = hdr->nexthdr;
1318
1319                 if (nexthdr == IPPROTO_ICMPV6) {
1320                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1321
1322                         switch (icmp6->icmp6_type) {
1323                         case NDISC_ROUTER_ADVERTISEMENT:
1324                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1325                                 goto done;
1326                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1327                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1328                                 goto done;
1329                         default:
1330                                 break;
1331                         }
1332                 }
1333         }
1334
1335         if (is_udp_pkt((*skb))) {
1336                 struct udphdr *udp = udp_hdr((*skb));
1337
1338                 switch (ntohs(udp->dest)) {
1339                 case DHCP_CLIENT_PORT:
1340                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1341                         goto done;
1342                 case DHCP_SERVER_PORT:
1343                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1344                         goto done;
1345                 case NET_BIOS_PORT1:
1346                 case NET_BIOS_PORT2:
1347                         os2bmc = is_nbios_filt_enabled(adapter);
1348                         goto done;
1349                 case DHCPV6_RAS_PORT:
1350                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1351                         goto done;
1352                 default:
1353                         break;
1354                 }
1355         }
1356 done:
1357         /* For VLAN packets destined to the BMC, the ASIC expects
1358          * the VLAN tag to be inline in the packet.
1359          */
1360         if (os2bmc)
1361                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1362
1363         return os2bmc;
1364 }
1365
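/* .ndo_start_xmit handler: apply chip-specific workarounds, map and enqueue
 * the skb's WRBs, optionally enqueue a second copy for the BMC, and ring the
 * TX doorbell when flushing.
 */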
1366 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1367 {
1368         struct be_adapter *adapter = netdev_priv(netdev);
1369         u16 q_idx = skb_get_queue_mapping(skb);
1370         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1371         struct be_wrb_params wrb_params = { 0 };
1372         bool flush = !skb->xmit_more;
1373         u16 wrb_cnt;
1374
1375         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1376         if (unlikely(!skb))
1377                 goto drop;
1378
1379         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1380
1381         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1382         if (unlikely(!wrb_cnt)) {
1383                 dev_kfree_skb_any(skb);
1384                 goto drop;
1385         }
1386
1387         /* If OS2BMC is enabled and the packet is destined to the BMC,
1388          * enqueue the packet a second time with the mgmt bit set.
1389          */
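             /* The same skb is queued twice (once for the wire, once for the
              * BMC), so an extra reference is taken via skb_get() below; each
              * TX completion then drops one reference when it consumes the skb.
              */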
1390         if (be_send_pkt_to_bmc(adapter, &skb)) {
1391                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1392                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393                 if (unlikely(!wrb_cnt))
1394                         goto drop;
1395                 else
1396                         skb_get(skb);
1397         }
1398
1399         if (be_is_txq_full(txo)) {
1400                 netif_stop_subqueue(netdev, q_idx);
1401                 tx_stats(txo)->tx_stops++;
1402         }
1403
1404         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1405                 be_xmit_flush(adapter, txo);
1406
1407         return NETDEV_TX_OK;
1408 drop:
1409         tx_stats(txo)->tx_drv_drops++;
1410         /* Flush the already enqueued tx requests */
1411         if (flush && txo->pend_wrb_cnt)
1412                 be_xmit_flush(adapter, txo);
1413
1414         return NETDEV_TX_OK;
1415 }
1416
1417 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1418 {
1419         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1420                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1421 }
1422
1423 static int be_set_vlan_promisc(struct be_adapter *adapter)
1424 {
1425         struct device *dev = &adapter->pdev->dev;
1426         int status;
1427
1428         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1429                 return 0;
1430
1431         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1432         if (!status) {
1433                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1434                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1435         } else {
1436                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1437         }
1438         return status;
1439 }
1440
1441 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1442 {
1443         struct device *dev = &adapter->pdev->dev;
1444         int status;
1445
1446         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1447         if (!status) {
1448                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1449                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1450         }
1451         return status;
1452 }
1453
1454 /*
1455  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1456  * If the user configures more, place BE in vlan promiscuous mode.
1457  */
1458 static int be_vid_config(struct be_adapter *adapter)
1459 {
1460         struct device *dev = &adapter->pdev->dev;
1461         u16 vids[BE_NUM_VLANS_SUPPORTED];
1462         u16 num = 0, i = 0;
1463         int status = 0;
1464
1465         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1466         if (adapter->netdev->flags & IFF_PROMISC)
1467                 return 0;
1468
1469         if (adapter->vlans_added > be_max_vlans(adapter))
1470                 return be_set_vlan_promisc(adapter);
1471
1472         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1473                 status = be_clear_vlan_promisc(adapter);
1474                 if (status)
1475                         return status;
1476         }
1477         /* Construct VLAN Table to give to HW */
1478         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1479                 vids[num++] = cpu_to_le16(i);
1480
1481         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1482         if (status) {
1483                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1484                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1485                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1486                     addl_status(status) ==
1487                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1488                         return be_set_vlan_promisc(adapter);
1489         }
1490         return status;
1491 }
1492
1493 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1494 {
1495         struct be_adapter *adapter = netdev_priv(netdev);
1496         int status = 0;
1497
1498         mutex_lock(&adapter->rx_filter_lock);
1499
1500         /* Packets with VID 0 are always received by Lancer by default */
1501         if (lancer_chip(adapter) && vid == 0)
1502                 goto done;
1503
1504         if (test_bit(vid, adapter->vids))
1505                 goto done;
1506
1507         set_bit(vid, adapter->vids);
1508         adapter->vlans_added++;
1509
1510         status = be_vid_config(adapter);
1511 done:
1512         mutex_unlock(&adapter->rx_filter_lock);
1513         return status;
1514 }
1515
1516 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1517 {
1518         struct be_adapter *adapter = netdev_priv(netdev);
1519         int status = 0;
1520
1521         mutex_lock(&adapter->rx_filter_lock);
1522
1523         /* Packets with VID 0 are always received by Lancer by default */
1524         if (lancer_chip(adapter) && vid == 0)
1525                 goto done;
1526
1527         if (!test_bit(vid, adapter->vids))
1528                 goto done;
1529
1530         clear_bit(vid, adapter->vids);
1531         adapter->vlans_added--;
1532
1533         status = be_vid_config(adapter);
1534 done:
1535         mutex_unlock(&adapter->rx_filter_lock);
1536         return status;
1537 }
1538
1539 static void be_set_all_promisc(struct be_adapter *adapter)
1540 {
1541         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1542         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1543 }
1544
1545 static void be_set_mc_promisc(struct be_adapter *adapter)
1546 {
1547         int status;
1548
1549         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1550                 return;
1551
1552         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1553         if (!status)
1554                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1555 }
1556
1557 static void be_set_uc_promisc(struct be_adapter *adapter)
1558 {
1559         int status;
1560
1561         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1562                 return;
1563
1564         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1565         if (!status)
1566                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1567 }
1568
1569 static void be_clear_uc_promisc(struct be_adapter *adapter)
1570 {
1571         int status;
1572
1573         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1574                 return;
1575
1576         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1577         if (!status)
1578                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1579 }
1580
1581 /* The two functions below are the callback args for __dev_uc_sync() and
1582  * __dev_mc_sync(). We use a single callback for both sync and unsync; it does
1583  * not actually add or remove addresses, but only flags that the uc/mc list
1584  * has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1585  */
1586 static int be_uc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_uc_list = true;
1592         return 0;
1593 }
1594
1595 static int be_mc_list_update(struct net_device *netdev,
1596                              const unsigned char *addr)
1597 {
1598         struct be_adapter *adapter = netdev_priv(netdev);
1599
1600         adapter->update_mc_list = true;
1601         return 0;
1602 }
1603
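     /* Program the HW multicast filter. The mc-list is snapshotted into
      * adapter->mc_list under the netdev addr lock; the firmware command (or
      * the switch to mc-promisc mode) is issued after the lock is dropped.
      */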
1604 static void be_set_mc_list(struct be_adapter *adapter)
1605 {
1606         struct net_device *netdev = adapter->netdev;
1607         struct netdev_hw_addr *ha;
1608         bool mc_promisc = false;
1609         int status;
1610
1611         netif_addr_lock_bh(netdev);
1612         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1613
1614         if (netdev->flags & IFF_PROMISC) {
1615                 adapter->update_mc_list = false;
1616         } else if (netdev->flags & IFF_ALLMULTI ||
1617                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1618                 /* Enable multicast promisc if the number of addresses
1619                  * configured exceeds what we support
1620                  */
1621                 mc_promisc = true;
1622                 adapter->update_mc_list = false;
1623         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1624                 /* Update mc-list unconditionally if the iface was previously
1625                  * in mc-promisc mode and now is out of that mode.
1626                  */
1627                 adapter->update_mc_list = true;
1628         }
1629
1630         if (adapter->update_mc_list) {
1631                 int i = 0;
1632
1633                 /* cache the mc-list in adapter */
1634                 netdev_for_each_mc_addr(ha, netdev) {
1635                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1636                         i++;
1637                 }
1638                 adapter->mc_count = netdev_mc_count(netdev);
1639         }
1640         netif_addr_unlock_bh(netdev);
1641
1642         if (mc_promisc) {
1643                 be_set_mc_promisc(adapter);
1644         } else if (adapter->update_mc_list) {
1645                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1646                 if (!status)
1647                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1648                 else
1649                         be_set_mc_promisc(adapter);
1650
1651                 adapter->update_mc_list = false;
1652         }
1653 }
1654
1655 static void be_clear_mc_list(struct be_adapter *adapter)
1656 {
1657         struct net_device *netdev = adapter->netdev;
1658
1659         __dev_mc_unsync(netdev, NULL);
1660         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1661         adapter->mc_count = 0;
1662 }
1663
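     /* Add the uc-list entry at uc_idx to the interface. If the address is
      * the primary MAC (dev_mac), reuse pmac_id[0] instead of consuming
      * another PMAC slot on the adapter.
      */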
1664 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1665 {
1666         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1667                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668                 return 0;
1669         }
1670
1671         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1672                                adapter->if_handle,
1673                                &adapter->pmac_id[uc_idx + 1], 0);
1674 }
1675
1676 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1677 {
1678         if (pmac_id == adapter->pmac_id[0])
1679                 return;
1680
1681         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1682 }
1683
1684 static void be_set_uc_list(struct be_adapter *adapter)
1685 {
1686         struct net_device *netdev = adapter->netdev;
1687         struct netdev_hw_addr *ha;
1688         bool uc_promisc = false;
1689         int curr_uc_macs = 0, i;
1690
1691         netif_addr_lock_bh(netdev);
1692         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1693
1694         if (netdev->flags & IFF_PROMISC) {
1695                 adapter->update_uc_list = false;
1696         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1697                 uc_promisc = true;
1698                 adapter->update_uc_list = false;
1699         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1700                 /* Update uc-list unconditionally if the iface was previously
1701                  * in uc-promisc mode and now is out of that mode.
1702                  */
1703                 adapter->update_uc_list = true;
1704         }
1705
1706         if (adapter->update_uc_list) {
1707                 /* cache the uc-list in adapter array */
1708                 i = 0;
1709                 netdev_for_each_uc_addr(ha, netdev) {
1710                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1711                         i++;
1712                 }
1713                 curr_uc_macs = netdev_uc_count(netdev);
1714         }
1715         netif_addr_unlock_bh(netdev);
1716
1717         if (uc_promisc) {
1718                 be_set_uc_promisc(adapter);
1719         } else if (adapter->update_uc_list) {
1720                 be_clear_uc_promisc(adapter);
1721
1722                 for (i = 0; i < adapter->uc_macs; i++)
1723                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1724
1725                 for (i = 0; i < curr_uc_macs; i++)
1726                         be_uc_mac_add(adapter, i);
1727                 adapter->uc_macs = curr_uc_macs;
1728                 adapter->update_uc_list = false;
1729         }
1730 }
1731
1732 static void be_clear_uc_list(struct be_adapter *adapter)
1733 {
1734         struct net_device *netdev = adapter->netdev;
1735         int i;
1736
1737         __dev_uc_unsync(netdev, NULL);
1738         for (i = 0; i < adapter->uc_macs; i++)
1739                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1740
1741         adapter->uc_macs = 0;
1742 }
1743
1744 static void __be_set_rx_mode(struct be_adapter *adapter)
1745 {
1746         struct net_device *netdev = adapter->netdev;
1747
1748         mutex_lock(&adapter->rx_filter_lock);
1749
1750         if (netdev->flags & IFF_PROMISC) {
1751                 if (!be_in_all_promisc(adapter))
1752                         be_set_all_promisc(adapter);
1753         } else if (be_in_all_promisc(adapter)) {
1754                 /* We need to re-program the vlan-list or clear
1755                  * vlan-promisc mode (if needed) when the interface
1756                  * comes out of promisc mode.
1757                  */
1758                 be_vid_config(adapter);
1759         }
1760
1761         be_set_uc_list(adapter);
1762         be_set_mc_list(adapter);
1763
1764         mutex_unlock(&adapter->rx_filter_lock);
1765 }
1766
1767 static void be_work_set_rx_mode(struct work_struct *work)
1768 {
1769         struct be_cmd_work *cmd_work =
1770                                 container_of(work, struct be_cmd_work, work);
1771
1772         __be_set_rx_mode(cmd_work->adapter);
1773         kfree(cmd_work);
1774 }
1775
1776 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1777 {
1778         struct be_adapter *adapter = netdev_priv(netdev);
1779         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1780         int status;
1781
1782         if (!sriov_enabled(adapter))
1783                 return -EPERM;
1784
1785         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1786                 return -EINVAL;
1787
1788         /* Proceed further only if the user-provided MAC is different
1789          * from the active MAC
1790          */
1791         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1792                 return 0;
1793
1794         if (BEx_chip(adapter)) {
1795                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1796                                 vf + 1);
1797
1798                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1799                                          &vf_cfg->pmac_id, vf + 1);
1800         } else {
1801                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1802                                         vf + 1);
1803         }
1804
1805         if (status) {
1806                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1807                         mac, vf, status);
1808                 return be_cmd_status(status);
1809         }
1810
1811         ether_addr_copy(vf_cfg->mac_addr, mac);
1812
1813         return 0;
1814 }
1815
1816 static int be_get_vf_config(struct net_device *netdev, int vf,
1817                             struct ifla_vf_info *vi)
1818 {
1819         struct be_adapter *adapter = netdev_priv(netdev);
1820         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1821
1822         if (!sriov_enabled(adapter))
1823                 return -EPERM;
1824
1825         if (vf >= adapter->num_vfs)
1826                 return -EINVAL;
1827
1828         vi->vf = vf;
1829         vi->max_tx_rate = vf_cfg->tx_rate;
1830         vi->min_tx_rate = 0;
1831         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1832         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1833         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1834         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1835         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1836
1837         return 0;
1838 }
1839
1840 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1841 {
1842         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1843         u16 vids[BE_NUM_VLANS_SUPPORTED];
1844         int vf_if_id = vf_cfg->if_handle;
1845         int status;
1846
1847         /* Enable Transparent VLAN Tagging */
1848         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1849         if (status)
1850                 return status;
1851
1852         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1853         vids[0] = 0;
1854         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1855         if (!status)
1856                 dev_info(&adapter->pdev->dev,
1857                          "Cleared guest VLANs on VF%d", vf);
1858
1859         /* After TVT is enabled, disallow VFs to program VLAN filters */
1860         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1861                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1862                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1863                 if (!status)
1864                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1865         }
1866         return 0;
1867 }
1868
1869 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1870 {
1871         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1872         struct device *dev = &adapter->pdev->dev;
1873         int status;
1874
1875         /* Reset Transparent VLAN Tagging. */
1876         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1877                                        vf_cfg->if_handle, 0, 0);
1878         if (status)
1879                 return status;
1880
1881         /* Allow VFs to program VLAN filtering */
1882         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1883                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1884                                                   BE_PRIV_FILTMGMT, vf + 1);
1885                 if (!status) {
1886                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1887                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1888                 }
1889         }
1890
1891         dev_info(dev,
1892                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1893         return 0;
1894 }
1895
1896 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1897                           __be16 vlan_proto)
1898 {
1899         struct be_adapter *adapter = netdev_priv(netdev);
1900         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901         int status;
1902
1903         if (!sriov_enabled(adapter))
1904                 return -EPERM;
1905
1906         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1907                 return -EINVAL;
1908
1909         if (vlan_proto != htons(ETH_P_8021Q))
1910                 return -EPROTONOSUPPORT;
1911
1912         if (vlan || qos) {
1913                 vlan |= qos << VLAN_PRIO_SHIFT;
1914                 status = be_set_vf_tvt(adapter, vf, vlan);
1915         } else {
1916                 status = be_clear_vf_tvt(adapter, vf);
1917         }
1918
1919         if (status) {
1920                 dev_err(&adapter->pdev->dev,
1921                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1922                         status);
1923                 return be_cmd_status(status);
1924         }
1925
1926         vf_cfg->vlan_tag = vlan;
1927         return 0;
1928 }
1929
1930 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1931                              int min_tx_rate, int max_tx_rate)
1932 {
1933         struct be_adapter *adapter = netdev_priv(netdev);
1934         struct device *dev = &adapter->pdev->dev;
1935         int percent_rate, status = 0;
1936         u16 link_speed = 0;
1937         u8 link_status;
1938
1939         if (!sriov_enabled(adapter))
1940                 return -EPERM;
1941
1942         if (vf >= adapter->num_vfs)
1943                 return -EINVAL;
1944
1945         if (min_tx_rate)
1946                 return -EINVAL;
1947
1948         if (!max_tx_rate)
1949                 goto config_qos;
1950
1951         status = be_cmd_link_status_query(adapter, &link_speed,
1952                                           &link_status, 0);
1953         if (status)
1954                 goto err;
1955
1956         if (!link_status) {
1957                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1958                 status = -ENETDOWN;
1959                 goto err;
1960         }
1961
1962         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1963                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1964                         link_speed);
1965                 status = -EINVAL;
1966                 goto err;
1967         }
1968
1969         /* On Skyhawk the QOS setting must be done only as a % value */
1970         percent_rate = link_speed / 100;
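             /* Illustrative example: on a 10000 Mbps link percent_rate is 100,
              * so max_tx_rate must be a multiple of 100 Mbps on Skyhawk.
              */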
1971         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1972                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1973                         percent_rate);
1974                 status = -EINVAL;
1975                 goto err;
1976         }
1977
1978 config_qos:
1979         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1980         if (status)
1981                 goto err;
1982
1983         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1984         return 0;
1985
1986 err:
1987         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1988                 max_tx_rate, vf);
1989         return be_cmd_status(status);
1990 }
1991
1992 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1993                                 int link_state)
1994 {
1995         struct be_adapter *adapter = netdev_priv(netdev);
1996         int status;
1997
1998         if (!sriov_enabled(adapter))
1999                 return -EPERM;
2000
2001         if (vf >= adapter->num_vfs)
2002                 return -EINVAL;
2003
2004         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2005         if (status) {
2006                 dev_err(&adapter->pdev->dev,
2007                         "Link state change on VF %d failed: %#x\n", vf, status);
2008                 return be_cmd_status(status);
2009         }
2010
2011         adapter->vf_cfg[vf].plink_tracking = link_state;
2012
2013         return 0;
2014 }
2015
2016 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2017 {
2018         struct be_adapter *adapter = netdev_priv(netdev);
2019         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2020         u8 spoofchk;
2021         int status;
2022
2023         if (!sriov_enabled(adapter))
2024                 return -EPERM;
2025
2026         if (vf >= adapter->num_vfs)
2027                 return -EINVAL;
2028
2029         if (BEx_chip(adapter))
2030                 return -EOPNOTSUPP;
2031
2032         if (enable == vf_cfg->spoofchk)
2033                 return 0;
2034
2035         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2036
2037         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2038                                        0, spoofchk);
2039         if (status) {
2040                 dev_err(&adapter->pdev->dev,
2041                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2042                 return be_cmd_status(status);
2043         }
2044
2045         vf_cfg->spoofchk = enable;
2046         return 0;
2047 }
2048
2049 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2050                           ulong now)
2051 {
2052         aic->rx_pkts_prev = rx_pkts;
2053         aic->tx_reqs_prev = tx_pkts;
2054         aic->jiffies = now;
2055 }
2056
2057 static int be_get_new_eqd(struct be_eq_obj *eqo)
2058 {
2059         struct be_adapter *adapter = eqo->adapter;
2060         int eqd, start;
2061         struct be_aic_obj *aic;
2062         struct be_rx_obj *rxo;
2063         struct be_tx_obj *txo;
2064         u64 rx_pkts = 0, tx_pkts = 0;
2065         ulong now;
2066         u32 pps, delta;
2067         int i;
2068
2069         aic = &adapter->aic_obj[eqo->idx];
2070         if (!aic->enable) {
2071                 if (aic->jiffies)
2072                         aic->jiffies = 0;
2073                 eqd = aic->et_eqd;
2074                 return eqd;
2075         }
2076
2077         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2078                 do {
2079                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2080                         rx_pkts += rxo->stats.rx_pkts;
2081                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2082         }
2083
2084         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2085                 do {
2086                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2087                         tx_pkts += txo->stats.tx_reqs;
2088                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2089         }
2090
2091         /* Skip if the counters wrapped around or this is the first calculation */
2092         now = jiffies;
2093         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2094             rx_pkts < aic->rx_pkts_prev ||
2095             tx_pkts < aic->tx_reqs_prev) {
2096                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2097                 return aic->prev_eqd;
2098         }
2099
2100         delta = jiffies_to_msecs(now - aic->jiffies);
2101         if (delta == 0)
2102                 return aic->prev_eqd;
2103
2104         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2105                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
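             /* Heuristic: the delay grows by 4 for every 15K pkts/sec; values
              * below 8 are zeroed and the result is then clamped to the
              * [min_eqd, max_eqd] range. Illustrative example: 300K combined
              * pkts/sec gives eqd = (300000 / 15000) << 2 = 80.
              */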
2106         eqd = (pps / 15000) << 2;
2107
2108         if (eqd < 8)
2109                 eqd = 0;
2110         eqd = min_t(u32, eqd, aic->max_eqd);
2111         eqd = max_t(u32, eqd, aic->min_eqd);
2112
2113         be_aic_update(aic, rx_pkts, tx_pkts, now);
2114
2115         return eqd;
2116 }
2117
2118 /* For Skyhawk-R only */
2119 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2120 {
2121         struct be_adapter *adapter = eqo->adapter;
2122         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2123         ulong now = jiffies;
2124         int eqd;
2125         u32 mult_enc;
2126
2127         if (!aic->enable)
2128                 return 0;
2129
2130         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2131                 eqd = aic->prev_eqd;
2132         else
2133                 eqd = be_get_new_eqd(eqo);
2134
2135         if (eqd > 100)
2136                 mult_enc = R2I_DLY_ENC_1;
2137         else if (eqd > 60)
2138                 mult_enc = R2I_DLY_ENC_2;
2139         else if (eqd > 20)
2140                 mult_enc = R2I_DLY_ENC_3;
2141         else
2142                 mult_enc = R2I_DLY_ENC_0;
2143
2144         aic->prev_eqd = eqd;
2145
2146         return mult_enc;
2147 }
2148
2149 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2150 {
2151         struct be_set_eqd set_eqd[MAX_EVT_QS];
2152         struct be_aic_obj *aic;
2153         struct be_eq_obj *eqo;
2154         int i, num = 0, eqd;
2155
2156         for_all_evt_queues(adapter, eqo, i) {
2157                 aic = &adapter->aic_obj[eqo->idx];
2158                 eqd = be_get_new_eqd(eqo);
2159                 if (force_update || eqd != aic->prev_eqd) {
2160                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2161                         set_eqd[num].eq_id = eqo->q.id;
2162                         aic->prev_eqd = eqd;
2163                         num++;
2164                 }
2165         }
2166
2167         if (num)
2168                 be_cmd_modify_eqd(adapter, set_eqd, num);
2169 }
2170
2171 static void be_rx_stats_update(struct be_rx_obj *rxo,
2172                                struct be_rx_compl_info *rxcp)
2173 {
2174         struct be_rx_stats *stats = rx_stats(rxo);
2175
2176         u64_stats_update_begin(&stats->sync);
2177         stats->rx_compl++;
2178         stats->rx_bytes += rxcp->pkt_size;
2179         stats->rx_pkts++;
2180         if (rxcp->tunneled)
2181                 stats->rx_vxlan_offload_pkts++;
2182         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2183                 stats->rx_mcast_pkts++;
2184         if (rxcp->err)
2185                 stats->rx_compl_err++;
2186         u64_stats_update_end(&stats->sync);
2187 }
2188
2189 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2190 {
2191         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2192          * Also, ipcksm is ignored for IPv6 packets
2193          */
2194         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2195                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2196 }
2197
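     /* Pop the page_info at the RXQ tail. The DMA mapping covers a whole
      * compound page shared by several fragments: it is unmapped only on the
      * fragment marked last_frag; for the others the fragment is just synced
      * for CPU access.
      */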
2198 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2199 {
2200         struct be_adapter *adapter = rxo->adapter;
2201         struct be_rx_page_info *rx_page_info;
2202         struct be_queue_info *rxq = &rxo->q;
2203         u32 frag_idx = rxq->tail;
2204
2205         rx_page_info = &rxo->page_info_tbl[frag_idx];
2206         BUG_ON(!rx_page_info->page);
2207
2208         if (rx_page_info->last_frag) {
2209                 dma_unmap_page(&adapter->pdev->dev,
2210                                dma_unmap_addr(rx_page_info, bus),
2211                                adapter->big_page_size, DMA_FROM_DEVICE);
2212                 rx_page_info->last_frag = false;
2213         } else {
2214                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2215                                         dma_unmap_addr(rx_page_info, bus),
2216                                         rx_frag_size, DMA_FROM_DEVICE);
2217         }
2218
2219         queue_tail_inc(rxq);
2220         atomic_dec(&rxq->used);
2221         return rx_page_info;
2222 }
2223
2224 /* Throw away the data in the Rx completion */
2225 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2226                                 struct be_rx_compl_info *rxcp)
2227 {
2228         struct be_rx_page_info *page_info;
2229         u16 i, num_rcvd = rxcp->num_rcvd;
2230
2231         for (i = 0; i < num_rcvd; i++) {
2232                 page_info = get_rx_page_info(rxo);
2233                 put_page(page_info->page);
2234                 memset(page_info, 0, sizeof(*page_info));
2235         }
2236 }
2237
2238 /*
2239  * skb_fill_rx_data forms a complete skb for an ether frame
2240  * indicated by rxcp.
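      * Fragments carved from the same physical page are coalesced into a
      * single skb frag slot.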
2241  */
2242 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2243                              struct be_rx_compl_info *rxcp)
2244 {
2245         struct be_rx_page_info *page_info;
2246         u16 i, j;
2247         u16 hdr_len, curr_frag_len, remaining;
2248         u8 *start;
2249
2250         page_info = get_rx_page_info(rxo);
2251         start = page_address(page_info->page) + page_info->page_offset;
2252         prefetch(start);
2253
2254         /* Copy data in the first descriptor of this completion */
2255         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2256
2257         skb->len = curr_frag_len;
2258         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2259                 memcpy(skb->data, start, curr_frag_len);
2260                 /* Complete packet has now been moved to data */
2261                 put_page(page_info->page);
2262                 skb->data_len = 0;
2263                 skb->tail += curr_frag_len;
2264         } else {
2265                 hdr_len = ETH_HLEN;
2266                 memcpy(skb->data, start, hdr_len);
2267                 skb_shinfo(skb)->nr_frags = 1;
2268                 skb_frag_set_page(skb, 0, page_info->page);
2269                 skb_shinfo(skb)->frags[0].page_offset =
2270                                         page_info->page_offset + hdr_len;
2271                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2272                                   curr_frag_len - hdr_len);
2273                 skb->data_len = curr_frag_len - hdr_len;
2274                 skb->truesize += rx_frag_size;
2275                 skb->tail += hdr_len;
2276         }
2277         page_info->page = NULL;
2278
2279         if (rxcp->pkt_size <= rx_frag_size) {
2280                 BUG_ON(rxcp->num_rcvd != 1);
2281                 return;
2282         }
2283
2284         /* More frags present for this completion */
2285         remaining = rxcp->pkt_size - curr_frag_len;
2286         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2287                 page_info = get_rx_page_info(rxo);
2288                 curr_frag_len = min(remaining, rx_frag_size);
2289
2290                 /* Coalesce all frags from the same physical page in one slot */
2291                 if (page_info->page_offset == 0) {
2292                         /* Fresh page */
2293                         j++;
2294                         skb_frag_set_page(skb, j, page_info->page);
2295                         skb_shinfo(skb)->frags[j].page_offset =
2296                                                         page_info->page_offset;
2297                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2298                         skb_shinfo(skb)->nr_frags++;
2299                 } else {
2300                         put_page(page_info->page);
2301                 }
2302
2303                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2304                 skb->len += curr_frag_len;
2305                 skb->data_len += curr_frag_len;
2306                 skb->truesize += rx_frag_size;
2307                 remaining -= curr_frag_len;
2308                 page_info->page = NULL;
2309         }
2310         BUG_ON(j > MAX_SKB_FRAGS);
2311 }
2312
2313 /* Process the RX completion indicated by rxcp when GRO is disabled */
2314 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2315                                 struct be_rx_compl_info *rxcp)
2316 {
2317         struct be_adapter *adapter = rxo->adapter;
2318         struct net_device *netdev = adapter->netdev;
2319         struct sk_buff *skb;
2320
2321         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2322         if (unlikely(!skb)) {
2323                 rx_stats(rxo)->rx_drops_no_skbs++;
2324                 be_rx_compl_discard(rxo, rxcp);
2325                 return;
2326         }
2327
2328         skb_fill_rx_data(rxo, skb, rxcp);
2329
2330         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2331                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2332         else
2333                 skb_checksum_none_assert(skb);
2334
2335         skb->protocol = eth_type_trans(skb, netdev);
2336         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2337         if (netdev->features & NETIF_F_RXHASH)
2338                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2339
2340         skb->csum_level = rxcp->tunneled;
2341         skb_mark_napi_id(skb, napi);
2342
2343         if (rxcp->vlanf)
2344                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2345
2346         netif_receive_skb(skb);
2347 }
2348
2349 /* Process the RX completion indicated by rxcp when GRO is enabled */
2350 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2351                                     struct napi_struct *napi,
2352                                     struct be_rx_compl_info *rxcp)
2353 {
2354         struct be_adapter *adapter = rxo->adapter;
2355         struct be_rx_page_info *page_info;
2356         struct sk_buff *skb = NULL;
2357         u16 remaining, curr_frag_len;
2358         u16 i, j;
2359
2360         skb = napi_get_frags(napi);
2361         if (!skb) {
2362                 be_rx_compl_discard(rxo, rxcp);
2363                 return;
2364         }
2365
2366         remaining = rxcp->pkt_size;
2367         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2368                 page_info = get_rx_page_info(rxo);
2369
2370                 curr_frag_len = min(remaining, rx_frag_size);
2371
2372                 /* Coalesce all frags from the same physical page in one slot */
2373                 if (i == 0 || page_info->page_offset == 0) {
2374                         /* First frag or Fresh page */
2375                         j++;
2376                         skb_frag_set_page(skb, j, page_info->page);
2377                         skb_shinfo(skb)->frags[j].page_offset =
2378                                                         page_info->page_offset;
2379                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2380                 } else {
2381                         put_page(page_info->page);
2382                 }
2383                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384                 skb->truesize += rx_frag_size;
2385                 remaining -= curr_frag_len;
2386                 memset(page_info, 0, sizeof(*page_info));
2387         }
2388         BUG_ON(j > MAX_SKB_FRAGS);
2389
2390         skb_shinfo(skb)->nr_frags = j + 1;
2391         skb->len = rxcp->pkt_size;
2392         skb->data_len = rxcp->pkt_size;
2393         skb->ip_summed = CHECKSUM_UNNECESSARY;
2394         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2395         if (adapter->netdev->features & NETIF_F_RXHASH)
2396                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2397
2398         skb->csum_level = rxcp->tunneled;
2399
2400         if (rxcp->vlanf)
2401                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2402
2403         napi_gro_frags(napi);
2404 }
2405
2406 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2407                                  struct be_rx_compl_info *rxcp)
2408 {
2409         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2410         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2411         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2412         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2413         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2414         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2415         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2416         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2417         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2418         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2419         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2420         if (rxcp->vlanf) {
2421                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2422                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2423         }
2424         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2425         rxcp->tunneled =
2426                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2427 }
2428
2429 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2430                                  struct be_rx_compl_info *rxcp)
2431 {
2432         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2433         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2434         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2435         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2436         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2437         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2438         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2439         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2440         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2441         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2442         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2443         if (rxcp->vlanf) {
2444                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2445                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2446         }
2447         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2448         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2449 }
2450
2451 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2452 {
2453         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2454         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2455         struct be_adapter *adapter = rxo->adapter;
2456
2457         /* For checking the valid bit, either definition can be used, as the
2458          * valid bit is at the same position in both v0 and v1 Rx compls */
2459         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2460                 return NULL;
2461
2462         rmb();
2463         be_dws_le_to_cpu(compl, sizeof(*compl));
2464
2465         if (adapter->be3_native)
2466                 be_parse_rx_compl_v1(compl, rxcp);
2467         else
2468                 be_parse_rx_compl_v0(compl, rxcp);
2469
2470         if (rxcp->ip_frag)
2471                 rxcp->l4_csum = 0;
2472
2473         if (rxcp->vlanf) {
2474                 /* In QNQ modes, if qnq bit is not set, then the packet was
2475                  * tagged only with the transparent outer vlan-tag and must
2476                  * not be treated as a vlan packet by host
2477                  */
2478                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2479                         rxcp->vlanf = 0;
2480
2481                 if (!lancer_chip(adapter))
2482                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2483
2484                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2485                     !test_bit(rxcp->vlan_tag, adapter->vids))
2486                         rxcp->vlanf = 0;
2487         }
2488
2489         /* As the compl has been parsed, reset it; we won't touch it again */
2490         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2491
2492         queue_tail_inc(&rxo->cq);
2493         return rxcp;
2494 }
2495
2496 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2497 {
2498         u32 order = get_order(size);
2499
2500         if (order > 0)
2501                 gfp |= __GFP_COMP;
2502         return  alloc_pages(gfp, order);
2503 }
2504
2505 /*
2506  * Allocate a page, split it into fragments of size rx_frag_size and post as
2507  * receive buffers to BE
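      * Fragments from the same compound page share one DMA mapping; the last
      * fragment of each page is flagged so the mapping is released only once
      * (see get_rx_page_info()).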
2508  */
2509 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2510 {
2511         struct be_adapter *adapter = rxo->adapter;
2512         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2513         struct be_queue_info *rxq = &rxo->q;
2514         struct page *pagep = NULL;
2515         struct device *dev = &adapter->pdev->dev;
2516         struct be_eth_rx_d *rxd;
2517         u64 page_dmaaddr = 0, frag_dmaaddr;
2518         u32 posted, page_offset = 0, notify = 0;
2519
2520         page_info = &rxo->page_info_tbl[rxq->head];
2521         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2522                 if (!pagep) {
2523                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2524                         if (unlikely(!pagep)) {
2525                                 rx_stats(rxo)->rx_post_fail++;
2526                                 break;
2527                         }
2528                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2529                                                     adapter->big_page_size,
2530                                                     DMA_FROM_DEVICE);
2531                         if (dma_mapping_error(dev, page_dmaaddr)) {
2532                                 put_page(pagep);
2533                                 pagep = NULL;
2534                                 adapter->drv_stats.dma_map_errors++;
2535                                 break;
2536                         }
2537                         page_offset = 0;
2538                 } else {
2539                         get_page(pagep);
2540                         page_offset += rx_frag_size;
2541                 }
2542                 page_info->page_offset = page_offset;
2543                 page_info->page = pagep;
2544
2545                 rxd = queue_head_node(rxq);
2546                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2547                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2548                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2549
2550                 /* Any space left in the current big page for another frag? */
2551                 if ((page_offset + rx_frag_size + rx_frag_size) >
2552                                         adapter->big_page_size) {
2553                         pagep = NULL;
2554                         page_info->last_frag = true;
2555                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2556                 } else {
2557                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2558                 }
2559
2560                 prev_page_info = page_info;
2561                 queue_head_inc(rxq);
2562                 page_info = &rxo->page_info_tbl[rxq->head];
2563         }
2564
2565         /* Mark the last frag of a page when we break out of the above loop
2566          * with no more slots available in the RXQ
2567          */
2568         if (pagep) {
2569                 prev_page_info->last_frag = true;
2570                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2571         }
2572
2573         if (posted) {
2574                 atomic_add(posted, &rxq->used);
2575                 if (rxo->rx_post_starved)
2576                         rxo->rx_post_starved = false;
2577                 do {
2578                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2579                         be_rxq_notify(adapter, rxq->id, notify);
2580                         posted -= notify;
2581                 } while (posted);
2582         } else if (atomic_read(&rxq->used) == 0) {
2583                 /* Let be_worker replenish when memory is available */
2584                 rxo->rx_post_starved = true;
2585         }
2586 }
2587
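     /* Return the next valid TX completion on the CQ, or NULL if there is
      * none. The valid bit is cleared and the CQ tail advanced before
      * returning, so each completion is reported exactly once.
      */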
2588 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2589 {
2590         struct be_queue_info *tx_cq = &txo->cq;
2591         struct be_tx_compl_info *txcp = &txo->txcp;
2592         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2593
2594         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2595                 return NULL;
2596
2597         /* Ensure load ordering of valid bit dword and other dwords below */
2598         rmb();
2599         be_dws_le_to_cpu(compl, sizeof(*compl));
2600
2601         txcp->status = GET_TX_COMPL_BITS(status, compl);
2602         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2603
2604         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2605         queue_tail_inc(tx_cq);
2606         return txcp;
2607 }
2608
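     /* Reclaim the WRBs of completed TX request(s): walk from the queue tail
      * up to last_index, unmapping each fragment and freeing any skb found at
      * a header-WRB position. Returns the number of WRBs processed so the
      * caller can adjust txq->used.
      */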
2609 static u16 be_tx_compl_process(struct be_adapter *adapter,
2610                                struct be_tx_obj *txo, u16 last_index)
2611 {
2612         struct sk_buff **sent_skbs = txo->sent_skb_list;
2613         struct be_queue_info *txq = &txo->q;
2614         struct sk_buff *skb = NULL;
2615         bool unmap_skb_hdr = false;
2616         struct be_eth_wrb *wrb;
2617         u16 num_wrbs = 0;
2618         u32 frag_index;
2619
2620         do {
2621                 if (sent_skbs[txq->tail]) {
2622                         /* Free skb from prev req */
2623                         if (skb)
2624                                 dev_consume_skb_any(skb);
2625                         skb = sent_skbs[txq->tail];
2626                         sent_skbs[txq->tail] = NULL;
2627                         queue_tail_inc(txq);  /* skip hdr wrb */
2628                         num_wrbs++;
2629                         unmap_skb_hdr = true;
2630                 }
2631                 wrb = queue_tail_node(txq);
2632                 frag_index = txq->tail;
2633                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2634                               (unmap_skb_hdr && skb_headlen(skb)));
2635                 unmap_skb_hdr = false;
2636                 queue_tail_inc(txq);
2637                 num_wrbs++;
2638         } while (frag_index != last_index);
2639         dev_consume_skb_any(skb);
2640
2641         return num_wrbs;
2642 }
2643
2644 /* Return the number of events in the event queue */
2645 static inline int events_get(struct be_eq_obj *eqo)
2646 {
2647         struct be_eq_entry *eqe;
2648         int num = 0;
2649
2650         do {
2651                 eqe = queue_tail_node(&eqo->q);
2652                 if (eqe->evt == 0)
2653                         break;
2654
2655                 rmb();
2656                 eqe->evt = 0;
2657                 num++;
2658                 queue_tail_inc(&eqo->q);
2659         } while (true);
2660
2661         return num;
2662 }
2663
2664 /* Leaves the EQ in disarmed state */
2665 static void be_eq_clean(struct be_eq_obj *eqo)
2666 {
2667         int num = events_get(eqo);
2668
2669         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2670 }
2671
2672 /* Free posted rx buffers that were not used */
2673 static void be_rxq_clean(struct be_rx_obj *rxo)
2674 {
2675         struct be_queue_info *rxq = &rxo->q;
2676         struct be_rx_page_info *page_info;
2677
2678         while (atomic_read(&rxq->used) > 0) {
2679                 page_info = get_rx_page_info(rxo);
2680                 put_page(page_info->page);
2681                 memset(page_info, 0, sizeof(*page_info));
2682         }
2683         BUG_ON(atomic_read(&rxq->used));
2684         rxq->tail = 0;
2685         rxq->head = 0;
2686 }
2687
2688 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2689 {
2690         struct be_queue_info *rx_cq = &rxo->cq;
2691         struct be_rx_compl_info *rxcp;
2692         struct be_adapter *adapter = rxo->adapter;
2693         int flush_wait = 0;
2694
2695         /* Consume pending rx completions.
2696          * Wait for the flush completion (identified by zero num_rcvd)
2697          * to arrive. Notify CQ even when there are no more CQ entries
2698          * for HW to flush partially coalesced CQ entries.
2699          * In Lancer, there is no need to wait for flush compl.
2700          */
2701         for (;;) {
2702                 rxcp = be_rx_compl_get(rxo);
2703                 if (!rxcp) {
2704                         if (lancer_chip(adapter))
2705                                 break;
2706
2707                         if (flush_wait++ > 50 ||
2708                             be_check_error(adapter,
2709                                            BE_ERROR_HW)) {
2710                                 dev_warn(&adapter->pdev->dev,
2711                                          "did not receive flush compl\n");
2712                                 break;
2713                         }
2714                         be_cq_notify(adapter, rx_cq->id, true, 0);
2715                         mdelay(1);
2716                 } else {
2717                         be_rx_compl_discard(rxo, rxcp);
2718                         be_cq_notify(adapter, rx_cq->id, false, 1);
2719                         if (rxcp->num_rcvd == 0)
2720                                 break;
2721                 }
2722         }
2723
2724         /* After cleanup, leave the CQ in unarmed state */
2725         be_cq_notify(adapter, rx_cq->id, false, 0);
2726 }
2727
2728 static void be_tx_compl_clean(struct be_adapter *adapter)
2729 {
2730         struct device *dev = &adapter->pdev->dev;
2731         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2732         struct be_tx_compl_info *txcp;
2733         struct be_queue_info *txq;
2734         u32 end_idx, notified_idx;
2735         struct be_tx_obj *txo;
2736         int i, pending_txqs;
2737
2738         /* Stop polling for compls when HW has been silent for 10ms */
2739         do {
2740                 pending_txqs = adapter->num_tx_qs;
2741
2742                 for_all_tx_queues(adapter, txo, i) {
2743                         cmpl = 0;
2744                         num_wrbs = 0;
2745                         txq = &txo->q;
2746                         while ((txcp = be_tx_compl_get(txo))) {
2747                                 num_wrbs +=
2748                                         be_tx_compl_process(adapter, txo,
2749                                                             txcp->end_index);
2750                                 cmpl++;
2751                         }
2752                         if (cmpl) {
2753                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2754                                 atomic_sub(num_wrbs, &txq->used);
2755                                 timeo = 0;
2756                         }
2757                         if (!be_is_tx_compl_pending(txo))
2758                                 pending_txqs--;
2759                 }
2760
2761                 if (pending_txqs == 0 || ++timeo > 10 ||
2762                     be_check_error(adapter, BE_ERROR_HW))
2763                         break;
2764
2765                 mdelay(1);
2766         } while (true);
2767
2768         /* Free enqueued TX that was never notified to HW */
2769         for_all_tx_queues(adapter, txo, i) {
2770                 txq = &txo->q;
2771
2772                 if (atomic_read(&txq->used)) {
2773                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2774                                  i, atomic_read(&txq->used));
2775                         notified_idx = txq->tail;
2776                         end_idx = txq->tail;
2777                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2778                                   txq->len);
2779                         /* Use the tx-compl process logic to handle requests
2780                          * that were not sent to the HW.
2781                          */
2782                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2783                         atomic_sub(num_wrbs, &txq->used);
2784                         BUG_ON(atomic_read(&txq->used));
2785                         txo->pend_wrb_cnt = 0;
2786                         /* Since hw was never notified of these requests,
2787                          * reset TXQ indices
2788                          */
2789                         txq->head = notified_idx;
2790                         txq->tail = notified_idx;
2791                 }
2792         }
2793 }
2794
2795 static void be_evt_queues_destroy(struct be_adapter *adapter)
2796 {
2797         struct be_eq_obj *eqo;
2798         int i;
2799
2800         for_all_evt_queues(adapter, eqo, i) {
2801                 if (eqo->q.created) {
2802                         be_eq_clean(eqo);
2803                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2804                         netif_napi_del(&eqo->napi);
2805                         free_cpumask_var(eqo->affinity_mask);
2806                 }
2807                 be_queue_free(adapter, &eqo->q);
2808         }
2809 }
2810
2811 static int be_evt_queues_create(struct be_adapter *adapter)
2812 {
2813         struct be_queue_info *eq;
2814         struct be_eq_obj *eqo;
2815         struct be_aic_obj *aic;
2816         int i, rc;
2817
2818         /* need enough EQs to service both RX and TX queues */
2819         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2820                                     max(adapter->cfg_num_rx_irqs,
2821                                         adapter->cfg_num_tx_irqs));
2822
2823         for_all_evt_queues(adapter, eqo, i) {
2824                 int numa_node = dev_to_node(&adapter->pdev->dev);
2825
2826                 aic = &adapter->aic_obj[i];
2827                 eqo->adapter = adapter;
2828                 eqo->idx = i;
2829                 aic->max_eqd = BE_MAX_EQD;
2830                 aic->enable = true;
2831
2832                 eq = &eqo->q;
2833                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2834                                     sizeof(struct be_eq_entry));
2835                 if (rc)
2836                         return rc;
2837
2838                 rc = be_cmd_eq_create(adapter, eqo);
2839                 if (rc)
2840                         return rc;
2841
2842                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2843                         return -ENOMEM;
2844                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2845                                 eqo->affinity_mask);
2846                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2847                                BE_NAPI_WEIGHT);
2848         }
2849         return 0;
2850 }
2851
2852 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2853 {
2854         struct be_queue_info *q;
2855
2856         q = &adapter->mcc_obj.q;
2857         if (q->created)
2858                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2859         be_queue_free(adapter, q);
2860
2861         q = &adapter->mcc_obj.cq;
2862         if (q->created)
2863                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2864         be_queue_free(adapter, q);
2865 }
2866
2867 /* Must be called only after TX qs are created as MCC shares TX EQ */
2868 static int be_mcc_queues_create(struct be_adapter *adapter)
2869 {
2870         struct be_queue_info *q, *cq;
2871
2872         cq = &adapter->mcc_obj.cq;
2873         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2874                            sizeof(struct be_mcc_compl)))
2875                 goto err;
2876
2877         /* Use the default EQ for MCC completions */
2878         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2879                 goto mcc_cq_free;
2880
2881         q = &adapter->mcc_obj.q;
2882         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2883                 goto mcc_cq_destroy;
2884
2885         if (be_cmd_mccq_create(adapter, q, cq))
2886                 goto mcc_q_free;
2887
2888         return 0;
2889
2890 mcc_q_free:
2891         be_queue_free(adapter, q);
2892 mcc_cq_destroy:
2893         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2894 mcc_cq_free:
2895         be_queue_free(adapter, cq);
2896 err:
2897         return -1;
2898 }
2899
2900 static void be_tx_queues_destroy(struct be_adapter *adapter)
2901 {
2902         struct be_queue_info *q;
2903         struct be_tx_obj *txo;
2904         u8 i;
2905
2906         for_all_tx_queues(adapter, txo, i) {
2907                 q = &txo->q;
2908                 if (q->created)
2909                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2910                 be_queue_free(adapter, q);
2911
2912                 q = &txo->cq;
2913                 if (q->created)
2914                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2915                 be_queue_free(adapter, q);
2916         }
2917 }
2918
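/* Create one TX queue per configured TX IRQ (capped by the number of EQs).
 * Each TXQ's completion queue is bound to an EQ -- shared by several TXQs
 * when fewer EQs exist -- and the EQ's affinity mask is used as the XPS
 * mapping for the corresponding netdev queue.
 */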
2919 static int be_tx_qs_create(struct be_adapter *adapter)
2920 {
2921         struct be_queue_info *cq;
2922         struct be_tx_obj *txo;
2923         struct be_eq_obj *eqo;
2924         int status, i;
2925
2926         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2927
2928         for_all_tx_queues(adapter, txo, i) {
2929                 cq = &txo->cq;
2930                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2931                                         sizeof(struct be_eth_tx_compl));
2932                 if (status)
2933                         return status;
2934
2935                 u64_stats_init(&txo->stats.sync);
2936                 u64_stats_init(&txo->stats.sync_compl);
2937
2938                 /* If num_evt_qs is less than num_tx_qs, then more than
2939                  * one txq shares an eq
2940                  */
2941                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2942                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2943                 if (status)
2944                         return status;
2945
2946                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2947                                         sizeof(struct be_eth_wrb));
2948                 if (status)
2949                         return status;
2950
2951                 status = be_cmd_txq_create(adapter, txo);
2952                 if (status)
2953                         return status;
2954
2955                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2956                                     eqo->idx);
2957         }
2958
2959         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2960                  adapter->num_tx_qs);
2961         return 0;
2962 }
2963
2964 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2965 {
2966         struct be_queue_info *q;
2967         struct be_rx_obj *rxo;
2968         int i;
2969
2970         for_all_rx_queues(adapter, rxo, i) {
2971                 q = &rxo->cq;
2972                 if (q->created)
2973                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2974                 be_queue_free(adapter, q);
2975         }
2976 }
2977
2978 static int be_rx_cqs_create(struct be_adapter *adapter)
2979 {
2980         struct be_queue_info *eq, *cq;
2981         struct be_rx_obj *rxo;
2982         int rc, i;
2983
2984         adapter->num_rss_qs =
2985                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2986
2987         /* We'll use RSS only if at least 2 RSS rings are supported. */
2988         if (adapter->num_rss_qs < 2)
2989                 adapter->num_rss_qs = 0;
2990
2991         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2992
2993         /* When the interface is not capable of RSS rings (and there is no
2994          * need to create a default RXQ) we'll still need one RXQ
2995          */
2996         if (adapter->num_rx_qs == 0)
2997                 adapter->num_rx_qs = 1;
2998
2999         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3000         for_all_rx_queues(adapter, rxo, i) {
3001                 rxo->adapter = adapter;
3002                 cq = &rxo->cq;
3003                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3004                                     sizeof(struct be_eth_rx_compl));
3005                 if (rc)
3006                         return rc;
3007
3008                 u64_stats_init(&rxo->stats.sync);
3009                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3010                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3011                 if (rc)
3012                         return rc;
3013         }
3014
3015         dev_info(&adapter->pdev->dev,
3016                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3017         return 0;
3018 }
3019
3020 static irqreturn_t be_intx(int irq, void *dev)
3021 {
3022         struct be_eq_obj *eqo = dev;
3023         struct be_adapter *adapter = eqo->adapter;
3024         int num_evts = 0;
3025
3026         /* IRQ is not expected when NAPI is scheduled as the EQ
3027          * will not be armed.
3028          * But, this can happen on Lancer INTx where it takes
3029          * a while to de-assert INTx or in BE2 where occasionally
3030          * an interrupt may be raised even when EQ is unarmed.
3031          * If NAPI is already scheduled, then counting & notifying
3032          * events will orphan them.
3033          */
3034         if (napi_schedule_prep(&eqo->napi)) {
3035                 num_evts = events_get(eqo);
3036                 __napi_schedule(&eqo->napi);
3037                 if (num_evts)
3038                         eqo->spurious_intr = 0;
3039         }
3040         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3041
3042         /* Return IRQ_HANDLED only for the first spurious intr
3043          * after a valid intr to stop the kernel from branding
3044          * this irq as a bad one!
3045          */
3046         if (num_evts || eqo->spurious_intr++ == 0)
3047                 return IRQ_HANDLED;
3048         else
3049                 return IRQ_NONE;
3050 }
3051
3052 static irqreturn_t be_msix(int irq, void *dev)
3053 {
3054         struct be_eq_obj *eqo = dev;
3055
3056         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3057         napi_schedule(&eqo->napi);
3058         return IRQ_HANDLED;
3059 }
3060
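/* GRO is attempted only for error-free TCP completions that passed the L4
 * checksum check; all other packets take the regular RX completion path.
 */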
3061 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3062 {
3063         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3064 }
3065
3066 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3067                          int budget, int polling)
3068 {
3069         struct be_adapter *adapter = rxo->adapter;
3070         struct be_queue_info *rx_cq = &rxo->cq;
3071         struct be_rx_compl_info *rxcp;
3072         u32 work_done;
3073         u32 frags_consumed = 0;
3074
3075         for (work_done = 0; work_done < budget; work_done++) {
3076                 rxcp = be_rx_compl_get(rxo);
3077                 if (!rxcp)
3078                         break;
3079
3080                 /* Is it a flush compl that has no data */
3081                 if (unlikely(rxcp->num_rcvd == 0))
3082                         goto loop_continue;
3083
3084                 /* Discard compl with partial DMA Lancer B0 */
3085                 if (unlikely(!rxcp->pkt_size)) {
3086                         be_rx_compl_discard(rxo, rxcp);
3087                         goto loop_continue;
3088                 }
3089
3090                 /* On BE drop pkts that arrive due to imperfect filtering in
3091                  * promiscuous mode on some SKUs
3092                  */
3093                 if (unlikely(rxcp->port != adapter->port_num &&
3094                              !lancer_chip(adapter))) {
3095                         be_rx_compl_discard(rxo, rxcp);
3096                         goto loop_continue;
3097                 }
3098
3099                 /* Don't do gro when we're busy_polling */
3100                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3101                         be_rx_compl_process_gro(rxo, napi, rxcp);
3102                 else
3103                         be_rx_compl_process(rxo, napi, rxcp);
3104
3105 loop_continue:
3106                 frags_consumed += rxcp->num_rcvd;
3107                 be_rx_stats_update(rxo, rxcp);
3108         }
3109
3110         if (work_done) {
3111                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3112
3113                 /* When an rx-obj gets into post_starved state, just
3114                  * let be_worker do the posting.
3115                  */
3116                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3117                     !rxo->rx_post_starved)
3118                         be_post_rx_frags(rxo, GFP_ATOMIC,
3119                                          max_t(u32, MAX_RX_POST,
3120                                                frags_consumed));
3121         }
3122
3123         return work_done;
3124 }
3125
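/* Account TX completion errors using the BE2/BE3/Skyhawk status encoding;
 * Lancer reports a different set of codes, handled by
 * lancer_update_tx_err() below.
 */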
3126 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3127 {
3128         switch (status) {
3129         case BE_TX_COMP_HDR_PARSE_ERR:
3130                 tx_stats(txo)->tx_hdr_parse_err++;
3131                 break;
3132         case BE_TX_COMP_NDMA_ERR:
3133                 tx_stats(txo)->tx_dma_err++;
3134                 break;
3135         case BE_TX_COMP_ACL_ERR:
3136                 tx_stats(txo)->tx_spoof_check_err++;
3137                 break;
3138         }
3139 }
3140
3141 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3142 {
3143         switch (status) {
3144         case LANCER_TX_COMP_LSO_ERR:
3145                 tx_stats(txo)->tx_tso_err++;
3146                 break;
3147         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3148         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3149                 tx_stats(txo)->tx_spoof_check_err++;
3150                 break;
3151         case LANCER_TX_COMP_QINQ_ERR:
3152                 tx_stats(txo)->tx_qinq_err++;
3153                 break;
3154         case LANCER_TX_COMP_PARITY_ERR:
3155                 tx_stats(txo)->tx_internal_parity_err++;
3156                 break;
3157         case LANCER_TX_COMP_DMA_ERR:
3158                 tx_stats(txo)->tx_dma_err++;
3159                 break;
3160         }
3161 }
3162
3163 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3164                           int idx)
3165 {
3166         int num_wrbs = 0, work_done = 0;
3167         struct be_tx_compl_info *txcp;
3168
3169         while ((txcp = be_tx_compl_get(txo))) {
3170                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3171                 work_done++;
3172
3173                 if (txcp->status) {
3174                         if (lancer_chip(adapter))
3175                                 lancer_update_tx_err(txo, txcp->status);
3176                         else
3177                                 be_update_tx_err(txo, txcp->status);
3178                 }
3179         }
3180
3181         if (work_done) {
3182                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3183                 atomic_sub(num_wrbs, &txo->q.used);
3184
3185                 /* As Tx wrbs have been freed up, wake up netdev queue
3186                  * if it was stopped due to lack of tx wrbs.  */
3187                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3188                     be_can_txq_wake(txo)) {
3189                         netif_wake_subqueue(adapter->netdev, idx);
3190                 }
3191
3192                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3193                 tx_stats(txo)->tx_compl += work_done;
3194                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3195         }
3196 }
3197
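/* The helpers below arbitrate EQ ownership between the NAPI poll path and
 * the busy-poll path: the context that takes the lock first processes the
 * EQ's RX queues, while the other sets a *_YIELD flag and backs off.
 */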
3198 #ifdef CONFIG_NET_RX_BUSY_POLL
3199 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3200 {
3201         bool status = true;
3202
3203         spin_lock(&eqo->lock); /* BH is already disabled */
3204         if (eqo->state & BE_EQ_LOCKED) {
3205                 WARN_ON(eqo->state & BE_EQ_NAPI);
3206                 eqo->state |= BE_EQ_NAPI_YIELD;
3207                 status = false;
3208         } else {
3209                 eqo->state = BE_EQ_NAPI;
3210         }
3211         spin_unlock(&eqo->lock);
3212         return status;
3213 }
3214
3215 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3216 {
3217         spin_lock(&eqo->lock); /* BH is already disabled */
3218
3219         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3220         eqo->state = BE_EQ_IDLE;
3221
3222         spin_unlock(&eqo->lock);
3223 }
3224
3225 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3226 {
3227         bool status = true;
3228
3229         spin_lock_bh(&eqo->lock);
3230         if (eqo->state & BE_EQ_LOCKED) {
3231                 eqo->state |= BE_EQ_POLL_YIELD;
3232                 status = false;
3233         } else {
3234                 eqo->state |= BE_EQ_POLL;
3235         }
3236         spin_unlock_bh(&eqo->lock);
3237         return status;
3238 }
3239
3240 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3241 {
3242         spin_lock_bh(&eqo->lock);
3243
3244         WARN_ON(eqo->state & (BE_EQ_NAPI));
3245         eqo->state = BE_EQ_IDLE;
3246
3247         spin_unlock_bh(&eqo->lock);
3248 }
3249
3250 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3251 {
3252         spin_lock_init(&eqo->lock);
3253         eqo->state = BE_EQ_IDLE;
3254 }
3255
3256 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3257 {
3258         local_bh_disable();
3259
3260         /* It's enough to just acquire napi lock on the eqo to stop
3261          * be_busy_poll() from processing any queues.
3262          */
3263         while (!be_lock_napi(eqo))
3264                 mdelay(1);
3265
3266         local_bh_enable();
3267 }
3268
3269 #else /* CONFIG_NET_RX_BUSY_POLL */
3270
3271 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3272 {
3273         return true;
3274 }
3275
3276 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3277 {
3278 }
3279
3280 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3281 {
3282         return false;
3283 }
3284
3285 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3286 {
3287 }
3288
3289 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3290 {
3291 }
3292
3293 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3294 {
3295 }
3296 #endif /* CONFIG_NET_RX_BUSY_POLL */
3297
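/* NAPI poll handler: count pending EQ events, reap TX completions for all
 * TXQs on this EQ, process RX completions (under the NAPI/busy-poll lock)
 * and MCC completions, then re-arm the EQ only if the budget was not
 * exhausted.
 */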
3298 int be_poll(struct napi_struct *napi, int budget)
3299 {
3300         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3301         struct be_adapter *adapter = eqo->adapter;
3302         int max_work = 0, work, i, num_evts;
3303         struct be_rx_obj *rxo;
3304         struct be_tx_obj *txo;
3305         u32 mult_enc = 0;
3306
3307         num_evts = events_get(eqo);
3308
3309         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3310                 be_process_tx(adapter, txo, i);
3311
3312         if (be_lock_napi(eqo)) {
3313                 /* This loop will iterate twice for EQ0 in which
3314                  * completions of the last RXQ (default one) are also processed
3315                  * For other EQs the loop iterates only once
3316                  */
3317                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3318                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3319                         max_work = max(work, max_work);
3320                 }
3321                 be_unlock_napi(eqo);
3322         } else {
3323                 max_work = budget;
3324         }
3325
3326         if (is_mcc_eqo(eqo))
3327                 be_process_mcc(adapter);
3328
3329         if (max_work < budget) {
3330                 napi_complete(napi);
3331
3332                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3333                  * delay via a delay multiplier encoding value
3334                  */
3335                 if (skyhawk_chip(adapter))
3336                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3337
3338                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3339                              mult_enc);
3340         } else {
3341                 /* As we'll continue in polling mode, count and clear events */
3342                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3343         }
3344         return max_work;
3345 }
3346
3347 #ifdef CONFIG_NET_RX_BUSY_POLL
3348 static int be_busy_poll(struct napi_struct *napi)
3349 {
3350         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3351         struct be_adapter *adapter = eqo->adapter;
3352         struct be_rx_obj *rxo;
3353         int i, work = 0;
3354
3355         if (!be_lock_busy_poll(eqo))
3356                 return LL_FLUSH_BUSY;
3357
3358         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3359                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3360                 if (work)
3361                         break;
3362         }
3363
3364         be_unlock_busy_poll(eqo);
3365         return work;
3366 }
3367 #endif
3368
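/* Detect unrecoverable adapter errors: SLIPORT status registers on Lancer,
 * unmasked UE status bits on other chips. The failing blocks are logged and
 * the error state is latched where the UE is treated as genuine.
 */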
3369 void be_detect_error(struct be_adapter *adapter)
3370 {
3371         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3372         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3373         u32 i;
3374         struct device *dev = &adapter->pdev->dev;
3375
3376         if (be_check_error(adapter, BE_ERROR_HW))
3377                 return;
3378
3379         if (lancer_chip(adapter)) {
3380                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3381                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3382                         be_set_error(adapter, BE_ERROR_UE);
3383                         sliport_err1 = ioread32(adapter->db +
3384                                                 SLIPORT_ERROR1_OFFSET);
3385                         sliport_err2 = ioread32(adapter->db +
3386                                                 SLIPORT_ERROR2_OFFSET);
3387                         /* Do not log error messages if it's a FW reset */
3388                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3389                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3390                                 dev_info(dev, "Firmware update in progress\n");
3391                         } else {
3392                                 dev_err(dev, "Error detected in the card\n");
3393                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3394                                         sliport_status);
3395                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3396                                         sliport_err1);
3397                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3398                                         sliport_err2);
3399                         }
3400                 }
3401         } else {
3402                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3403                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3404                 ue_lo_mask = ioread32(adapter->pcicfg +
3405                                       PCICFG_UE_STATUS_LOW_MASK);
3406                 ue_hi_mask = ioread32(adapter->pcicfg +
3407                                       PCICFG_UE_STATUS_HI_MASK);
3408
3409                 ue_lo = (ue_lo & ~ue_lo_mask);
3410                 ue_hi = (ue_hi & ~ue_hi_mask);
3411
3412                 /* On certain platforms BE hardware can indicate spurious UEs.
3413                  * If a UE is real, the HW is allowed to stop working on its
3414                  * own; hence the hw_error state is not set for UE detection.
3415                  */
3416
3417                 if (ue_lo || ue_hi) {
3418                         dev_err(dev, "Error detected in the adapter\n");
3419                         if (skyhawk_chip(adapter))
3420                                 be_set_error(adapter, BE_ERROR_UE);
3421
3422                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3423                                 if (ue_lo & 1)
3424                                         dev_err(dev, "UE: %s bit set\n",
3425                                                 ue_status_low_desc[i]);
3426                         }
3427                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3428                                 if (ue_hi & 1)
3429                                         dev_err(dev, "UE: %s bit set\n",
3430                                                 ue_status_hi_desc[i]);
3431                         }
3432                 }
3433         }
3434 }
3435
3436 static void be_msix_disable(struct be_adapter *adapter)
3437 {
3438         if (msix_enabled(adapter)) {
3439                 pci_disable_msix(adapter->pdev);
3440                 adapter->num_msix_vec = 0;
3441                 adapter->num_msix_roce_vec = 0;
3442         }
3443 }
3444
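/* Size the MSI-X request: NIC RX/TX IRQs plus RoCE EQs when RoCE is
 * supported. On failure, PFs fall back to INTx; VFs have no INTx support,
 * so their probe fails.
 */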
3445 static int be_msix_enable(struct be_adapter *adapter)
3446 {
3447         unsigned int i, max_roce_eqs;
3448         struct device *dev = &adapter->pdev->dev;
3449         int num_vec;
3450
3451         /* If RoCE is supported, program the max number of vectors that
3452          * could be used for NIC and RoCE, else, just program the number
3453          * we'll use initially.
3454          */
3455         if (be_roce_supported(adapter)) {
3456                 max_roce_eqs =
3457                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3458                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3459                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3460         } else {
3461                 num_vec = max(adapter->cfg_num_rx_irqs,
3462                               adapter->cfg_num_tx_irqs);
3463         }
3464
3465         for (i = 0; i < num_vec; i++)
3466                 adapter->msix_entries[i].entry = i;
3467
3468         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3469                                         MIN_MSIX_VECTORS, num_vec);
3470         if (num_vec < 0)
3471                 goto fail;
3472
3473         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3474                 adapter->num_msix_roce_vec = num_vec / 2;
3475                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3476                          adapter->num_msix_roce_vec);
3477         }
3478
3479         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3480
3481         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3482                  adapter->num_msix_vec);
3483         return 0;
3484
3485 fail:
3486         dev_warn(dev, "MSIx enable failed\n");
3487
3488         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3489         if (be_virtfn(adapter))
3490                 return num_vec;
3491         return 0;
3492 }
3493
3494 static inline int be_msix_vec_get(struct be_adapter *adapter,
3495                                   struct be_eq_obj *eqo)
3496 {
3497         return adapter->msix_entries[eqo->msix_idx].vector;
3498 }
3499
3500 static int be_msix_register(struct be_adapter *adapter)
3501 {
3502         struct net_device *netdev = adapter->netdev;
3503         struct be_eq_obj *eqo;
3504         int status, i, vec;
3505
3506         for_all_evt_queues(adapter, eqo, i) {
3507                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3508                 vec = be_msix_vec_get(adapter, eqo);
3509                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3510                 if (status)
3511                         goto err_msix;
3512
3513                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3514         }
3515
3516         return 0;
3517 err_msix:
3518         for (i--; i >= 0; i--) {
3519                 eqo = &adapter->eq_obj[i];
3520                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3521         }
3522         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3523                  status);
3524         be_msix_disable(adapter);
3525         return status;
3526 }
3527
3528 static int be_irq_register(struct be_adapter *adapter)
3529 {
3530         struct net_device *netdev = adapter->netdev;
3531         int status;
3532
3533         if (msix_enabled(adapter)) {
3534                 status = be_msix_register(adapter);
3535                 if (status == 0)
3536                         goto done;
3537                 /* INTx is not supported for VF */
3538                 if (be_virtfn(adapter))
3539                         return status;
3540         }
3541
3542         /* INTx: only the first EQ is used */
3543         netdev->irq = adapter->pdev->irq;
3544         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3545                              &adapter->eq_obj[0]);
3546         if (status) {
3547                 dev_err(&adapter->pdev->dev,
3548                         "INTx request IRQ failed - err %d\n", status);
3549                 return status;
3550         }
3551 done:
3552         adapter->isr_registered = true;
3553         return 0;
3554 }
3555
3556 static void be_irq_unregister(struct be_adapter *adapter)
3557 {
3558         struct net_device *netdev = adapter->netdev;
3559         struct be_eq_obj *eqo;
3560         int i, vec;
3561
3562         if (!adapter->isr_registered)
3563                 return;
3564
3565         /* INTx */
3566         if (!msix_enabled(adapter)) {
3567                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3568                 goto done;
3569         }
3570
3571         /* MSIx */
3572         for_all_evt_queues(adapter, eqo, i) {
3573                 vec = be_msix_vec_get(adapter, eqo);
3574                 irq_set_affinity_hint(vec, NULL);
3575                 free_irq(vec, eqo);
3576         }
3577
3578 done:
3579         adapter->isr_registered = false;
3580 }
3581
3582 static void be_rx_qs_destroy(struct be_adapter *adapter)
3583 {
3584         struct rss_info *rss = &adapter->rss_info;
3585         struct be_queue_info *q;
3586         struct be_rx_obj *rxo;
3587         int i;
3588
3589         for_all_rx_queues(adapter, rxo, i) {
3590                 q = &rxo->q;
3591                 if (q->created) {
3592                         /* If RXQs are destroyed while in an "out of buffer"
3593                          * state, there is a possibility of an HW stall on
3594                          * Lancer. So, post 64 buffers to each queue to relieve
3595                          * the "out of buffer" condition.
3596                          * Make sure there's space in the RXQ before posting.
3597                          */
3598                         if (lancer_chip(adapter)) {
3599                                 be_rx_cq_clean(rxo);
3600                                 if (atomic_read(&q->used) == 0)
3601                                         be_post_rx_frags(rxo, GFP_KERNEL,
3602                                                          MAX_RX_POST);
3603                         }
3604
3605                         be_cmd_rxq_destroy(adapter, q);
3606                         be_rx_cq_clean(rxo);
3607                         be_rxq_clean(rxo);
3608                 }
3609                 be_queue_free(adapter, q);
3610         }
3611
3612         if (rss->rss_flags) {
3613                 rss->rss_flags = RSS_ENABLE_NONE;
3614                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3615                                   128, rss->rss_hkey);
3616         }
3617 }
3618
3619 static void be_disable_if_filters(struct be_adapter *adapter)
3620 {
3621         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3622         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3623             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3624                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3625                 eth_zero_addr(adapter->dev_mac);
3626         }
3627
3628         be_clear_uc_list(adapter);
3629         be_clear_mc_list(adapter);
3630
3631         /* The IFACE flags are enabled in the open path and cleared
3632          * in the close path. When a VF gets detached from the host and
3633          * assigned to a VM the following happens:
3634          *      - VF's IFACE flags get cleared in the detach path
3635          *      - IFACE create is issued by the VF in the attach path
3636          * Due to a bug in the BE3/Skyhawk-R FW
3637          * (Lancer FW doesn't have the bug), the IFACE capability flags
3638          * specified along with the IFACE create cmd issued by a VF are not
3639          * honoured by FW.  As a consequence, if a *new* driver
3640          * (that enables/disables IFACE flags in open/close)
3641          * is loaded in the host and an *old* driver is used by a VM/VF,
3642          * the IFACE gets created *without* the needed flags.
3643          * To avoid this, disable RX-filter flags only for Lancer.
3644          */
3645         if (lancer_chip(adapter)) {
3646                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3647                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3648         }
3649 }
3650
3651 static int be_close(struct net_device *netdev)
3652 {
3653         struct be_adapter *adapter = netdev_priv(netdev);
3654         struct be_eq_obj *eqo;
3655         int i;
3656
3657         /* This protection is needed as be_close() may be called even when the
3658          * adapter is in cleared state (after eeh perm failure)
3659          */
3660         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3661                 return 0;
3662
3663         /* Before attempting cleanup ensure all the pending cmds in the
3664          * config_wq have finished execution
3665          */
3666         flush_workqueue(be_wq);
3667
3668         be_disable_if_filters(adapter);
3669
3670         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3671                 for_all_evt_queues(adapter, eqo, i) {
3672                         napi_disable(&eqo->napi);
3673                         be_disable_busy_poll(eqo);
3674                 }
3675                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3676         }
3677
3678         be_async_mcc_disable(adapter);
3679
3680         /* Wait for all pending tx completions to arrive so that
3681          * all tx skbs are freed.
3682          */
3683         netif_tx_disable(netdev);
3684         be_tx_compl_clean(adapter);
3685
3686         be_rx_qs_destroy(adapter);
3687
3688         for_all_evt_queues(adapter, eqo, i) {
3689                 if (msix_enabled(adapter))
3690                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3691                 else
3692                         synchronize_irq(netdev->irq);
3693                 be_eq_clean(eqo);
3694         }
3695
3696         be_irq_unregister(adapter);
3697
3698         return 0;
3699 }
3700
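/* Create the RX queues: a default (non-RSS) RXQ when needed plus the RSS
 * RXQs, program the RSS indirection table and hash key when multiple RX
 * queues exist, and pre-fill every RXQ with RX_Q_LEN - 1 buffers.
 */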
3701 static int be_rx_qs_create(struct be_adapter *adapter)
3702 {
3703         struct rss_info *rss = &adapter->rss_info;
3704         u8 rss_key[RSS_HASH_KEY_LEN];
3705         struct be_rx_obj *rxo;
3706         int rc, i, j;
3707
3708         for_all_rx_queues(adapter, rxo, i) {
3709                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3710                                     sizeof(struct be_eth_rx_d));
3711                 if (rc)
3712                         return rc;
3713         }
3714
3715         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3716                 rxo = default_rxo(adapter);
3717                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3718                                        rx_frag_size, adapter->if_handle,
3719                                        false, &rxo->rss_id);
3720                 if (rc)
3721                         return rc;
3722         }
3723
3724         for_all_rss_queues(adapter, rxo, i) {
3725                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3726                                        rx_frag_size, adapter->if_handle,
3727                                        true, &rxo->rss_id);
3728                 if (rc)
3729                         return rc;
3730         }
3731
3732         if (be_multi_rxq(adapter)) {
3733                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3734                         for_all_rss_queues(adapter, rxo, i) {
3735                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3736                                         break;
3737                                 rss->rsstable[j + i] = rxo->rss_id;
3738                                 rss->rss_queue[j + i] = i;
3739                         }
3740                 }
3741                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3742                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3743
3744                 if (!BEx_chip(adapter))
3745                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3746                                 RSS_ENABLE_UDP_IPV6;
3747
3748                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3749                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3750                                        RSS_INDIR_TABLE_LEN, rss_key);
3751                 if (rc) {
3752                         rss->rss_flags = RSS_ENABLE_NONE;
3753                         return rc;
3754                 }
3755
3756                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3757         } else {
3758                 /* Disable RSS, if only default RX Q is created */
3759                 rss->rss_flags = RSS_ENABLE_NONE;
3760         }
3761
3762
3763         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3764          * which is a queue empty condition
3765          */
3766         for_all_rx_queues(adapter, rxo, i)
3767                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3768
3769         return 0;
3770 }
3771
3772 static int be_enable_if_filters(struct be_adapter *adapter)
3773 {
3774         int status;
3775
3776         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3777         if (status)
3778                 return status;
3779
3780         /* Normally this condition is true, as ->dev_mac is zeroed.
3781          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3782          * subsequent be_dev_mac_add() can fail (after fresh boot)
3783          */
3784         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3785                 int old_pmac_id = -1;
3786
3787                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3788                 if (!is_zero_ether_addr(adapter->dev_mac))
3789                         old_pmac_id = adapter->pmac_id[0];
3790
3791                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3792                 if (status)
3793                         return status;
3794
3795                 /* Delete the old programmed MAC as we successfully programmed
3796                  * a new MAC
3797                  */
3798                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3799                         be_dev_mac_del(adapter, old_pmac_id);
3800
3801                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3802         }
3803
3804         if (adapter->vlans_added)
3805                 be_vid_config(adapter);
3806
3807         __be_set_rx_mode(adapter);
3808
3809         return 0;
3810 }
3811
3812 static int be_open(struct net_device *netdev)
3813 {
3814         struct be_adapter *adapter = netdev_priv(netdev);
3815         struct be_eq_obj *eqo;
3816         struct be_rx_obj *rxo;
3817         struct be_tx_obj *txo;
3818         u8 link_status;
3819         int status, i;
3820
3821         status = be_rx_qs_create(adapter);
3822         if (status)
3823                 goto err;
3824
3825         status = be_enable_if_filters(adapter);
3826         if (status)
3827                 goto err;
3828
3829         status = be_irq_register(adapter);
3830         if (status)
3831                 goto err;
3832
3833         for_all_rx_queues(adapter, rxo, i)
3834                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3835
3836         for_all_tx_queues(adapter, txo, i)
3837                 be_cq_notify(adapter, txo->cq.id, true, 0);
3838
3839         be_async_mcc_enable(adapter);
3840
3841         for_all_evt_queues(adapter, eqo, i) {
3842                 napi_enable(&eqo->napi);
3843                 be_enable_busy_poll(eqo);
3844                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3845         }
3846         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3847
3848         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3849         if (!status)
3850                 be_link_status_update(adapter, link_status);
3851
3852         netif_tx_start_all_queues(netdev);
3853         if (skyhawk_chip(adapter))
3854                 udp_tunnel_get_rx_info(netdev);
3855
3856         return 0;
3857 err:
3858         be_close(adapter->netdev);
3859         return -EIO;
3860 }
3861
3862 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3863 {
3864         u32 addr;
3865
3866         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3867
3868         mac[5] = (u8)(addr & 0xFF);
3869         mac[4] = (u8)((addr >> 8) & 0xFF);
3870         mac[3] = (u8)((addr >> 16) & 0xFF);
3871         /* Use the OUI from the current MAC address */
3872         memcpy(mac, adapter->netdev->dev_addr, 3);
3873 }
3874
3875 /*
3876  * Generate a seed MAC address from the PF MAC Address using jhash.
3877  * MAC addresses for VFs are assigned incrementally starting from the seed.
3878  * These addresses are programmed in the ASIC by the PF and the VF driver
3879  * queries for the MAC address during its probe.
3880  */
3881 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3882 {
3883         u32 vf;
3884         int status = 0;
3885         u8 mac[ETH_ALEN];
3886         struct be_vf_cfg *vf_cfg;
3887
3888         be_vf_eth_addr_generate(adapter, mac);
3889
3890         for_all_vfs(adapter, vf_cfg, vf) {
3891                 if (BEx_chip(adapter))
3892                         status = be_cmd_pmac_add(adapter, mac,
3893                                                  vf_cfg->if_handle,
3894                                                  &vf_cfg->pmac_id, vf + 1);
3895                 else
3896                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3897                                                 vf + 1);
3898
3899                 if (status)
3900                         dev_err(&adapter->pdev->dev,
3901                                 "Mac address assignment failed for VF %d\n",
3902                                 vf);
3903                 else
3904                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905
3906                 mac[5] += 1;
3907         }
3908         return status;
3909 }
3910
3911 static int be_vfs_mac_query(struct be_adapter *adapter)
3912 {
3913         int status, vf;
3914         u8 mac[ETH_ALEN];
3915         struct be_vf_cfg *vf_cfg;
3916
3917         for_all_vfs(adapter, vf_cfg, vf) {
3918                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3919                                                mac, vf_cfg->if_handle,
3920                                                false, vf+1);
3921                 if (status)
3922                         return status;
3923                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3924         }
3925         return 0;
3926 }
3927
3928 static void be_vf_clear(struct be_adapter *adapter)
3929 {
3930         struct be_vf_cfg *vf_cfg;
3931         u32 vf;
3932
3933         if (pci_vfs_assigned(adapter->pdev)) {
3934                 dev_warn(&adapter->pdev->dev,
3935                          "VFs are assigned to VMs: not disabling VFs\n");
3936                 goto done;
3937         }
3938
3939         pci_disable_sriov(adapter->pdev);
3940
3941         for_all_vfs(adapter, vf_cfg, vf) {
3942                 if (BEx_chip(adapter))
3943                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3944                                         vf_cfg->pmac_id, vf + 1);
3945                 else
3946                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3947                                        vf + 1);
3948
3949                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3950         }
3951
3952         if (BE3_chip(adapter))
3953                 be_cmd_set_hsw_config(adapter, 0, 0,
3954                                       adapter->if_handle,
3955                                       PORT_FWD_TYPE_PASSTHRU, 0);
3956 done:
3957         kfree(adapter->vf_cfg);
3958         adapter->num_vfs = 0;
3959         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3960 }
3961
3962 static void be_clear_queues(struct be_adapter *adapter)
3963 {
3964         be_mcc_queues_destroy(adapter);
3965         be_rx_cqs_destroy(adapter);
3966         be_tx_queues_destroy(adapter);
3967         be_evt_queues_destroy(adapter);
3968 }
3969
3970 static void be_cancel_worker(struct be_adapter *adapter)
3971 {
3972         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3973                 cancel_delayed_work_sync(&adapter->work);
3974                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3975         }
3976 }
3977
3978 static void be_cancel_err_detection(struct be_adapter *adapter)
3979 {
3980         struct be_error_recovery *err_rec = &adapter->error_recovery;
3981
3982         if (!be_err_recovery_workq)
3983                 return;
3984
3985         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3986                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3987                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3988         }
3989 }
3990
3991 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3992 {
3993         struct net_device *netdev = adapter->netdev;
3994
3995         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3996                 be_cmd_manage_iface(adapter, adapter->if_handle,
3997                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3998
3999         if (adapter->vxlan_port)
4000                 be_cmd_set_vxlan_port(adapter, 0);
4001
4002         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4003         adapter->vxlan_port = 0;
4004
4005         netdev->hw_enc_features = 0;
4006         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4007         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4008 }
4009
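/* Compute the per-VF resource template (vft_res) by dividing the PF pool
 * resources among the PF and the requested VFs; used when re-provisioning
 * SR-IOV resources via be_cmd_set_sriov_config().
 */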
4010 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4011                                 struct be_resources *vft_res)
4012 {
4013         struct be_resources res = adapter->pool_res;
4014         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4015         struct be_resources res_mod = {0};
4016         u16 num_vf_qs = 1;
4017
4018         /* Distribute the queue resources among the PF and its VFs */
4019         if (num_vfs) {
4020                 /* Divide the rx queues evenly among the VFs and the PF, capped
4021                  * at VF-EQ-count. Any remainder queues belong to the PF.
4022                  */
4023                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4024                                 res.max_rss_qs / (num_vfs + 1));
4025
4026                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4027                  * RSS Tables per port. Provide RSS on VFs, only if number of
4028                  * VFs requested is less than its PF Pool's RSS Tables limit.
4029                  */
4030                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4031                         num_vf_qs = 1;
4032         }
4033
4034         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4035          * which are modifiable using SET_PROFILE_CONFIG cmd.
4036          */
4037         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4038                                   RESOURCE_MODIFIABLE, 0);
4039
4040         /* If RSS IFACE capability flags are modifiable for a VF, set the
4041          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4042          * more than 1 RSSQ is available for a VF.
4043          * Otherwise, provision only 1 queue pair for VF.
4044          */
4045         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4046                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4047                 if (num_vf_qs > 1) {
4048                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4049                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4050                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4051                 } else {
4052                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4053                                              BE_IF_FLAGS_DEFQ_RSS);
4054                 }
4055         } else {
4056                 num_vf_qs = 1;
4057         }
4058
4059         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4060                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4061                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4062         }
4063
4064         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4065         vft_res->max_rx_qs = num_vf_qs;
4066         vft_res->max_rss_qs = num_vf_qs;
4067         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4068         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4069
4070         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4071          * among the PF and its VFs, if the fields are changeable
4072          */
4073         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4074                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4075
4076         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4077                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4078
4079         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4080                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4081
4082         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4083                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4084 }
4085
4086 static void be_if_destroy(struct be_adapter *adapter)
4087 {
4088         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4089
4090         kfree(adapter->pmac_id);
4091         adapter->pmac_id = NULL;
4092
4093         kfree(adapter->mc_list);
4094         adapter->mc_list = NULL;
4095
4096         kfree(adapter->uc_list);
4097         adapter->uc_list = NULL;
4098 }
4099
4100 static int be_clear(struct be_adapter *adapter)
4101 {
4102         struct pci_dev *pdev = adapter->pdev;
4103         struct  be_resources vft_res = {0};
4104
4105         be_cancel_worker(adapter);
4106
4107         flush_workqueue(be_wq);
4108
4109         if (sriov_enabled(adapter))
4110                 be_vf_clear(adapter);
4111
4112         /* Re-configure FW to distribute resources evenly across max-supported
4113          * number of VFs, only when VFs are not already enabled.
4114          */
4115         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4116             !pci_vfs_assigned(pdev)) {
4117                 be_calculate_vf_res(adapter,
4118                                     pci_sriov_get_totalvfs(pdev),
4119                                     &vft_res);
4120                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4121                                         pci_sriov_get_totalvfs(pdev),
4122                                         &vft_res);
4123         }
4124
4125         be_disable_vxlan_offloads(adapter);
4126
4127         be_if_destroy(adapter);
4128
4129         be_clear_queues(adapter);
4130
4131         be_msix_disable(adapter);
4132         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4133         return 0;
4134 }
4135
4136 static int be_vfs_if_create(struct be_adapter *adapter)
4137 {
4138         struct be_resources res = {0};
4139         u32 cap_flags, en_flags, vf;
4140         struct be_vf_cfg *vf_cfg;
4141         int status;
4142
4143         /* If a FW profile exists, then cap_flags are updated */
4144         cap_flags = BE_VF_IF_EN_FLAGS;
4145
4146         for_all_vfs(adapter, vf_cfg, vf) {
4147                 if (!BE3_chip(adapter)) {
4148                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4149                                                            ACTIVE_PROFILE_TYPE,
4150                                                            RESOURCE_LIMITS,
4151                                                            vf + 1);
4152                         if (!status) {
4153                                 cap_flags = res.if_cap_flags;
4154                                 /* Prevent VFs from enabling VLAN promiscuous
4155                                  * mode
4156                                  */
4157                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4158                         }
4159                 }
4160
4161                 /* PF should enable IF flags during proxy if_create call */
4162                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4163                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4164                                           &vf_cfg->if_handle, vf + 1);
4165                 if (status)
4166                         return status;
4167         }
4168
4169         return 0;
4170 }
4171
4172 static int be_vf_setup_init(struct be_adapter *adapter)
4173 {
4174         struct be_vf_cfg *vf_cfg;
4175         int vf;
4176
4177         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4178                                   GFP_KERNEL);
4179         if (!adapter->vf_cfg)
4180                 return -ENOMEM;
4181
4182         for_all_vfs(adapter, vf_cfg, vf) {
4183                 vf_cfg->if_handle = -1;
4184                 vf_cfg->pmac_id = -1;
4185         }
4186         return 0;
4187 }
4188
4189 static int be_vf_setup(struct be_adapter *adapter)
4190 {
4191         struct device *dev = &adapter->pdev->dev;
4192         struct be_vf_cfg *vf_cfg;
4193         int status, old_vfs, vf;
4194         bool spoofchk;
4195
4196         old_vfs = pci_num_vf(adapter->pdev);
4197
4198         status = be_vf_setup_init(adapter);
4199         if (status)
4200                 goto err;
4201
4202         if (old_vfs) {
4203                 for_all_vfs(adapter, vf_cfg, vf) {
4204                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4205                         if (status)
4206                                 goto err;
4207                 }
4208
4209                 status = be_vfs_mac_query(adapter);
4210                 if (status)
4211                         goto err;
4212         } else {
4213                 status = be_vfs_if_create(adapter);
4214                 if (status)
4215                         goto err;
4216
4217                 status = be_vf_eth_addr_config(adapter);
4218                 if (status)
4219                         goto err;
4220         }
4221
4222         for_all_vfs(adapter, vf_cfg, vf) {
4223                 /* Allow VFs to program MAC/VLAN filters */
4224                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4225                                                   vf + 1);
4226                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4227                         status = be_cmd_set_fn_privileges(adapter,
4228                                                           vf_cfg->privileges |
4229                                                           BE_PRIV_FILTMGMT,
4230                                                           vf + 1);
4231                         if (!status) {
4232                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4233                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4234                                          vf);
4235                         }
4236                 }
4237
4238                 /* Allow full available bandwidth */
4239                 if (!old_vfs)
4240                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4241
4242                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4243                                                vf_cfg->if_handle, NULL,
4244                                                &spoofchk);
4245                 if (!status)
4246                         vf_cfg->spoofchk = spoofchk;
4247
4248                 if (!old_vfs) {
4249                         be_cmd_enable_vf(adapter, vf + 1);
4250                         be_cmd_set_logical_link_config(adapter,
4251                                                        IFLA_VF_LINK_STATE_AUTO,
4252                                                        vf+1);
4253                 }
4254         }
4255
4256         if (!old_vfs) {
4257                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4258                 if (status) {
4259                         dev_err(dev, "SRIOV enable failed\n");
4260                         adapter->num_vfs = 0;
4261                         goto err;
4262                 }
4263         }
4264
4265         if (BE3_chip(adapter)) {
4266                 /* On BE3, enable VEB only when SRIOV is enabled */
4267                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4268                                                adapter->if_handle,
4269                                                PORT_FWD_TYPE_VEB, 0);
4270                 if (status)
4271                         goto err;
4272         }
4273
4274         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4275         return 0;
4276 err:
4277         dev_err(dev, "VF setup failed\n");
4278         be_vf_clear(adapter);
4279         return status;
4280 }
4281
4282 /* Converting function_mode bits on BE3 to SH mc_type enums */
4283
4284 static u8 be_convert_mc_type(u32 function_mode)
4285 {
4286         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4287                 return vNIC1;
4288         else if (function_mode & QNQ_MODE)
4289                 return FLEX10;
4290         else if (function_mode & VNIC_MODE)
4291                 return vNIC2;
4292         else if (function_mode & UMC_ENABLED)
4293                 return UMC;
4294         else
4295                 return MC_NONE;
4296 }
4297
4298 /* On BE2/BE3 FW does not suggest the supported limits */
4299 static void BEx_get_resources(struct be_adapter *adapter,
4300                               struct be_resources *res)
4301 {
4302         bool use_sriov = adapter->num_vfs ? 1 : 0;
4303
4304         if (be_physfn(adapter))
4305                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4306         else
4307                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4308
4309         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4310
4311         if (be_is_mc(adapter)) {
4312                 /* Assuming that there are 4 channels per port
4313                  * when multi-channel is enabled
4314                  */
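                     /* For illustration (assuming BE_NUM_VLANS_SUPPORTED were 64):
                      * QnQ multi-channel would leave 64 / 8 = 8 VLAN filter slots,
                      * while non-QnQ multi-channel would leave 64 / 4 - 1 = 15,
                      * since the pvid consumes one slot.
                      */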
4315                 if (be_is_qnq_mode(adapter))
4316                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4317                 else
4318                         /* In a non-qnq multichannel mode, the pvid
4319                          * takes up one vlan entry
4320                          */
4321                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4322         } else {
4323                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4324         }
4325
4326         res->max_mcast_mac = BE_MAX_MC;
4327
4328         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4329          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4330          *    *only* if it is RSS-capable.
4331          */
4332         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4333             be_virtfn(adapter) ||
4334             (be_is_mc(adapter) &&
4335              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4336                 res->max_tx_qs = 1;
4337         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4338                 struct be_resources super_nic_res = {0};
4339
4340                 /* On a SuperNIC profile, the driver needs to use the
4341                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4342                  */
4343                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4344                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4345                                           0);
4346                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4347                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4348         } else {
4349                 res->max_tx_qs = BE3_MAX_TX_QS;
4350         }
4351
4352         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4353             !use_sriov && be_physfn(adapter))
4354                 res->max_rss_qs = (adapter->be3_native) ?
4355                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4356         res->max_rx_qs = res->max_rss_qs + 1;
4357
4358         if (be_physfn(adapter))
4359                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4360                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4361         else
4362                 res->max_evt_qs = 1;
4363
4364         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4365         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4366         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4367                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4368 }
4369
4370 static void be_setup_init(struct be_adapter *adapter)
4371 {
4372         adapter->vlan_prio_bmap = 0xff;
4373         adapter->phy.link_speed = -1;
4374         adapter->if_handle = -1;
4375         adapter->be3_native = false;
4376         adapter->if_flags = 0;
4377         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4378         if (be_physfn(adapter))
4379                 adapter->cmd_privileges = MAX_PRIVILEGES;
4380         else
4381                 adapter->cmd_privileges = MIN_PRIVILEGES;
4382 }
4383
4384 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4385  * However, this HW limitation is not exposed to the host via any SLI cmd.
4386  * As a result, in the case of SRIOV and in particular multi-partition configs
4387  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4388  * for distribution between the VFs. This self-imposed limit determines the
4389  * number of VFs for which RSS can be enabled.
4390  */
4391 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4392 {
4393         struct be_port_resources port_res = {0};
4394         u8 rss_tables_on_port;
4395         u16 max_vfs = be_max_vfs(adapter);
4396
4397         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4398                                   RESOURCE_LIMITS, 0);
4399
4400         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4401
4402         /* Each PF Pool's RSS Tables limit =
4403          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4404          */
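             /* Illustrative example (assumed values, not read from FW): with 15
              * RSS tables left on the port and this PF allowed 32 of the port's
              * 64 VFs, the PF-pool is limited to 32 * 15 / 64 = 7 RSS-capable VFs.
              */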
4405         adapter->pool_res.max_rss_tables =
4406                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4407 }
4408
4409 static int be_get_sriov_config(struct be_adapter *adapter)
4410 {
4411         struct be_resources res = {0};
4412         int max_vfs, old_vfs;
4413
4414         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4415                                   RESOURCE_LIMITS, 0);
4416
4417         /* Some old versions of BE3 FW don't report max_vfs value */
4418         if (BE3_chip(adapter) && !res.max_vfs) {
4419                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4420                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4421         }
4422
4423         adapter->pool_res = res;
4424
4425         /* If the VFs were not disabled during a previous unload of the
4426          * driver, we cannot rely on the PF-pool limits for the TotalVFs value.
4427          * Instead use the TotalVFs value stored in the pci-dev struct.
4428          */
4429         old_vfs = pci_num_vf(adapter->pdev);
4430         if (old_vfs) {
4431                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4432                          old_vfs);
4433
4434                 adapter->pool_res.max_vfs =
4435                         pci_sriov_get_totalvfs(adapter->pdev);
4436                 adapter->num_vfs = old_vfs;
4437         }
4438
4439         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4440                 be_calculate_pf_pool_rss_tables(adapter);
4441                 dev_info(&adapter->pdev->dev,
4442                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4443                          be_max_pf_pool_rss_tables(adapter));
4444         }
4445         return 0;
4446 }
4447
4448 static void be_alloc_sriov_res(struct be_adapter *adapter)
4449 {
4450         int old_vfs = pci_num_vf(adapter->pdev);
4451         struct  be_resources vft_res = {0};
4452         int status;
4453
4454         be_get_sriov_config(adapter);
4455
4456         if (!old_vfs)
4457                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4458
4459         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4460          * resources are given to the PF during driver load, provided there
4461          * are no old VFs. This facility is not available in BE3 FW.
4462          * On Lancer, the FW does this itself.
4463          */
4464         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4465                 be_calculate_vf_res(adapter, 0, &vft_res);
4466                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4467                                                  &vft_res);
4468                 if (status)
4469                         dev_err(&adapter->pdev->dev,
4470                                 "Failed to optimize SRIOV resources\n");
4471         }
4472 }
4473
4474 static int be_get_resources(struct be_adapter *adapter)
4475 {
4476         struct device *dev = &adapter->pdev->dev;
4477         struct be_resources res = {0};
4478         int status;
4479
4480         /* For Lancer, SH etc. read per-function resource limits from FW.
4481          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4482          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4483          */
4484         if (BEx_chip(adapter)) {
4485                 BEx_get_resources(adapter, &res);
4486         } else {
4487                 status = be_cmd_get_func_config(adapter, &res);
4488                 if (status)
4489                         return status;
4490
4491                 /* If a default RXQ must be created, we'll use up one RSSQ */
4492                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4493                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4494                         res.max_rss_qs -= 1;
4495         }
4496
4497         /* If RoCE is supported, stash away half the EQs for RoCE */
4498         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4499                                 res.max_evt_qs / 2 : res.max_evt_qs;
4500         adapter->res = res;
4501
4502         /* If FW supports RSS default queue, then skip creating non-RSS
4503          * queue for non-IP traffic.
4504          */
4505         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4506                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4507
4508         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4509                  be_max_txqs(adapter), be_max_rxqs(adapter),
4510                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4511                  be_max_vfs(adapter));
4512         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4513                  be_max_uc(adapter), be_max_mc(adapter),
4514                  be_max_vlans(adapter));
4515
4516         /* Ensure RX and TX queues are created in pairs at init time */
4517         adapter->cfg_num_rx_irqs =
4518                                 min_t(u16, netif_get_num_default_rss_queues(),
4519                                       be_max_qp_irqs(adapter));
4520         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4521         return 0;
4522 }
4523
4524 static int be_get_config(struct be_adapter *adapter)
4525 {
4526         int status, level;
4527         u16 profile_id;
4528
4529         status = be_cmd_get_cntl_attributes(adapter);
4530         if (status)
4531                 return status;
4532
4533         status = be_cmd_query_fw_cfg(adapter);
4534         if (status)
4535                 return status;
4536
4537         if (!lancer_chip(adapter) && be_physfn(adapter))
4538                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4539
4540         if (BEx_chip(adapter)) {
4541                 level = be_cmd_get_fw_log_level(adapter);
4542                 adapter->msg_enable =
4543                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4544         }
4545
4546         be_cmd_get_acpi_wol_cap(adapter);
4547         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4548         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4549
4550         be_cmd_query_port_name(adapter);
4551
4552         if (be_physfn(adapter)) {
4553                 status = be_cmd_get_active_profile(adapter, &profile_id);
4554                 if (!status)
4555                         dev_info(&adapter->pdev->dev,
4556                                  "Using profile 0x%x\n", profile_id);
4557         }
4558
4559         return 0;
4560 }
4561
4562 static int be_mac_setup(struct be_adapter *adapter)
4563 {
4564         u8 mac[ETH_ALEN];
4565         int status;
4566
4567         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4568                 status = be_cmd_get_perm_mac(adapter, mac);
4569                 if (status)
4570                         return status;
4571
4572                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4573                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4574
4575                 /* Initial MAC for BE3 VFs is already programmed by PF */
4576                 if (BEx_chip(adapter) && be_virtfn(adapter))
4577                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4578         }
4579
4580         return 0;
4581 }
4582
4583 static void be_schedule_worker(struct be_adapter *adapter)
4584 {
4585         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4586         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4587 }
4588
4589 static void be_destroy_err_recovery_workq(void)
4590 {
4591         if (!be_err_recovery_workq)
4592                 return;
4593
4594         flush_workqueue(be_err_recovery_workq);
4595         destroy_workqueue(be_err_recovery_workq);
4596         be_err_recovery_workq = NULL;
4597 }
4598
4599 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4600 {
4601         struct be_error_recovery *err_rec = &adapter->error_recovery;
4602
4603         if (!be_err_recovery_workq)
4604                 return;
4605
4606         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4607                            msecs_to_jiffies(delay));
4608         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4609 }
4610
4611 static int be_setup_queues(struct be_adapter *adapter)
4612 {
4613         struct net_device *netdev = adapter->netdev;
4614         int status;
4615
4616         status = be_evt_queues_create(adapter);
4617         if (status)
4618                 goto err;
4619
4620         status = be_tx_qs_create(adapter);
4621         if (status)
4622                 goto err;
4623
4624         status = be_rx_cqs_create(adapter);
4625         if (status)
4626                 goto err;
4627
4628         status = be_mcc_queues_create(adapter);
4629         if (status)
4630                 goto err;
4631
4632         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4633         if (status)
4634                 goto err;
4635
4636         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4637         if (status)
4638                 goto err;
4639
4640         return 0;
4641 err:
4642         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4643         return status;
4644 }
4645
4646 static int be_if_create(struct be_adapter *adapter)
4647 {
4648         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4649         u32 cap_flags = be_if_cap_flags(adapter);
4650         int status;
4651
4652         /* alloc required memory for other filtering fields */
4653         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4654                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4655         if (!adapter->pmac_id)
4656                 return -ENOMEM;
4657
4658         adapter->mc_list = kcalloc(be_max_mc(adapter),
4659                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4660         if (!adapter->mc_list)
4661                 return -ENOMEM;
4662
4663         adapter->uc_list = kcalloc(be_max_uc(adapter),
4664                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4665         if (!adapter->uc_list)
4666                 return -ENOMEM;
4667
4668         if (adapter->cfg_num_rx_irqs == 1)
4669                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4670
4671         en_flags &= cap_flags;
4672         /* will enable all the needed filter flags in be_open() */
4673         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4674                                   &adapter->if_handle, 0);
4675
4676         if (status)
4677                 return status;
4678
4679         return 0;
4680 }
4681
4682 int be_update_queues(struct be_adapter *adapter)
4683 {
4684         struct net_device *netdev = adapter->netdev;
4685         int status;
4686
4687         if (netif_running(netdev))
4688                 be_close(netdev);
4689
4690         be_cancel_worker(adapter);
4691
4692         /* If any vectors have been shared with RoCE we cannot re-program
4693          * the MSIx table.
4694          */
4695         if (!adapter->num_msix_roce_vec)
4696                 be_msix_disable(adapter);
4697
4698         be_clear_queues(adapter);
4699         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4700         if (status)
4701                 return status;
4702
4703         if (!msix_enabled(adapter)) {
4704                 status = be_msix_enable(adapter);
4705                 if (status)
4706                         return status;
4707         }
4708
4709         status = be_if_create(adapter);
4710         if (status)
4711                 return status;
4712
4713         status = be_setup_queues(adapter);
4714         if (status)
4715                 return status;
4716
4717         be_schedule_worker(adapter);
4718
4719         if (netif_running(netdev))
4720                 status = be_open(netdev);
4721
4722         return status;
4723 }
4724
4725 static inline int fw_major_num(const char *fw_ver)
4726 {
4727         int fw_major = 0, i;
4728
4729         i = sscanf(fw_ver, "%d.", &fw_major);
4730         if (i != 1)
4731                 return 0;
4732
4733         return fw_major;
4734 }
4735
4736 /* During error recovery, always FLR the PF.
4737  * Otherwise, don't FLR the PF if any VFs are already enabled.
4738  */
4739 static bool be_reset_required(struct be_adapter *adapter)
4740 {
4741         if (be_error_recovering(adapter))
4742                 return true;
4743         else
4744                 return pci_num_vf(adapter->pdev) == 0;
4745 }
4746
4747 /* Wait for the FW to be ready and perform the required initialization */
4748 static int be_func_init(struct be_adapter *adapter)
4749 {
4750         int status;
4751
4752         status = be_fw_wait_ready(adapter);
4753         if (status)
4754                 return status;
4755
4756         /* FW is now ready; clear errors to allow cmds/doorbell */
4757         be_clear_error(adapter, BE_CLEAR_ALL);
4758
4759         if (be_reset_required(adapter)) {
4760                 status = be_cmd_reset_function(adapter);
4761                 if (status)
4762                         return status;
4763
4764                 /* Wait for interrupts to quiesce after an FLR */
4765                 msleep(100);
4766         }
4767
4768         /* Tell FW we're ready to fire cmds */
4769         status = be_cmd_fw_init(adapter);
4770         if (status)
4771                 return status;
4772
4773         /* Allow interrupts for other ULPs running on NIC function */
4774         be_intr_set(adapter, true);
4775
4776         return 0;
4777 }
4778
4779 static int be_setup(struct be_adapter *adapter)
4780 {
4781         struct device *dev = &adapter->pdev->dev;
4782         int status;
4783
4784         status = be_func_init(adapter);
4785         if (status)
4786                 return status;
4787
4788         be_setup_init(adapter);
4789
4790         if (!lancer_chip(adapter))
4791                 be_cmd_req_native_mode(adapter);
4792
4793         /* invoke this cmd first to get pf_num and vf_num which are needed
4794          * for issuing profile related cmds
4795          */
4796         if (!BEx_chip(adapter)) {
4797                 status = be_cmd_get_func_config(adapter, NULL);
4798                 if (status)
4799                         return status;
4800         }
4801
4802         status = be_get_config(adapter);
4803         if (status)
4804                 goto err;
4805
4806         if (!BE2_chip(adapter) && be_physfn(adapter))
4807                 be_alloc_sriov_res(adapter);
4808
4809         status = be_get_resources(adapter);
4810         if (status)
4811                 goto err;
4812
4813         status = be_msix_enable(adapter);
4814         if (status)
4815                 goto err;
4816
4817         /* will enable all the needed filter flags in be_open() */
4818         status = be_if_create(adapter);
4819         if (status)
4820                 goto err;
4821
4822         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4823         rtnl_lock();
4824         status = be_setup_queues(adapter);
4825         rtnl_unlock();
4826         if (status)
4827                 goto err;
4828
4829         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4830
4831         status = be_mac_setup(adapter);
4832         if (status)
4833                 goto err;
4834
4835         be_cmd_get_fw_ver(adapter);
4836         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4837
4838         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4839                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4840                         adapter->fw_ver);
4841                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4842         }
4843
4844         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4845                                          adapter->rx_fc);
4846         if (status)
4847                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4848                                         &adapter->rx_fc);
4849
4850         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4851                  adapter->tx_fc, adapter->rx_fc);
4852
4853         if (be_physfn(adapter))
4854                 be_cmd_set_logical_link_config(adapter,
4855                                                IFLA_VF_LINK_STATE_AUTO, 0);
4856
4857         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4858          * vport, confusing any Linux bridge or OVS it might be connected to.
4859          * Set the EVB to PASSTHRU mode, which effectively disables it,
4860          * when SRIOV is not enabled.
4861          */
4862         if (BE3_chip(adapter))
4863                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4864                                       PORT_FWD_TYPE_PASSTHRU, 0);
4865
4866         if (adapter->num_vfs)
4867                 be_vf_setup(adapter);
4868
4869         status = be_cmd_get_phy_info(adapter);
4870         if (!status && be_pause_supported(adapter))
4871                 adapter->phy.fc_autoneg = 1;
4872
4873         if (be_physfn(adapter) && !lancer_chip(adapter))
4874                 be_cmd_set_features(adapter);
4875
4876         be_schedule_worker(adapter);
4877         adapter->flags |= BE_FLAGS_SETUP_DONE;
4878         return 0;
4879 err:
4880         be_clear(adapter);
4881         return status;
4882 }
4883
4884 #ifdef CONFIG_NET_POLL_CONTROLLER
4885 static void be_netpoll(struct net_device *netdev)
4886 {
4887         struct be_adapter *adapter = netdev_priv(netdev);
4888         struct be_eq_obj *eqo;
4889         int i;
4890
4891         for_all_evt_queues(adapter, eqo, i) {
4892                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4893                 napi_schedule(&eqo->napi);
4894         }
4895 }
4896 #endif
4897
4898 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4899 {
4900         const struct firmware *fw;
4901         int status;
4902
4903         if (!netif_running(adapter->netdev)) {
4904                 dev_err(&adapter->pdev->dev,
4905                         "Firmware load not allowed (interface is down)\n");
4906                 return -ENETDOWN;
4907         }
4908
4909         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4910         if (status)
4911                 goto fw_exit;
4912
4913         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4914
4915         if (lancer_chip(adapter))
4916                 status = lancer_fw_download(adapter, fw);
4917         else
4918                 status = be_fw_download(adapter, fw);
4919
4920         if (!status)
4921                 be_cmd_get_fw_ver(adapter);
4922
4923 fw_exit:
4924         release_firmware(fw);
4925         return status;
4926 }
4927
4928 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4929                                  u16 flags)
4930 {
4931         struct be_adapter *adapter = netdev_priv(dev);
4932         struct nlattr *attr, *br_spec;
4933         int rem;
4934         int status = 0;
4935         u16 mode = 0;
4936
4937         if (!sriov_enabled(adapter))
4938                 return -EOPNOTSUPP;
4939
4940         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4941         if (!br_spec)
4942                 return -EINVAL;
4943
4944         nla_for_each_nested(attr, br_spec, rem) {
4945                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4946                         continue;
4947
4948                 if (nla_len(attr) < sizeof(mode))
4949                         return -EINVAL;
4950
4951                 mode = nla_get_u16(attr);
4952                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4953                         return -EOPNOTSUPP;
4954
4955                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4956                         return -EINVAL;
4957
4958                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4959                                                adapter->if_handle,
4960                                                mode == BRIDGE_MODE_VEPA ?
4961                                                PORT_FWD_TYPE_VEPA :
4962                                                PORT_FWD_TYPE_VEB, 0);
4963                 if (status)
4964                         goto err;
4965
4966                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4967                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4968
4969                 return status;
4970         }
4971 err:
4972         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4973                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4974
4975         return status;
4976 }
4977
4978 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4979                                  struct net_device *dev, u32 filter_mask,
4980                                  int nlflags)
4981 {
4982         struct be_adapter *adapter = netdev_priv(dev);
4983         int status = 0;
4984         u8 hsw_mode;
4985
4986         /* BE and Lancer chips support VEB mode only */
4987         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4988                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4989                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4990                         return 0;
4991                 hsw_mode = PORT_FWD_TYPE_VEB;
4992         } else {
4993                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4994                                                adapter->if_handle, &hsw_mode,
4995                                                NULL);
4996                 if (status)
4997                         return 0;
4998
4999                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5000                         return 0;
5001         }
5002
5003         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5004                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5005                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5006                                        0, 0, nlflags, filter_mask, NULL);
5007 }
5008
5009 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5010                                          void (*func)(struct work_struct *))
5011 {
5012         struct be_cmd_work *work;
5013
5014         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5015         if (!work) {
5016                 dev_err(&adapter->pdev->dev,
5017                         "be_work memory allocation failed\n");
5018                 return NULL;
5019         }
5020
5021         INIT_WORK(&work->work, func);
5022         work->adapter = adapter;
5023         return work;
5024 }
5025
5026 /* VxLAN offload Notes:
5027  *
5028  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5029  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5030  * is expected to work across all types of IP tunnels once exported. Skyhawk
5031  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5032  * offloads in hw_enc_features only when a VxLAN port is added. If other
5033  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5034  * those other tunnels are unexported on the fly through ndo_features_check().
5035  *
5036  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5037  * adds more than one port, disable offloads and don't re-enable them again
5038  * until after all the tunnels are removed.
5039  */
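     /* Bookkeeping used by the work handlers below: vxlan_port_count tracks how
      * many VxLAN ports the stack has asked to add, while vxlan_port_aliases
      * counts repeated adds of the already-offloaded port, so that offloads stay
      * enabled until its last matching delete.
      */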
5040 static void be_work_add_vxlan_port(struct work_struct *work)
5041 {
5042         struct be_cmd_work *cmd_work =
5043                                 container_of(work, struct be_cmd_work, work);
5044         struct be_adapter *adapter = cmd_work->adapter;
5045         struct net_device *netdev = adapter->netdev;
5046         struct device *dev = &adapter->pdev->dev;
5047         __be16 port = cmd_work->info.vxlan_port;
5048         int status;
5049
5050         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5051                 adapter->vxlan_port_aliases++;
5052                 goto done;
5053         }
5054
5055         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5056                 dev_info(dev,
5057                          "Only one UDP port supported for VxLAN offloads\n");
5058                 dev_info(dev, "Disabling VxLAN offloads\n");
5059                 adapter->vxlan_port_count++;
5060                 goto err;
5061         }
5062
5063         if (adapter->vxlan_port_count++ >= 1)
5064                 goto done;
5065
5066         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5067                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5068         if (status) {
5069                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5070                 goto err;
5071         }
5072
5073         status = be_cmd_set_vxlan_port(adapter, port);
5074         if (status) {
5075                 dev_warn(dev, "Failed to add VxLAN port\n");
5076                 goto err;
5077         }
5078         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5079         adapter->vxlan_port = port;
5080
5081         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5082                                    NETIF_F_TSO | NETIF_F_TSO6 |
5083                                    NETIF_F_GSO_UDP_TUNNEL;
5084         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5085         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5086
5087         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5088                  be16_to_cpu(port));
5089         goto done;
5090 err:
5091         be_disable_vxlan_offloads(adapter);
5092 done:
5093         kfree(cmd_work);
5094 }
5095
5096 static void be_work_del_vxlan_port(struct work_struct *work)
5097 {
5098         struct be_cmd_work *cmd_work =
5099                                 container_of(work, struct be_cmd_work, work);
5100         struct be_adapter *adapter = cmd_work->adapter;
5101         __be16 port = cmd_work->info.vxlan_port;
5102
5103         if (adapter->vxlan_port != port)
5104                 goto done;
5105
5106         if (adapter->vxlan_port_aliases) {
5107                 adapter->vxlan_port_aliases--;
5108                 goto out;
5109         }
5110
5111         be_disable_vxlan_offloads(adapter);
5112
5113         dev_info(&adapter->pdev->dev,
5114                  "Disabled VxLAN offloads for UDP port %d\n",
5115                  be16_to_cpu(port));
5116 done:
5117         adapter->vxlan_port_count--;
5118 out:
5119         kfree(cmd_work);
5120 }
5121
5122 static void be_cfg_vxlan_port(struct net_device *netdev,
5123                               struct udp_tunnel_info *ti,
5124                               void (*func)(struct work_struct *))
5125 {
5126         struct be_adapter *adapter = netdev_priv(netdev);
5127         struct be_cmd_work *cmd_work;
5128
5129         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5130                 return;
5131
5132         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5133                 return;
5134
5135         cmd_work = be_alloc_work(adapter, func);
5136         if (cmd_work) {
5137                 cmd_work->info.vxlan_port = ti->port;
5138                 queue_work(be_wq, &cmd_work->work);
5139         }
5140 }
5141
5142 static void be_del_vxlan_port(struct net_device *netdev,
5143                               struct udp_tunnel_info *ti)
5144 {
5145         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5146 }
5147
5148 static void be_add_vxlan_port(struct net_device *netdev,
5149                               struct udp_tunnel_info *ti)
5150 {
5151         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5152 }
5153
5154 static netdev_features_t be_features_check(struct sk_buff *skb,
5155                                            struct net_device *dev,
5156                                            netdev_features_t features)
5157 {
5158         struct be_adapter *adapter = netdev_priv(dev);
5159         u8 l4_hdr = 0;
5160
5161         /* The code below restricts offload features for some tunneled packets.
5162          * Offload features for normal (non tunnel) packets are unchanged.
5163          */
5164         if (!skb->encapsulation ||
5165             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5166                 return features;
5167
5168         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5169          * should disable tunnel offload features if it's not a VxLAN packet,
5170          * as tunnel offloads have been enabled only for VxLAN. This is done to
5171          * allow other tunneled traffic, such as GRE, to work fine while VxLAN
5172          * offloads are configured in Skyhawk-R.
5173          */
5174         switch (vlan_get_protocol(skb)) {
5175         case htons(ETH_P_IP):
5176                 l4_hdr = ip_hdr(skb)->protocol;
5177                 break;
5178         case htons(ETH_P_IPV6):
5179                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5180                 break;
5181         default:
5182                 return features;
5183         }
5184
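             /* Keep offloads only for a well-formed VxLAN frame: UDP outer L4,
              * an inner Ethernet frame (ETH_P_TEB), exactly one UDP + VxLAN
              * header between the transport and inner MAC headers, and a
              * destination port matching the offloaded VxLAN port.
              */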
5185         if (l4_hdr != IPPROTO_UDP ||
5186             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5187             skb->inner_protocol != htons(ETH_P_TEB) ||
5188             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5189                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5190             !adapter->vxlan_port ||
5191             udp_hdr(skb)->dest != adapter->vxlan_port)
5192                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5193
5194         return features;
5195 }
5196
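     /* Compose the physical port id from the 1-based HBA port number followed
      * by the controller serial number words, copied in reverse word order.
      */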
5197 static int be_get_phys_port_id(struct net_device *dev,
5198                                struct netdev_phys_item_id *ppid)
5199 {
5200         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5201         struct be_adapter *adapter = netdev_priv(dev);
5202         u8 *id;
5203
5204         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5205                 return -ENOSPC;
5206
5207         ppid->id[0] = adapter->hba_port_num + 1;
5208         id = &ppid->id[1];
5209         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5210              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5211                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5212
5213         ppid->id_len = id_len;
5214
5215         return 0;
5216 }
5217
5218 static void be_set_rx_mode(struct net_device *dev)
5219 {
5220         struct be_adapter *adapter = netdev_priv(dev);
5221         struct be_cmd_work *work;
5222
5223         work = be_alloc_work(adapter, be_work_set_rx_mode);
5224         if (work)
5225                 queue_work(be_wq, &work->work);
5226 }
5227
5228 static const struct net_device_ops be_netdev_ops = {
5229         .ndo_open               = be_open,
5230         .ndo_stop               = be_close,
5231         .ndo_start_xmit         = be_xmit,
5232         .ndo_set_rx_mode        = be_set_rx_mode,
5233         .ndo_set_mac_address    = be_mac_addr_set,
5234         .ndo_get_stats64        = be_get_stats64,
5235         .ndo_validate_addr      = eth_validate_addr,
5236         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5237         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5238         .ndo_set_vf_mac         = be_set_vf_mac,
5239         .ndo_set_vf_vlan        = be_set_vf_vlan,
5240         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5241         .ndo_get_vf_config      = be_get_vf_config,
5242         .ndo_set_vf_link_state  = be_set_vf_link_state,
5243         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5244 #ifdef CONFIG_NET_POLL_CONTROLLER
5245         .ndo_poll_controller    = be_netpoll,
5246 #endif
5247         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5248         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5249 #ifdef CONFIG_NET_RX_BUSY_POLL
5250         .ndo_busy_poll          = be_busy_poll,
5251 #endif
5252         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5253         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5254         .ndo_features_check     = be_features_check,
5255         .ndo_get_phys_port_id   = be_get_phys_port_id,
5256 };
5257
5258 static void be_netdev_init(struct net_device *netdev)
5259 {
5260         struct be_adapter *adapter = netdev_priv(netdev);
5261
5262         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5263                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5264                 NETIF_F_HW_VLAN_CTAG_TX;
5265         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5266                 netdev->hw_features |= NETIF_F_RXHASH;
5267
5268         netdev->features |= netdev->hw_features |
5269                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5270
5271         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5272                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5273
5274         netdev->priv_flags |= IFF_UNICAST_FLT;
5275
5276         netdev->flags |= IFF_MULTICAST;
5277
5278         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5279
5280         netdev->netdev_ops = &be_netdev_ops;
5281
5282         netdev->ethtool_ops = &be_ethtool_ops;
5283
5284         /* MTU range: 256 - 9000 */
5285         netdev->min_mtu = BE_MIN_MTU;
5286         netdev->max_mtu = BE_MAX_MTU;
5287 }
5288
5289 static void be_cleanup(struct be_adapter *adapter)
5290 {
5291         struct net_device *netdev = adapter->netdev;
5292
5293         rtnl_lock();
5294         netif_device_detach(netdev);
5295         if (netif_running(netdev))
5296                 be_close(netdev);
5297         rtnl_unlock();
5298
5299         be_clear(adapter);
5300 }
5301
5302 static int be_resume(struct be_adapter *adapter)
5303 {
5304         struct net_device *netdev = adapter->netdev;
5305         int status;
5306
5307         status = be_setup(adapter);
5308         if (status)
5309                 return status;
5310
5311         rtnl_lock();
5312         if (netif_running(netdev))
5313                 status = be_open(netdev);
5314         rtnl_unlock();
5315
5316         if (status)
5317                 return status;
5318
5319         netif_device_attach(netdev);
5320
5321         return 0;
5322 }
5323
5324 static void be_soft_reset(struct be_adapter *adapter)
5325 {
5326         u32 val;
5327
5328         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5329         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5330         val |= SLIPORT_SOFTRESET_SR_MASK;
5331         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5332 }
5333
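     /* TPE recovery is attempted only when FW reports a recoverable POST stage
      * with a non-zero error code, enough time has elapsed since driver load and
      * since the last recovery, and the error code differs from the previous one
      * (back-to-back identical TPE errors are treated as unrecoverable).
      */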
5334 static bool be_err_is_recoverable(struct be_adapter *adapter)
5335 {
5336         struct be_error_recovery *err_rec = &adapter->error_recovery;
5337         unsigned long initial_idle_time =
5338                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5339         unsigned long recovery_interval =
5340                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5341         u16 ue_err_code;
5342         u32 val;
5343
5344         val = be_POST_stage_get(adapter);
5345         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5346                 return false;
5347         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5348         if (ue_err_code == 0)
5349                 return false;
5350
5351         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5352                 ue_err_code);
5353
5354         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5355                 dev_err(&adapter->pdev->dev,
5356                         "Cannot recover within %lu sec from driver load\n",
5357                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5358                 return false;
5359         }
5360
5361         if (err_rec->last_recovery_time &&
5362             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5363                 dev_err(&adapter->pdev->dev,
5364                         "Cannot recover within %lu sec from last recovery\n",
5365                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5366                 return false;
5367         }
5368
5369         if (ue_err_code == err_rec->last_err_code) {
5370                 dev_err(&adapter->pdev->dev,
5371                         "Cannot recover from a consecutive TPE error\n");
5372                 return false;
5373         }
5374
5375         err_rec->last_recovery_time = jiffies;
5376         err_rec->last_err_code = ue_err_code;
5377         return true;
5378 }
5379
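     /* Walks the BEx/SH TPE recovery state machine:
      * NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT.
      * Returns -EAGAIN with err_rec->resched_delay set while recovery is still
      * in progress, 0 once the function can be re-initialized, and a negative
      * error code if recovery is not possible.
      */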
5380 static int be_tpe_recover(struct be_adapter *adapter)
5381 {
5382         struct be_error_recovery *err_rec = &adapter->error_recovery;
5383         int status = -EAGAIN;
5384         u32 val;
5385
5386         switch (err_rec->recovery_state) {
5387         case ERR_RECOVERY_ST_NONE:
5388                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5389                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5390                 break;
5391
5392         case ERR_RECOVERY_ST_DETECT:
5393                 val = be_POST_stage_get(adapter);
5394                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5395                     POST_STAGE_RECOVERABLE_ERR) {
5396                         dev_err(&adapter->pdev->dev,
5397                                 "Unrecoverable HW error detected: 0x%x\n", val);
5398                         status = -EINVAL;
5399                         err_rec->resched_delay = 0;
5400                         break;
5401                 }
5402
5403                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5404
5405                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5406                  * milliseconds before it checks for final error status in
5407                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5408                  * If they are, then PF0 initiates a Soft Reset.
5409                  */
5410                 if (adapter->pf_num == 0) {
5411                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5412                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5413                                         ERR_RECOVERY_UE_DETECT_DURATION;
5414                         break;
5415                 }
5416
5417                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5418                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5419                                         ERR_RECOVERY_UE_DETECT_DURATION;
5420                 break;
5421
5422         case ERR_RECOVERY_ST_RESET:
5423                 if (!be_err_is_recoverable(adapter)) {
5424                         dev_err(&adapter->pdev->dev,
5425                                 "Failed to meet recovery criteria\n");
5426                         status = -EIO;
5427                         err_rec->resched_delay = 0;
5428                         break;
5429                 }
5430                 be_soft_reset(adapter);
5431                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5432                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5433                                         err_rec->ue_to_reset_time;
5434                 break;
5435
5436         case ERR_RECOVERY_ST_PRE_POLL:
5437                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5438                 err_rec->resched_delay = 0;
5439                 status = 0;                     /* done */
5440                 break;
5441
5442         default:
5443                 status = -EINVAL;
5444                 err_rec->resched_delay = 0;
5445                 break;
5446         }
5447
5448         return status;
5449 }
5450
5451 static int be_err_recover(struct be_adapter *adapter)
5452 {
5453         int status;
5454
5455         if (!lancer_chip(adapter)) {
5456                 if (!adapter->error_recovery.recovery_supported ||
5457                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5458                         return -EIO;
5459                 status = be_tpe_recover(adapter);
5460                 if (status)
5461                         goto err;
5462         }
5463
5464         /* Wait for adapter to reach quiescent state before
5465          * destroying queues
5466          */
5467         status = be_fw_wait_ready(adapter);
5468         if (status)
5469                 goto err;
5470
5471         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5472
5473         be_cleanup(adapter);
5474
5475         status = be_resume(adapter);
5476         if (status)
5477                 goto err;
5478
5479         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5480
5481 err:
5482         return status;
5483 }
5484
5485 static void be_err_detection_task(struct work_struct *work)
5486 {
5487         struct be_error_recovery *err_rec =
5488                         container_of(work, struct be_error_recovery,
5489                                      err_detection_work.work);
5490         struct be_adapter *adapter =
5491                         container_of(err_rec, struct be_adapter,
5492                                      error_recovery);
5493         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5494         struct device *dev = &adapter->pdev->dev;
5495         int recovery_status;
5496
5497         be_detect_error(adapter);
5498         if (!be_check_error(adapter, BE_ERROR_HW))
5499                 goto reschedule_task;
5500
5501         recovery_status = be_err_recover(adapter);
5502         if (!recovery_status) {
5503                 err_rec->recovery_retries = 0;
5504                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5505                 dev_info(dev, "Adapter recovery successful\n");
5506                 goto reschedule_task;
5507         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5508                 /* BEx/SH recovery state machine */
5509                 if (adapter->pf_num == 0 &&
5510                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5511                         dev_err(&adapter->pdev->dev,
5512                                 "Adapter recovery in progress\n");
5513                 resched_delay = err_rec->resched_delay;
5514                 goto reschedule_task;
5515         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5516                 /* For VFs, check if the PF has allocated resources
5517                  * every second.
5518                  */
5519                 dev_err(dev, "Re-trying adapter recovery\n");
5520                 goto reschedule_task;
5521         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5522                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5523                 /* In case of another error during recovery, it takes 30 sec
5524                  * for the adapter to come out of error. Retry error recovery after
5525                  * this time interval.
5526                  */
5527                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5528                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5529                 goto reschedule_task;
5530         } else {
5531                 dev_err(dev, "Adapter recovery failed\n");
5532                 dev_err(dev, "Please reboot server to recover\n");
5533         }
5534
5535         return;
5536
5537 reschedule_task:
5538         be_schedule_err_detection(adapter, resched_delay);
5539 }
5540
5541 static void be_log_sfp_info(struct be_adapter *adapter)
5542 {
5543         int status;
5544
5545         status = be_cmd_query_sfp_info(adapter);
5546         if (!status) {
5547                 dev_err(&adapter->pdev->dev,
5548                         "Port %c: %s Vendor: %s part no: %s\n",
5549                         adapter->port_name,
5550                         be_misconfig_evt_port_state[adapter->phy_state],
5551                         adapter->phy.vendor_name,
5552                         adapter->phy.vendor_pn);
5553         }
5554         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5555 }
5556
5557 static void be_worker(struct work_struct *work)
5558 {
5559         struct be_adapter *adapter =
5560                 container_of(work, struct be_adapter, work.work);
5561         struct be_rx_obj *rxo;
5562         int i;
5563
5564         if (be_physfn(adapter) &&
5565             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5566                 be_cmd_get_die_temperature(adapter);
5567
5568         /* when interrupts are not yet enabled, just reap any pending
5569          * mcc completions
5570          */
5571         if (!netif_running(adapter->netdev)) {
5572                 local_bh_disable();
5573                 be_process_mcc(adapter);
5574                 local_bh_enable();
5575                 goto reschedule;
5576         }
5577
5578         if (!adapter->stats_cmd_sent) {
5579                 if (lancer_chip(adapter))
5580                         lancer_cmd_get_pport_stats(adapter,
5581                                                    &adapter->stats_cmd);
5582                 else
5583                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5584         }
5585
5586         for_all_rx_queues(adapter, rxo, i) {
5587                 /* Replenish RX-queues starved due to memory
5588                  * allocation failures.
5589                  */
5590                 if (rxo->rx_post_starved)
5591                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5592         }
5593
5594         /* EQ-delay update for Skyhawk is done while notifying EQ */
5595         if (!skyhawk_chip(adapter))
5596                 be_eqd_update(adapter, false);
5597
5598         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5599                 be_log_sfp_info(adapter);
5600
5601 reschedule:
5602         adapter->work_counter++;
5603         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5604 }
5605
5606 static void be_unmap_pci_bars(struct be_adapter *adapter)
5607 {
5608         if (adapter->csr)
5609                 pci_iounmap(adapter->pdev, adapter->csr);
5610         if (adapter->db)
5611                 pci_iounmap(adapter->pdev, adapter->db);
5612         if (adapter->pcicfg && adapter->pcicfg_mapped)
5613                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5614 }
5615
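     /* Doorbell registers live in BAR 0 on Lancer and on VFs, and in BAR 4 on
      * all other chips.
      */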
5616 static int db_bar(struct be_adapter *adapter)
5617 {
5618         if (lancer_chip(adapter) || be_virtfn(adapter))
5619                 return 0;
5620         else
5621                 return 4;
5622 }
5623
5624 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5625 {
5626         if (skyhawk_chip(adapter)) {
5627                 adapter->roce_db.size = 4096;
5628                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5629                                                               db_bar(adapter));
5630                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5631                                                                db_bar(adapter));
5632         }
5633         return 0;
5634 }
5635
5636 static int be_map_pci_bars(struct be_adapter *adapter)
5637 {
5638         struct pci_dev *pdev = adapter->pdev;
5639         u8 __iomem *addr;
5640         u32 sli_intf;
5641
5642         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5643         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5644                                 SLI_INTF_FAMILY_SHIFT;
5645         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5646
5647         if (BEx_chip(adapter) && be_physfn(adapter)) {
5648                 adapter->csr = pci_iomap(pdev, 2, 0);
5649                 if (!adapter->csr)
5650                         return -ENOMEM;
5651         }
5652
5653         addr = pci_iomap(pdev, db_bar(adapter), 0);
5654         if (!addr)
5655                 goto pci_map_err;
5656         adapter->db = addr;
5657
5658         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5659                 if (be_physfn(adapter)) {
5660                         /* PCICFG is the 2nd BAR in BE2 */
5661                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5662                         if (!addr)
5663                                 goto pci_map_err;
5664                         adapter->pcicfg = addr;
5665                         adapter->pcicfg_mapped = true;
5666                 } else {
5667                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5668                         adapter->pcicfg_mapped = false;
5669                 }
5670         }
5671
5672         be_roce_map_pci_bars(adapter);
5673         return 0;
5674
5675 pci_map_err:
5676         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5677         be_unmap_pci_bars(adapter);
5678         return -ENOMEM;
5679 }
5680
5681 static void be_drv_cleanup(struct be_adapter *adapter)
5682 {
5683         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5684         struct device *dev = &adapter->pdev->dev;
5685
5686         if (mem->va)
5687                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5688
5689         mem = &adapter->rx_filter;
5690         if (mem->va)
5691                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5692
5693         mem = &adapter->stats_cmd;
5694         if (mem->va)
5695                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5696 }
5697
5698 /* Allocate and initialize various fields in be_adapter struct */
5699 static int be_drv_init(struct be_adapter *adapter)
5700 {
5701         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5702         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5703         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5704         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5705         struct device *dev = &adapter->pdev->dev;
5706         int status = 0;
5707
5708         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5709         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5710                                                  &mbox_mem_alloc->dma,
5711                                                  GFP_KERNEL);
5712         if (!mbox_mem_alloc->va)
5713                 return -ENOMEM;
5714
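             /* Carve a 16-byte-aligned mailbox out of the over-allocated buffer;
              * both the CPU virtual address and the DMA address are aligned.
              */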
5715         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5716         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5717         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5718
5719         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5720         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5721                                             &rx_filter->dma, GFP_KERNEL);
5722         if (!rx_filter->va) {
5723                 status = -ENOMEM;
5724                 goto free_mbox;
5725         }
5726
5727         if (lancer_chip(adapter))
5728                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5729         else if (BE2_chip(adapter))
5730                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5731         else if (BE3_chip(adapter))
5732                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5733         else
5734                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5735         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5736                                             &stats_cmd->dma, GFP_KERNEL);
5737         if (!stats_cmd->va) {
5738                 status = -ENOMEM;
5739                 goto free_rx_filter;
5740         }
5741
5742         mutex_init(&adapter->mbox_lock);
5743         mutex_init(&adapter->mcc_lock);
5744         mutex_init(&adapter->rx_filter_lock);
5745         spin_lock_init(&adapter->mcc_cq_lock);
5746         init_completion(&adapter->et_cmd_compl);
5747
5748         pci_save_state(adapter->pdev);
5749
5750         INIT_DELAYED_WORK(&adapter->work, be_worker);
5751
5752         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5753         adapter->error_recovery.resched_delay = 0;
5754         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5755                           be_err_detection_task);
5756
5757         adapter->rx_fc = true;
5758         adapter->tx_fc = true;
5759
5760         /* Must be a power of 2 or else MODULO will BUG_ON */
5761         adapter->be_get_temp_freq = 64;
5762
5763         return 0;
5764
5765 free_rx_filter:
5766         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5767 free_mbox:
5768         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5769                           mbox_mem_alloc->dma);
5770         return status;
5771 }
5772
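/* PCI remove callback: unwind everything set up by be_probe() */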
5773 static void be_remove(struct pci_dev *pdev)
5774 {
5775         struct be_adapter *adapter = pci_get_drvdata(pdev);
5776
5777         if (!adapter)
5778                 return;
5779
5780         be_roce_dev_remove(adapter);
5781         be_intr_set(adapter, false);
5782
5783         be_cancel_err_detection(adapter);
5784
5785         unregister_netdev(adapter->netdev);
5786
5787         be_clear(adapter);
5788
5789         if (!pci_vfs_assigned(adapter->pdev))
5790                 be_cmd_reset_function(adapter);
5791
5792         /* tell fw we're done with firing cmds */
5793         be_cmd_fw_clean(adapter);
5794
5795         be_unmap_pci_bars(adapter);
5796         be_drv_cleanup(adapter);
5797
5798         pci_disable_pcie_error_reporting(pdev);
5799
5800         pci_release_regions(pdev);
5801         pci_disable_device(pdev);
5802
5803         free_netdev(adapter->netdev);
5804 }
5805
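/* hwmon sysfs handler reporting the last on-die temperature read from
 * the adapter, in millidegrees Celsius
 */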
5806 static ssize_t be_hwmon_show_temp(struct device *dev,
5807                                   struct device_attribute *dev_attr,
5808                                   char *buf)
5809 {
5810         struct be_adapter *adapter = dev_get_drvdata(dev);
5811
5812         /* Unit: millidegree Celsius */
5813         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5814                 return -EIO;
5815         else
5816                 return sprintf(buf, "%u\n",
5817                                adapter->hwmon_info.be_on_die_temp * 1000);
5818 }
5819
5820 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5821                           be_hwmon_show_temp, NULL, 1);
5822
5823 static struct attribute *be_hwmon_attrs[] = {
5824         &sensor_dev_attr_temp1_input.dev_attr.attr,
5825         NULL
5826 };
5827
5828 ATTRIBUTE_GROUPS(be_hwmon);
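/* Illustrative usage once the hwmon device is registered in be_probe()
 * (the hwmon index X varies per system):
 *   cat /sys/class/hwmon/hwmonX/temp1_input
 */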
5829
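/* Printable name of the multi-channel mode this function is provisioned in */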
5830 static char *mc_name(struct be_adapter *adapter)
5831 {
5832         char *str = ""; /* default */
5833
5834         switch (adapter->mc_type) {
5835         case UMC:
5836                 str = "UMC";
5837                 break;
5838         case FLEX10:
5839                 str = "FLEX10";
5840                 break;
5841         case vNIC1:
5842                 str = "vNIC-1";
5843                 break;
5844         case nPAR:
5845                 str = "nPAR";
5846                 break;
5847         case UFP:
5848                 str = "UFP";
5849                 break;
5850         case vNIC2:
5851                 str = "vNIC-2";
5852                 break;
5853         default:
5854                 str = "";
5855         }
5856
5857         return str;
5858 }
5859
5860 static inline char *func_name(struct be_adapter *adapter)
5861 {
5862         return be_physfn(adapter) ? "PF" : "VF";
5863 }
5864
5865 static inline char *nic_name(struct pci_dev *pdev)
5866 {
5867         switch (pdev->device) {
5868         case OC_DEVICE_ID1:
5869                 return OC_NAME;
5870         case OC_DEVICE_ID2:
5871                 return OC_NAME_BE;
5872         case OC_DEVICE_ID3:
5873         case OC_DEVICE_ID4:
5874                 return OC_NAME_LANCER;
5875         case BE_DEVICE_ID2:
5876                 return BE3_NAME;
5877         case OC_DEVICE_ID5:
5878         case OC_DEVICE_ID6:
5879                 return OC_NAME_SH;
5880         default:
5881                 return BE_NAME;
5882         }
5883 }
5884
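/* PCI probe callback: enable the device, map its BARs, allocate the
 * netdev, bring up the adapter and register it with the network stack
 */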
5885 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5886 {
5887         struct be_adapter *adapter;
5888         struct net_device *netdev;
5889         int status = 0;
5890
5891         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5892
5893         status = pci_enable_device(pdev);
5894         if (status)
5895                 goto do_none;
5896
5897         status = pci_request_regions(pdev, DRV_NAME);
5898         if (status)
5899                 goto disable_dev;
5900         pci_set_master(pdev);
5901
5902         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5903         if (!netdev) {
5904                 status = -ENOMEM;
5905                 goto rel_reg;
5906         }
5907         adapter = netdev_priv(netdev);
5908         adapter->pdev = pdev;
5909         pci_set_drvdata(pdev, adapter);
5910         adapter->netdev = netdev;
5911         SET_NETDEV_DEV(netdev, &pdev->dev);
5912
5913         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5914         if (!status) {
5915                 netdev->features |= NETIF_F_HIGHDMA;
5916         } else {
5917                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5918                 if (status) {
5919                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5920                         goto free_netdev;
5921                 }
5922         }
5923
5924         status = pci_enable_pcie_error_reporting(pdev);
5925         if (!status)
5926                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5927
5928         status = be_map_pci_bars(adapter);
5929         if (status)
5930                 goto free_netdev;
5931
5932         status = be_drv_init(adapter);
5933         if (status)
5934                 goto unmap_bars;
5935
5936         status = be_setup(adapter);
5937         if (status)
5938                 goto drv_cleanup;
5939
5940         be_netdev_init(netdev);
5941         status = register_netdev(netdev);
5942         if (status != 0)
5943                 goto unsetup;
5944
5945         be_roce_dev_add(adapter);
5946
5947         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5948         adapter->error_recovery.probe_time = jiffies;
5949
5950         /* On-die temperature is not supported on VFs */
5951         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5952                 adapter->hwmon_info.hwmon_dev =
5953                         devm_hwmon_device_register_with_groups(&pdev->dev,
5954                                                                DRV_NAME,
5955                                                                adapter,
5956                                                                be_hwmon_groups);
5957                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5958         }
5959
5960         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5961                  func_name(adapter), mc_name(adapter), adapter->port_name);
5962
5963         return 0;
5964
5965 unsetup:
5966         be_clear(adapter);
5967 drv_cleanup:
5968         be_drv_cleanup(adapter);
5969 unmap_bars:
5970         be_unmap_pci_bars(adapter);
5971 free_netdev:
5972         free_netdev(netdev);
5973 rel_reg:
5974         pci_release_regions(pdev);
5975 disable_dev:
5976         pci_disable_device(pdev);
5977 do_none:
5978         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5979         return status;
5980 }
5981
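/* Legacy PM callback: quiesce the adapter and put the device into the
 * requested low-power state
 */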
5982 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5983 {
5984         struct be_adapter *adapter = pci_get_drvdata(pdev);
5985
5986         be_intr_set(adapter, false);
5987         be_cancel_err_detection(adapter);
5988
5989         be_cleanup(adapter);
5990
5991         pci_save_state(pdev);
5992         pci_disable_device(pdev);
5993         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5994         return 0;
5995 }
5996
5997 static int be_pci_resume(struct pci_dev *pdev)
5998 {
5999         struct be_adapter *adapter = pci_get_drvdata(pdev);
6000         int status = 0;
6001
6002         status = pci_enable_device(pdev);
6003         if (status)
6004                 return status;
6005
6006         pci_restore_state(pdev);
6007
6008         status = be_resume(adapter);
6009         if (status)
6010                 return status;
6011
6012         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6013
6014         return 0;
6015 }
6016
6017 /*
6018  * An FLR will stop BE from DMAing any data.
6019  */
6020 static void be_shutdown(struct pci_dev *pdev)
6021 {
6022         struct be_adapter *adapter = pci_get_drvdata(pdev);
6023
6024         if (!adapter)
6025                 return;
6026
6027         be_roce_dev_shutdown(adapter);
6028         cancel_delayed_work_sync(&adapter->work);
6029         be_cancel_err_detection(adapter);
6030
6031         netif_device_detach(adapter->netdev);
6032
6033         be_cmd_reset_function(adapter);
6034
6035         pci_disable_device(pdev);
6036 }
6037
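/* AER/EEH callback invoked when a PCI channel error is detected */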
6038 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6039                                             pci_channel_state_t state)
6040 {
6041         struct be_adapter *adapter = pci_get_drvdata(pdev);
6042
6043         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6044
6045         be_roce_dev_remove(adapter);
6046
6047         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6048                 be_set_error(adapter, BE_ERROR_EEH);
6049
6050                 be_cancel_err_detection(adapter);
6051
6052                 be_cleanup(adapter);
6053         }
6054
6055         if (state == pci_channel_io_perm_failure)
6056                 return PCI_ERS_RESULT_DISCONNECT;
6057
6058         pci_disable_device(pdev);
6059
6060         /* The error could cause the FW to trigger a flash debug dump.
6061          * Resetting the card while a flash dump is in progress can
6062          * prevent it from recovering, so wait for the dump to finish.
6063          * Wait only on the first function, since the wait is needed
6064          * only once per adapter.
6065          */
6066         if (pdev->devfn == 0)
6067                 ssleep(30);
6068
6069         return PCI_ERS_RESULT_NEED_RESET;
6070 }
6071
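/* AER/EEH slot-reset callback: re-enable the device and wait for FW to
 * become ready again
 */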
6072 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6073 {
6074         struct be_adapter *adapter = pci_get_drvdata(pdev);
6075         int status;
6076
6077         dev_info(&adapter->pdev->dev, "EEH reset\n");
6078
6079         status = pci_enable_device(pdev);
6080         if (status)
6081                 return PCI_ERS_RESULT_DISCONNECT;
6082
6083         pci_set_master(pdev);
6084         pci_restore_state(pdev);
6085
6086         /* Check if card is ok and fw is ready */
6087         dev_info(&adapter->pdev->dev,
6088                  "Waiting for FW to be ready after EEH reset\n");
6089         status = be_fw_wait_ready(adapter);
6090         if (status)
6091                 return PCI_ERS_RESULT_DISCONNECT;
6092
6093         pci_cleanup_aer_uncorrect_error_status(pdev);
6094         be_clear_error(adapter, BE_CLEAR_ALL);
6095         return PCI_ERS_RESULT_RECOVERED;
6096 }
6097
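/* AER/EEH resume callback: restore adapter state after a successful
 * slot reset
 */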
6098 static void be_eeh_resume(struct pci_dev *pdev)
6099 {
6100         int status = 0;
6101         struct be_adapter *adapter = pci_get_drvdata(pdev);
6102
6103         dev_info(&adapter->pdev->dev, "EEH resume\n");
6104
6105         pci_save_state(pdev);
6106
6107         status = be_resume(adapter);
6108         if (status)
6109                 goto err;
6110
6111         be_roce_dev_add(adapter);
6112
6113         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6114         return;
6115 err:
6116         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6117 }
6118
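/* sriov_configure hook, driven by writes to the sriov_numvfs sysfs
 * attribute: enables or disables num_vfs VFs for this PF
 */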
6119 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6120 {
6121         struct be_adapter *adapter = pci_get_drvdata(pdev);
6122         struct be_resources vft_res = {0};
6123         int status;
6124
6125         if (!num_vfs)
6126                 be_vf_clear(adapter);
6127
6128         adapter->num_vfs = num_vfs;
6129
6130         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6131                 dev_warn(&pdev->dev,
6132                          "Cannot disable VFs while they are assigned\n");
6133                 return -EBUSY;
6134         }
6135
6136         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6137          * resources are distributed equally across the max number of VFs.
6138          * The user may request that only a subset of the max-vfs be enabled;
6139          * based on num_vfs, redistribute the resources across num_vfs so
6140          * that each enabled VF gets a larger share of them.
6141          * This facility is not available in BE3 FW.
6142          * On Lancer chips this redistribution is done by the FW itself.
6143          */
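        /* Hypothetical example (numbers are illustrative only): with a
         * PF-pool of 64 RX queues and a max-vfs of 32, the default split
         * gives each VF 2 queues; requesting num_vfs = 8 lets the
         * redistribution below hand each enabled VF 8 queues instead.
         */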
6144         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6145                 be_calculate_vf_res(adapter, adapter->num_vfs,
6146                                     &vft_res);
6147                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6148                                                  adapter->num_vfs, &vft_res);
6149                 if (status)
6150                         dev_err(&pdev->dev,
6151                                 "Failed to optimize SR-IOV resources\n");
6152         }
6153
6154         status = be_get_resources(adapter);
6155         if (status)
6156                 return be_cmd_status(status);
6157
6158         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6159         rtnl_lock();
6160         status = be_update_queues(adapter);
6161         rtnl_unlock();
6162         if (status)
6163                 return be_cmd_status(status);
6164
6165         if (adapter->num_vfs)
6166                 status = be_vf_setup(adapter);
6167
6168         if (!status)
6169                 return adapter->num_vfs;
6170
6171         return 0;
6172 }
6173
6174 static const struct pci_error_handlers be_eeh_handlers = {
6175         .error_detected = be_eeh_err_detected,
6176         .slot_reset = be_eeh_reset,
6177         .resume = be_eeh_resume,
6178 };
6179
6180 static struct pci_driver be_driver = {
6181         .name = DRV_NAME,
6182         .id_table = be_dev_ids,
6183         .probe = be_probe,
6184         .remove = be_remove,
6185         .suspend = be_suspend,
6186         .resume = be_pci_resume,
6187         .shutdown = be_shutdown,
6188         .sriov_configure = be_pci_sriov_configure,
6189         .err_handler = &be_eeh_handlers
6190 };
6191
6192 static int __init be_init_module(void)
6193 {
6194         int status;
6195
6196         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6197             rx_frag_size != 2048) {
6198                 pr_warn(DRV_NAME
6199                         " : Module param rx_frag_size must be 2048/4096/8192."
6200                         " Using 2048\n");
6201                 rx_frag_size = 2048;
6202         }
6203
6204         if (num_vfs > 0) {
6205                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6206                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6207         }
6208
6209         be_wq = create_singlethread_workqueue("be_wq");
6210         if (!be_wq) {
6211                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6212                 return -ENOMEM;
6213         }
6214
6215         be_err_recovery_workq =
6216                 create_singlethread_workqueue("be_err_recover");
6217         if (!be_err_recovery_workq)
6218                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6219
6220         status = pci_register_driver(&be_driver);
6221         if (status) {
6222                 destroy_workqueue(be_wq);
6223                 be_destroy_err_recovery_workq();
6224         }
6225         return status;
6226 }
6227 module_init(be_init_module);
6228
6229 static void __exit be_exit_module(void)
6230 {
6231         pci_unregister_driver(&be_driver);
6232
6233         be_destroy_err_recovery_workq();
6234
6235         if (be_wq)
6236                 destroy_workqueue(be_wq);
6237 }
6238 module_exit(be_exit_module);