1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
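/* Enable/disable host interrupts by toggling the HOSTINTR bit in the
 * MEMBAR interrupt-control register in PCI config space.
 */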
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On lancer interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
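/* Ring the EQ doorbell: acknowledge num_popped event entries and, if
 * requested, re-arm the EQ and/or clear the interrupt. eq_delay_mult_enc
 * carries the encoded interrupt-delay (R2I) multiplier.
 */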
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK, only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or PF didn't pre-provision.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365 done:
366         ether_addr_copy(adapter->dev_mac, addr->sa_data);
367         ether_addr_copy(netdev->dev_addr, addr->sa_data);
368         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
369         return 0;
370 err:
371         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
372         return status;
373 }
374
375 /* BE2 supports only v0 cmd */
376 static void *hw_stats_from_cmd(struct be_adapter *adapter)
377 {
378         if (BE2_chip(adapter)) {
379                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
380
381                 return &cmd->hw_stats;
382         } else if (BE3_chip(adapter)) {
383                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
384
385                 return &cmd->hw_stats;
386         } else {
387                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
388
389                 return &cmd->hw_stats;
390         }
391 }
392
393 /* BE2 supports only v0 cmd */
394 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
395 {
396         if (BE2_chip(adapter)) {
397                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
398
399                 return &hw_stats->erx;
400         } else if (BE3_chip(adapter)) {
401                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
402
403                 return &hw_stats->erx;
404         } else {
405                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
406
407                 return &hw_stats->erx;
408         }
409 }
410
411 static void populate_be_v0_stats(struct be_adapter *adapter)
412 {
413         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
414         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
415         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
416         struct be_port_rxf_stats_v0 *port_stats =
417                                         &rxf_stats->port[adapter->port_num];
418         struct be_drv_stats *drvs = &adapter->drv_stats;
419
420         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
421         drvs->rx_pause_frames = port_stats->rx_pause_frames;
422         drvs->rx_crc_errors = port_stats->rx_crc_errors;
423         drvs->rx_control_frames = port_stats->rx_control_frames;
424         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
425         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
426         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
427         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
428         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
429         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
430         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
431         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
432         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
433         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
434         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
435         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
436         drvs->rx_dropped_header_too_small =
437                 port_stats->rx_dropped_header_too_small;
438         drvs->rx_address_filtered =
439                                         port_stats->rx_address_filtered +
440                                         port_stats->rx_vlan_filtered;
441         drvs->rx_alignment_symbol_errors =
442                 port_stats->rx_alignment_symbol_errors;
443
444         drvs->tx_pauseframes = port_stats->tx_pauseframes;
445         drvs->tx_controlframes = port_stats->tx_controlframes;
446
447         if (adapter->port_num)
448                 drvs->jabber_events = rxf_stats->port1_jabber_events;
449         else
450                 drvs->jabber_events = rxf_stats->port0_jabber_events;
451         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
452         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
453         drvs->forwarded_packets = rxf_stats->forwarded_packets;
454         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
455         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
456         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
457         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
458 }
459
460 static void populate_be_v1_stats(struct be_adapter *adapter)
461 {
462         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
463         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
464         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
465         struct be_port_rxf_stats_v1 *port_stats =
466                                         &rxf_stats->port[adapter->port_num];
467         struct be_drv_stats *drvs = &adapter->drv_stats;
468
469         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
470         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
471         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
472         drvs->rx_pause_frames = port_stats->rx_pause_frames;
473         drvs->rx_crc_errors = port_stats->rx_crc_errors;
474         drvs->rx_control_frames = port_stats->rx_control_frames;
475         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
476         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
477         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
478         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
479         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
480         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
481         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
482         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
483         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
484         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
485         drvs->rx_dropped_header_too_small =
486                 port_stats->rx_dropped_header_too_small;
487         drvs->rx_input_fifo_overflow_drop =
488                 port_stats->rx_input_fifo_overflow_drop;
489         drvs->rx_address_filtered = port_stats->rx_address_filtered;
490         drvs->rx_alignment_symbol_errors =
491                 port_stats->rx_alignment_symbol_errors;
492         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
493         drvs->tx_pauseframes = port_stats->tx_pauseframes;
494         drvs->tx_controlframes = port_stats->tx_controlframes;
495         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
496         drvs->jabber_events = port_stats->jabber_events;
497         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
498         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
499         drvs->forwarded_packets = rxf_stats->forwarded_packets;
500         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
501         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
502         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
503         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
504 }
505
506 static void populate_be_v2_stats(struct be_adapter *adapter)
507 {
508         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
509         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
510         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
511         struct be_port_rxf_stats_v2 *port_stats =
512                                         &rxf_stats->port[adapter->port_num];
513         struct be_drv_stats *drvs = &adapter->drv_stats;
514
515         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
516         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
517         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
518         drvs->rx_pause_frames = port_stats->rx_pause_frames;
519         drvs->rx_crc_errors = port_stats->rx_crc_errors;
520         drvs->rx_control_frames = port_stats->rx_control_frames;
521         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
522         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
523         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
524         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
525         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
526         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
527         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
528         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
529         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
530         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
531         drvs->rx_dropped_header_too_small =
532                 port_stats->rx_dropped_header_too_small;
533         drvs->rx_input_fifo_overflow_drop =
534                 port_stats->rx_input_fifo_overflow_drop;
535         drvs->rx_address_filtered = port_stats->rx_address_filtered;
536         drvs->rx_alignment_symbol_errors =
537                 port_stats->rx_alignment_symbol_errors;
538         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
539         drvs->tx_pauseframes = port_stats->tx_pauseframes;
540         drvs->tx_controlframes = port_stats->tx_controlframes;
541         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
542         drvs->jabber_events = port_stats->jabber_events;
543         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
544         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
545         drvs->forwarded_packets = rxf_stats->forwarded_packets;
546         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
547         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
548         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
549         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
550         if (be_roce_supported(adapter)) {
551                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
552                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
553                 drvs->rx_roce_frames = port_stats->roce_frames_received;
554                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
555                 drvs->roce_drops_payload_len =
556                         port_stats->roce_drops_payload_len;
557         }
558 }
559
560 static void populate_lancer_stats(struct be_adapter *adapter)
561 {
562         struct be_drv_stats *drvs = &adapter->drv_stats;
563         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
564
565         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
566         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
567         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
568         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
569         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
570         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
571         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
572         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
573         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
574         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
575         drvs->rx_dropped_tcp_length =
576                                 pport_stats->rx_dropped_invalid_tcp_length;
577         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
578         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
579         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
580         drvs->rx_dropped_header_too_small =
581                                 pport_stats->rx_dropped_header_too_small;
582         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
583         drvs->rx_address_filtered =
584                                         pport_stats->rx_address_filtered +
585                                         pport_stats->rx_vlan_filtered;
586         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
587         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
589         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
590         drvs->jabber_events = pport_stats->rx_jabbers;
591         drvs->forwarded_packets = pport_stats->num_forwards_lo;
592         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
593         drvs->rx_drops_too_many_frags =
594                                 pport_stats->rx_drops_too_many_frags_lo;
595 }
596
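/* Accumulate a 16-bit HW counter (which wraps at 65536) into a 32-bit
 * driver counter. A wrap is detected when the new reading is smaller
 * than the low 16 bits of the accumulator.
 */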
597 static void accumulate_16bit_val(u32 *acc, u16 val)
598 {
599 #define lo(x)                   (x & 0xFFFF)
600 #define hi(x)                   (x & 0xFFFF0000)
601         bool wrapped = val < lo(*acc);
602         u32 newacc = hi(*acc) + val;
603
604         if (wrapped)
605                 newacc += 65536;
606         ACCESS_ONCE(*acc) = newacc;
607 }
608
609 static void populate_erx_stats(struct be_adapter *adapter,
610                                struct be_rx_obj *rxo, u32 erx_stat)
611 {
612         if (!BEx_chip(adapter))
613                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
614         else
615                 /* the erx HW counter below can actually wrap around after
616                  * 65535. The driver accumulates a 32-bit value
617                  */
618                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
619                                      (u16)erx_stat);
620 }
621
622 void be_parse_stats(struct be_adapter *adapter)
623 {
624         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
625         struct be_rx_obj *rxo;
626         int i;
627         u32 erx_stat;
628
629         if (lancer_chip(adapter)) {
630                 populate_lancer_stats(adapter);
631         } else {
632                 if (BE2_chip(adapter))
633                         populate_be_v0_stats(adapter);
634                 else if (BE3_chip(adapter))
635                         /* for BE3 */
636                         populate_be_v1_stats(adapter);
637                 else
638                         populate_be_v2_stats(adapter);
639
640                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
641                 for_all_rx_queues(adapter, rxo, i) {
642                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
643                         populate_erx_stats(adapter, rxo, erx_stat);
644                 }
645         }
646 }
647
648 static void be_get_stats64(struct net_device *netdev,
649                            struct rtnl_link_stats64 *stats)
650 {
651         struct be_adapter *adapter = netdev_priv(netdev);
652         struct be_drv_stats *drvs = &adapter->drv_stats;
653         struct be_rx_obj *rxo;
654         struct be_tx_obj *txo;
655         u64 pkts, bytes;
656         unsigned int start;
657         int i;
658
659         for_all_rx_queues(adapter, rxo, i) {
660                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
661
662                 do {
663                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
664                         pkts = rx_stats(rxo)->rx_pkts;
665                         bytes = rx_stats(rxo)->rx_bytes;
666                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
667                 stats->rx_packets += pkts;
668                 stats->rx_bytes += bytes;
669                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
670                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
671                                         rx_stats(rxo)->rx_drops_no_frags;
672         }
673
674         for_all_tx_queues(adapter, txo, i) {
675                 const struct be_tx_stats *tx_stats = tx_stats(txo);
676
677                 do {
678                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
679                         pkts = tx_stats(txo)->tx_pkts;
680                         bytes = tx_stats(txo)->tx_bytes;
681                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
682                 stats->tx_packets += pkts;
683                 stats->tx_bytes += bytes;
684         }
685
686         /* bad pkts received */
687         stats->rx_errors = drvs->rx_crc_errors +
688                 drvs->rx_alignment_symbol_errors +
689                 drvs->rx_in_range_errors +
690                 drvs->rx_out_range_errors +
691                 drvs->rx_frame_too_long +
692                 drvs->rx_dropped_too_small +
693                 drvs->rx_dropped_too_short +
694                 drvs->rx_dropped_header_too_small +
695                 drvs->rx_dropped_tcp_length +
696                 drvs->rx_dropped_runt;
697
698         /* detailed rx errors */
699         stats->rx_length_errors = drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long;
702
703         stats->rx_crc_errors = drvs->rx_crc_errors;
704
705         /* frame alignment errors */
706         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
707
708         /* receiver fifo overrun */
709         /* drops_no_pbuf is not per i/f, it's per BE card */
710         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
711                                 drvs->rx_input_fifo_overflow_drop +
712                                 drvs->rx_drops_no_pbuf;
713 }
714
715 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
716 {
717         struct net_device *netdev = adapter->netdev;
718
719         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
720                 netif_carrier_off(netdev);
721                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
722         }
723
724         if (link_status)
725                 netif_carrier_on(netdev);
726         else
727                 netif_carrier_off(netdev);
728
729         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
730 }
731
732 static int be_gso_hdr_len(struct sk_buff *skb)
733 {
734         if (skb->encapsulation)
735                 return skb_inner_transport_offset(skb) +
736                        inner_tcp_hdrlen(skb);
737         return skb_transport_offset(skb) + tcp_hdrlen(skb);
738 }
739
740 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
741 {
742         struct be_tx_stats *stats = tx_stats(txo);
743         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
744         /* Account for headers which get duplicated in TSO pkt */
745         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
746
747         u64_stats_update_begin(&stats->sync);
748         stats->tx_reqs++;
749         stats->tx_bytes += skb->len + dup_hdr_len;
750         stats->tx_pkts += tx_pkts;
751         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
752                 stats->tx_vxlan_offload_pkts += tx_pkts;
753         u64_stats_update_end(&stats->sync);
754 }
755
756 /* Returns number of WRBs needed for the skb */
757 static u32 skb_wrb_cnt(struct sk_buff *skb)
758 {
759         /* +1 for the header wrb */
760         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
761 }
762
763 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
764 {
765         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
766         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
767         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
768         wrb->rsvd0 = 0;
769 }
770
771 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
772  * to avoid the swap and shift/mask operations in wrb_fill().
773  */
774 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
775 {
776         wrb->frag_pa_hi = 0;
777         wrb->frag_pa_lo = 0;
778         wrb->frag_len = 0;
779         wrb->rsvd0 = 0;
780 }
781
782 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
783                                      struct sk_buff *skb)
784 {
785         u8 vlan_prio;
786         u16 vlan_tag;
787
788         vlan_tag = skb_vlan_tag_get(skb);
789         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
790         /* If vlan priority provided by OS is NOT in available bmap */
791         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
792                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
793                                 adapter->recommended_prio_bits;
794
795         return vlan_tag;
796 }
797
798 /* Used only for IP tunnel packets */
799 static u16 skb_inner_ip_proto(struct sk_buff *skb)
800 {
801         return (inner_ip_hdr(skb)->version == 4) ?
802                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
803 }
804
805 static u16 skb_ip_proto(struct sk_buff *skb)
806 {
807         return (ip_hdr(skb)->version == 4) ?
808                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
809 }
810
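/* TXQ occupancy helpers: the queue is considered full when it cannot
 * accommodate another maximally-fragmented skb; it may be woken when at
 * least half of it is free; completions are pending while more WRBs are
 * in use than have merely been queued but not yet notified to HW.
 */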
811 static inline bool be_is_txq_full(struct be_tx_obj *txo)
812 {
813         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
814 }
815
816 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) < txo->q.len / 2;
819 }
820
821 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
824 }
825
826 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
827                                        struct sk_buff *skb,
828                                        struct be_wrb_params *wrb_params)
829 {
830         u16 proto;
831
832         if (skb_is_gso(skb)) {
833                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
834                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
835                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
836                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
837         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
838                 if (skb->encapsulation) {
839                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
840                         proto = skb_inner_ip_proto(skb);
841                 } else {
842                         proto = skb_ip_proto(skb);
843                 }
844                 if (proto == IPPROTO_TCP)
845                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
846                 else if (proto == IPPROTO_UDP)
847                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
848         }
849
850         if (skb_vlan_tag_present(skb)) {
851                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
852                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
853         }
854
855         BE_WRB_F_SET(wrb_params->features, CRC, 1);
856 }
857
858 static void wrb_fill_hdr(struct be_adapter *adapter,
859                          struct be_eth_hdr_wrb *hdr,
860                          struct be_wrb_params *wrb_params,
861                          struct sk_buff *skb)
862 {
863         memset(hdr, 0, sizeof(*hdr));
864
865         SET_TX_WRB_HDR_BITS(crc, hdr,
866                             BE_WRB_F_GET(wrb_params->features, CRC));
867         SET_TX_WRB_HDR_BITS(ipcs, hdr,
868                             BE_WRB_F_GET(wrb_params->features, IPCS));
869         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, TCPCS));
871         SET_TX_WRB_HDR_BITS(udpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, UDPCS));
873
874         SET_TX_WRB_HDR_BITS(lso, hdr,
875                             BE_WRB_F_GET(wrb_params->features, LSO));
876         SET_TX_WRB_HDR_BITS(lso6, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO6));
878         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
879
880         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
881          * hack is not needed, the evt bit is set while ringing DB.
882          */
883         SET_TX_WRB_HDR_BITS(event, hdr,
884                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
885         SET_TX_WRB_HDR_BITS(vlan, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN));
887         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
888
889         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
890         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
891         SET_TX_WRB_HDR_BITS(mgmt, hdr,
892                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
893 }
894
895 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
896                           bool unmap_single)
897 {
898         dma_addr_t dma;
899         u32 frag_len = le32_to_cpu(wrb->frag_len);
900
902         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
903                 (u64)le32_to_cpu(wrb->frag_pa_lo);
904         if (frag_len) {
905                 if (unmap_single)
906                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
907                 else
908                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
909         }
910 }
911
912 /* Grab a WRB header for xmit */
913 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
914 {
915         u32 head = txo->q.head;
916
917         queue_head_inc(&txo->q);
918         return head;
919 }
920
921 /* Set up the WRB header for xmit */
922 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
923                                 struct be_tx_obj *txo,
924                                 struct be_wrb_params *wrb_params,
925                                 struct sk_buff *skb, u16 head)
926 {
927         u32 num_frags = skb_wrb_cnt(skb);
928         struct be_queue_info *txq = &txo->q;
929         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
930
931         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
932         be_dws_cpu_to_le(hdr, sizeof(*hdr));
933
934         BUG_ON(txo->sent_skb_list[head]);
935         txo->sent_skb_list[head] = skb;
936         txo->last_req_hdr = head;
937         atomic_add(num_frags, &txq->used);
938         txo->last_req_wrb_cnt = num_frags;
939         txo->pend_wrb_cnt += num_frags;
940 }
941
942 /* Setup a WRB fragment (buffer descriptor) for xmit */
943 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
944                                  int len)
945 {
946         struct be_eth_wrb *wrb;
947         struct be_queue_info *txq = &txo->q;
948
949         wrb = queue_head_node(txq);
950         wrb_fill(wrb, busaddr, len);
951         queue_head_inc(txq);
952 }
953
954 /* Bring the queue back to the state it was in before be_xmit_enqueue()
955  * was invoked. The producer index is restored to the previous packet and the
956  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
957  */
958 static void be_xmit_restore(struct be_adapter *adapter,
959                             struct be_tx_obj *txo, u32 head, bool map_single,
960                             u32 copied)
961 {
962         struct device *dev;
963         struct be_eth_wrb *wrb;
964         struct be_queue_info *txq = &txo->q;
965
966         dev = &adapter->pdev->dev;
967         txq->head = head;
968
969         /* skip the first wrb (hdr); it's not mapped */
970         queue_head_inc(txq);
971         while (copied) {
972                 wrb = queue_head_node(txq);
973                 unmap_tx_frag(dev, wrb, map_single);
974                 map_single = false;
975                 copied -= le32_to_cpu(wrb->frag_len);
976                 queue_head_inc(txq);
977         }
978
979         txq->head = head;
980 }
981
982 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
983  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
984  * of WRBs used up by the packet.
985  */
986 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
987                            struct sk_buff *skb,
988                            struct be_wrb_params *wrb_params)
989 {
990         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
991         struct device *dev = &adapter->pdev->dev;
992         struct be_queue_info *txq = &txo->q;
993         bool map_single = false;
994         u32 head = txq->head;
995         dma_addr_t busaddr;
996         int len;
997
998         head = be_tx_get_wrb_hdr(txo);
999
1000         if (skb->len > skb->data_len) {
1001                 len = skb_headlen(skb);
1002
1003                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1004                 if (dma_mapping_error(dev, busaddr))
1005                         goto dma_err;
1006                 map_single = true;
1007                 be_tx_setup_wrb_frag(txo, busaddr, len);
1008                 copied += len;
1009         }
1010
1011         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1012                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1013                 len = skb_frag_size(frag);
1014
1015                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1016                 if (dma_mapping_error(dev, busaddr))
1017                         goto dma_err;
1018                 be_tx_setup_wrb_frag(txo, busaddr, len);
1019                 copied += len;
1020         }
1021
1022         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1023
1024         be_tx_stats_update(txo, skb);
1025         return wrb_cnt;
1026
1027 dma_err:
1028         adapter->drv_stats.dma_map_errors++;
1029         be_xmit_restore(adapter, txo, head, map_single, copied);
1030         return 0;
1031 }
1032
1033 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1034 {
1035         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1036 }
1037
1038 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1039                                              struct sk_buff *skb,
1040                                              struct be_wrb_params
1041                                              *wrb_params)
1042 {
1043         u16 vlan_tag = 0;
1044
1045         skb = skb_share_check(skb, GFP_ATOMIC);
1046         if (unlikely(!skb))
1047                 return skb;
1048
1049         if (skb_vlan_tag_present(skb))
1050                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1051
1052         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1053                 if (!vlan_tag)
1054                         vlan_tag = adapter->pvid;
1055                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1056                  * to skip VLAN insertion
1057                  */
1058                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1059         }
1060
1061         if (vlan_tag) {
1062                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1063                                                 vlan_tag);
1064                 if (unlikely(!skb))
1065                         return skb;
1066                 skb->vlan_tci = 0;
1067         }
1068
1069         /* Insert the outer VLAN, if any */
1070         if (adapter->qnq_vid) {
1071                 vlan_tag = adapter->qnq_vid;
1072                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1073                                                 vlan_tag);
1074                 if (unlikely(!skb))
1075                         return skb;
1076                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1077         }
1078
1079         return skb;
1080 }
1081
1082 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1083 {
1084         struct ethhdr *eh = (struct ethhdr *)skb->data;
1085         u16 offset = ETH_HLEN;
1086
1087         if (eh->h_proto == htons(ETH_P_IPV6)) {
1088                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1089
1090                 offset += sizeof(struct ipv6hdr);
1091                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1092                     ip6h->nexthdr != NEXTHDR_UDP) {
1093                         struct ipv6_opt_hdr *ehdr =
1094                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1095
1096                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1097                         if (ehdr->hdrlen == 0xff)
1098                                 return true;
1099                 }
1100         }
1101         return false;
1102 }
1103
1104 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1105 {
1106         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1107 }
1108
1109 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1110 {
1111         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1112 }
1113
1114 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1115                                                   struct sk_buff *skb,
1116                                                   struct be_wrb_params
1117                                                   *wrb_params)
1118 {
1119         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1120         unsigned int eth_hdr_len;
1121         struct iphdr *ip;
1122
1123         /* For padded packets, BE HW modifies tot_len field in IP header
1124          * incorrectly when VLAN tag is inserted by HW.
1125          * For padded packets, Lancer computes incorrect checksum.
1126          */
1127         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1128                                                 VLAN_ETH_HLEN : ETH_HLEN;
1129         if (skb->len <= 60 &&
1130             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1131             is_ipv4_pkt(skb)) {
1132                 ip = (struct iphdr *)ip_hdr(skb);
1133                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1134         }
1135
1136         /* If vlan tag is already inlined in the packet, skip HW VLAN
1137          * tagging in pvid-tagging mode
1138          */
1139         if (be_pvid_tagging_enabled(adapter) &&
1140             veh->h_vlan_proto == htons(ETH_P_8021Q))
1141                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1142
1143         /* HW has a bug wherein it will calculate CSUM for VLAN
1144          * pkts even though it is disabled.
1145          * Manually insert VLAN in pkt.
1146          */
1147         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1148             skb_vlan_tag_present(skb)) {
1149                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1150                 if (unlikely(!skb))
1151                         goto err;
1152         }
1153
1154         /* HW may lockup when VLAN HW tagging is requested on
1155          * certain ipv6 packets. Drop such pkts if the HW workaround to
1156          * skip HW tagging is not enabled by FW.
1157          */
1158         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1159                      (adapter->pvid || adapter->qnq_vid) &&
1160                      !qnq_async_evt_rcvd(adapter)))
1161                 goto tx_drop;
1162
1163         /* Insert the VLAN tag manually to prevent an ASIC lockup that
1164          * occurs when the ASIC itself inserts a VLAN tag into certain
1165          * ipv6 packets. Insert VLAN tags in the driver, and set the
1166          * event, completion and vlan bits accordingly
1167          * in the Tx WRB.
1168          */
1169         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1170             be_vlan_tag_tx_chk(adapter, skb)) {
1171                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1172                 if (unlikely(!skb))
1173                         goto err;
1174         }
1175
1176         return skb;
1177 tx_drop:
1178         dev_kfree_skb_any(skb);
1179 err:
1180         return NULL;
1181 }
1182
1183 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1184                                            struct sk_buff *skb,
1185                                            struct be_wrb_params *wrb_params)
1186 {
1187         int err;
1188
1189         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1190          * packets that are 32 bytes or less may cause a transmit stall
1191          * on that port. The workaround is to pad such packets
1192          * (len <= 32 bytes) to a minimum length of 36 bytes.
1193          */
1194         if (skb->len <= 32) {
1195                 if (skb_put_padto(skb, 36))
1196                         return NULL;
1197         }
1198
1199         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1200                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1201                 if (!skb)
1202                         return NULL;
1203         }
1204
1205         /* The stack can send us skbs with length greater than
1206          * what the HW can handle. Trim the extra bytes.
1207          */
1208         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1209         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1210         WARN_ON(err);
1211
1212         return skb;
1213 }
1214
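/* Notify HW of all WRBs queued since the last flush. The last header WRB
 * is marked eventable and, on BE chips, a dummy WRB is added so that an
 * even number of WRBs is submitted, before ringing the TX doorbell.
 */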
1215 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1216 {
1217         struct be_queue_info *txq = &txo->q;
1218         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1219
1220         /* Mark the last request eventable if it hasn't been marked already */
1221         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1222                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1223
1224         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1225         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1226                 wrb_fill_dummy(queue_head_node(txq));
1227                 queue_head_inc(txq);
1228                 atomic_inc(&txq->used);
1229                 txo->pend_wrb_cnt++;
1230                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1231                                            TX_HDR_WRB_NUM_SHIFT);
1232                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1233                                           TX_HDR_WRB_NUM_SHIFT);
1234         }
1235         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1236         txo->pend_wrb_cnt = 0;
1237 }
1238
1239 /* OS2BMC related */
1240
1241 #define DHCP_CLIENT_PORT        68
1242 #define DHCP_SERVER_PORT        67
1243 #define NET_BIOS_PORT1          137
1244 #define NET_BIOS_PORT2          138
1245 #define DHCPV6_RAS_PORT         547
1246
1247 #define is_mc_allowed_on_bmc(adapter, eh)       \
1248         (!is_multicast_filt_enabled(adapter) && \
1249          is_multicast_ether_addr(eh->h_dest) && \
1250          !is_broadcast_ether_addr(eh->h_dest))
1251
1252 #define is_bc_allowed_on_bmc(adapter, eh)       \
1253         (!is_broadcast_filt_enabled(adapter) && \
1254          is_broadcast_ether_addr(eh->h_dest))
1255
1256 #define is_arp_allowed_on_bmc(adapter, skb)     \
1257         (is_arp(skb) && is_arp_filt_enabled(adapter))
1258
1259 #define is_broadcast_packet(eh, adapter)        \
1260                 (is_multicast_ether_addr(eh->h_dest) && \
1261                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1262
1263 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1264
1265 #define is_arp_filt_enabled(adapter)    \
1266                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1267
1268 #define is_dhcp_client_filt_enabled(adapter)    \
1269                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1270
1271 #define is_dhcp_srvr_filt_enabled(adapter)      \
1272                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1273
1274 #define is_nbios_filt_enabled(adapter)  \
1275                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1276
1277 #define is_ipv6_na_filt_enabled(adapter)        \
1278                 (adapter->bmc_filt_mask &       \
1279                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1280
1281 #define is_ipv6_ra_filt_enabled(adapter)        \
1282                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1283
1284 #define is_ipv6_ras_filt_enabled(adapter)       \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1286
1287 #define is_broadcast_filt_enabled(adapter)      \
1288                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1289
1290 #define is_multicast_filt_enabled(adapter)      \
1291                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1292
1293 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1294                                struct sk_buff **skb)
1295 {
1296         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1297         bool os2bmc = false;
1298
1299         if (!be_is_os2bmc_enabled(adapter))
1300                 goto done;
1301
1302         if (!is_multicast_ether_addr(eh->h_dest))
1303                 goto done;
1304
1305         if (is_mc_allowed_on_bmc(adapter, eh) ||
1306             is_bc_allowed_on_bmc(adapter, eh) ||
1307             is_arp_allowed_on_bmc(adapter, (*skb))) {
1308                 os2bmc = true;
1309                 goto done;
1310         }
1311
1312         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1313                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1314                 u8 nexthdr = hdr->nexthdr;
1315
1316                 if (nexthdr == IPPROTO_ICMPV6) {
1317                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1318
1319                         switch (icmp6->icmp6_type) {
1320                         case NDISC_ROUTER_ADVERTISEMENT:
1321                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1322                                 goto done;
1323                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1324                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1325                                 goto done;
1326                         default:
1327                                 break;
1328                         }
1329                 }
1330         }
1331
1332         if (is_udp_pkt((*skb))) {
1333                 struct udphdr *udp = udp_hdr((*skb));
1334
1335                 switch (ntohs(udp->dest)) {
1336                 case DHCP_CLIENT_PORT:
1337                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1338                         goto done;
1339                 case DHCP_SERVER_PORT:
1340                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1341                         goto done;
1342                 case NET_BIOS_PORT1:
1343                 case NET_BIOS_PORT2:
1344                         os2bmc = is_nbios_filt_enabled(adapter);
1345                         goto done;
1346                 case DHCPV6_RAS_PORT:
1347                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1348                         goto done;
1349                 default:
1350                         break;
1351                 }
1352         }
1353 done:
1354         /* For packets sent over a vlan that are destined to the BMC,
1355          * the asic expects the vlan tag to be inline in the packet.
1356          */
1357         if (os2bmc)
1358                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1359
1360         return os2bmc;
1361 }
1362
1363 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1364 {
1365         struct be_adapter *adapter = netdev_priv(netdev);
1366         u16 q_idx = skb_get_queue_mapping(skb);
1367         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1368         struct be_wrb_params wrb_params = { 0 };
1369         bool flush = !skb->xmit_more;
1370         u16 wrb_cnt;
1371
1372         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1373         if (unlikely(!skb))
1374                 goto drop;
1375
1376         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1377
1378         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1379         if (unlikely(!wrb_cnt)) {
1380                 dev_kfree_skb_any(skb);
1381                 goto drop;
1382         }
1383
1384         /* if os2bmc is enabled and if the pkt is destined to bmc,
1385          * enqueue the pkt a 2nd time with mgmt bit set.
1386          */
1387         if (be_send_pkt_to_bmc(adapter, &skb)) {
1388                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1389                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1390                 if (unlikely(!wrb_cnt))
1391                         goto drop;
1392                 else
1393                         skb_get(skb);
1394         }
1395
1396         if (be_is_txq_full(txo)) {
1397                 netif_stop_subqueue(netdev, q_idx);
1398                 tx_stats(txo)->tx_stops++;
1399         }
1400
1401         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1402                 be_xmit_flush(adapter, txo);
1403
1404         return NETDEV_TX_OK;
1405 drop:
1406         tx_stats(txo)->tx_drv_drops++;
1407         /* Flush the already enqueued tx requests */
1408         if (flush && txo->pend_wrb_cnt)
1409                 be_xmit_flush(adapter, txo);
1410
1411         return NETDEV_TX_OK;
1412 }
1413
1414 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1415 {
1416         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1417                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1418 }
1419
1420 static int be_set_vlan_promisc(struct be_adapter *adapter)
1421 {
1422         struct device *dev = &adapter->pdev->dev;
1423         int status;
1424
1425         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1426                 return 0;
1427
1428         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1429         if (!status) {
1430                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1431                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1432         } else {
1433                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1434         }
1435         return status;
1436 }
1437
1438 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1439 {
1440         struct device *dev = &adapter->pdev->dev;
1441         int status;
1442
1443         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1444         if (!status) {
1445                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1446                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1447         }
1448         return status;
1449 }
1450
1451 /*
1452  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1453  * If the user configures more, place BE in vlan promiscuous mode.
1454  */
1455 static int be_vid_config(struct be_adapter *adapter)
1456 {
1457         struct device *dev = &adapter->pdev->dev;
1458         u16 vids[BE_NUM_VLANS_SUPPORTED];
1459         u16 num = 0, i = 0;
1460         int status = 0;
1461
1462         /* No need to change the VLAN state if the I/F is in promiscuous */
1463         if (adapter->netdev->flags & IFF_PROMISC)
1464                 return 0;
1465
1466         if (adapter->vlans_added > be_max_vlans(adapter))
1467                 return be_set_vlan_promisc(adapter);
1468
1469         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1470                 status = be_clear_vlan_promisc(adapter);
1471                 if (status)
1472                         return status;
1473         }
1474         /* Construct VLAN Table to give to HW */
1475         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1476                 vids[num++] = cpu_to_le16(i);
1477
1478         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1479         if (status) {
1480                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1481                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1482                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1483                     addl_status(status) ==
1484                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1485                         return be_set_vlan_promisc(adapter);
1486         }
1487         return status;
1488 }
1489
1490 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1491 {
1492         struct be_adapter *adapter = netdev_priv(netdev);
1493         int status = 0;
1494
1495         mutex_lock(&adapter->rx_filter_lock);
1496
1497         /* Packets with VID 0 are always received by Lancer by default */
1498         if (lancer_chip(adapter) && vid == 0)
1499                 goto done;
1500
1501         if (test_bit(vid, adapter->vids))
1502                 goto done;
1503
1504         set_bit(vid, adapter->vids);
1505         adapter->vlans_added++;
1506
1507         status = be_vid_config(adapter);
1508 done:
1509         mutex_unlock(&adapter->rx_filter_lock);
1510         return status;
1511 }
1512
1513 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1514 {
1515         struct be_adapter *adapter = netdev_priv(netdev);
1516         int status = 0;
1517
1518         mutex_lock(&adapter->rx_filter_lock);
1519
1520         /* Packets with VID 0 are always received by Lancer by default */
1521         if (lancer_chip(adapter) && vid == 0)
1522                 goto done;
1523
1524         if (!test_bit(vid, adapter->vids))
1525                 goto done;
1526
1527         clear_bit(vid, adapter->vids);
1528         adapter->vlans_added--;
1529
1530         status = be_vid_config(adapter);
1531 done:
1532         mutex_unlock(&adapter->rx_filter_lock);
1533         return status;
1534 }
1535
1536 static void be_set_all_promisc(struct be_adapter *adapter)
1537 {
1538         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1539         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1540 }
1541
1542 static void be_set_mc_promisc(struct be_adapter *adapter)
1543 {
1544         int status;
1545
1546         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1547                 return;
1548
1549         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1550         if (!status)
1551                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1552 }
1553
1554 static void be_set_uc_promisc(struct be_adapter *adapter)
1555 {
1556         int status;
1557
1558         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1559                 return;
1560
1561         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1562         if (!status)
1563                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1564 }
1565
1566 static void be_clear_uc_promisc(struct be_adapter *adapter)
1567 {
1568         int status;
1569
1570         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1571                 return;
1572
1573         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1574         if (!status)
1575                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1576 }
1577
1578 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1579  * We use a single callback function for both sync and unsync. We really don't
1580  * add/remove addresses through this callback. But, we use it to detect changes
1581  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1582  */
1583 static int be_uc_list_update(struct net_device *netdev,
1584                              const unsigned char *addr)
1585 {
1586         struct be_adapter *adapter = netdev_priv(netdev);
1587
1588         adapter->update_uc_list = true;
1589         return 0;
1590 }
1591
1592 static int be_mc_list_update(struct net_device *netdev,
1593                              const unsigned char *addr)
1594 {
1595         struct be_adapter *adapter = netdev_priv(netdev);
1596
1597         adapter->update_mc_list = true;
1598         return 0;
1599 }
1600
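/* Program the HW multicast filter: fall back to mc-promiscuous mode when
 * IFF_ALLMULTI is set or the configured addresses exceed be_max_mc();
 * otherwise cache the netdev mc-list in the adapter and issue an rx-filter
 * update for it.
 */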
1601 static void be_set_mc_list(struct be_adapter *adapter)
1602 {
1603         struct net_device *netdev = adapter->netdev;
1604         struct netdev_hw_addr *ha;
1605         bool mc_promisc = false;
1606         int status;
1607
1608         netif_addr_lock_bh(netdev);
1609         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1610
1611         if (netdev->flags & IFF_PROMISC) {
1612                 adapter->update_mc_list = false;
1613         } else if (netdev->flags & IFF_ALLMULTI ||
1614                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1615                 /* Enable multicast promisc if num configured exceeds
1616                  * what we support
1617                  */
1618                 mc_promisc = true;
1619                 adapter->update_mc_list = false;
1620         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1621                 /* Update mc-list unconditionally if the iface was previously
1622                  * in mc-promisc mode and now is out of that mode.
1623                  */
1624                 adapter->update_mc_list = true;
1625         }
1626
1627         if (adapter->update_mc_list) {
1628                 int i = 0;
1629
1630                 /* cache the mc-list in adapter */
1631                 netdev_for_each_mc_addr(ha, netdev) {
1632                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1633                         i++;
1634                 }
1635                 adapter->mc_count = netdev_mc_count(netdev);
1636         }
1637         netif_addr_unlock_bh(netdev);
1638
1639         if (mc_promisc) {
1640                 be_set_mc_promisc(adapter);
1641         } else if (adapter->update_mc_list) {
1642                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1643                 if (!status)
1644                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1645                 else
1646                         be_set_mc_promisc(adapter);
1647
1648                 adapter->update_mc_list = false;
1649         }
1650 }
1651
1652 static void be_clear_mc_list(struct be_adapter *adapter)
1653 {
1654         struct net_device *netdev = adapter->netdev;
1655
1656         __dev_mc_unsync(netdev, NULL);
1657         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1658         adapter->mc_count = 0;
1659 }
1660
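/* Add the uc-list entry at uc_idx as a pmac on the interface. If the address
 * is the port's own dev_mac, reuse pmac_id[0] instead of programming a
 * duplicate entry.
 */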
1661 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1662 {
1663         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1664                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1665                 return 0;
1666         }
1667
1668         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1669                                adapter->if_handle,
1670                                &adapter->pmac_id[uc_idx + 1], 0);
1671 }
1672
1673 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1674 {
1675         if (pmac_id == adapter->pmac_id[0])
1676                 return;
1677
1678         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1679 }
1680
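/* Program the HW unicast filter: switch to uc-promiscuous mode when the
 * netdev uc-list no longer fits (more than be_max_uc() - 1 entries),
 * otherwise delete the stale pmac entries and re-add every cached unicast MAC.
 */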
1681 static void be_set_uc_list(struct be_adapter *adapter)
1682 {
1683         struct net_device *netdev = adapter->netdev;
1684         struct netdev_hw_addr *ha;
1685         bool uc_promisc = false;
1686         int curr_uc_macs = 0, i;
1687
1688         netif_addr_lock_bh(netdev);
1689         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1690
1691         if (netdev->flags & IFF_PROMISC) {
1692                 adapter->update_uc_list = false;
1693         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1694                 uc_promisc = true;
1695                 adapter->update_uc_list = false;
1696         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1697                 /* Update uc-list unconditionally if the iface was previously
1698                  * in uc-promisc mode and now is out of that mode.
1699                  */
1700                 adapter->update_uc_list = true;
1701         }
1702
1703         if (adapter->update_uc_list) {
1704                 /* cache the uc-list in adapter array */
1705                 i = 0;
1706                 netdev_for_each_uc_addr(ha, netdev) {
1707                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1708                         i++;
1709                 }
1710                 curr_uc_macs = netdev_uc_count(netdev);
1711         }
1712         netif_addr_unlock_bh(netdev);
1713
1714         if (uc_promisc) {
1715                 be_set_uc_promisc(adapter);
1716         } else if (adapter->update_uc_list) {
1717                 be_clear_uc_promisc(adapter);
1718
1719                 for (i = 0; i < adapter->uc_macs; i++)
1720                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1721
1722                 for (i = 0; i < curr_uc_macs; i++)
1723                         be_uc_mac_add(adapter, i);
1724                 adapter->uc_macs = curr_uc_macs;
1725                 adapter->update_uc_list = false;
1726         }
1727 }
1728
1729 static void be_clear_uc_list(struct be_adapter *adapter)
1730 {
1731         struct net_device *netdev = adapter->netdev;
1732         int i;
1733
1734         __dev_uc_unsync(netdev, NULL);
1735         for (i = 0; i < adapter->uc_macs; i++)
1736                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1737
1738         adapter->uc_macs = 0;
1739 }
1740
1741 static void __be_set_rx_mode(struct be_adapter *adapter)
1742 {
1743         struct net_device *netdev = adapter->netdev;
1744
1745         mutex_lock(&adapter->rx_filter_lock);
1746
1747         if (netdev->flags & IFF_PROMISC) {
1748                 if (!be_in_all_promisc(adapter))
1749                         be_set_all_promisc(adapter);
1750         } else if (be_in_all_promisc(adapter)) {
1751                 /* We need to re-program the vlan-list or clear
1752                  * vlan-promisc mode (if needed) when the interface
1753                  * comes out of promisc mode.
1754                  */
1755                 be_vid_config(adapter);
1756         }
1757
1758         be_set_uc_list(adapter);
1759         be_set_mc_list(adapter);
1760
1761         mutex_unlock(&adapter->rx_filter_lock);
1762 }
1763
1764 static void be_work_set_rx_mode(struct work_struct *work)
1765 {
1766         struct be_cmd_work *cmd_work =
1767                                 container_of(work, struct be_cmd_work, work);
1768
1769         __be_set_rx_mode(cmd_work->adapter);
1770         kfree(cmd_work);
1771 }
1772
1773 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1774 {
1775         struct be_adapter *adapter = netdev_priv(netdev);
1776         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1777         int status;
1778
1779         if (!sriov_enabled(adapter))
1780                 return -EPERM;
1781
1782         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1783                 return -EINVAL;
1784
1785         /* Proceed further only if the user-provided MAC is different
1786          * from the active MAC
1787          */
1788         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1789                 return 0;
1790
1791         if (BEx_chip(adapter)) {
1792                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1793                                 vf + 1);
1794
1795                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1796                                          &vf_cfg->pmac_id, vf + 1);
1797         } else {
1798                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1799                                         vf + 1);
1800         }
1801
1802         if (status) {
1803                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1804                         mac, vf, status);
1805                 return be_cmd_status(status);
1806         }
1807
1808         ether_addr_copy(vf_cfg->mac_addr, mac);
1809
1810         return 0;
1811 }
1812
1813 static int be_get_vf_config(struct net_device *netdev, int vf,
1814                             struct ifla_vf_info *vi)
1815 {
1816         struct be_adapter *adapter = netdev_priv(netdev);
1817         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1818
1819         if (!sriov_enabled(adapter))
1820                 return -EPERM;
1821
1822         if (vf >= adapter->num_vfs)
1823                 return -EINVAL;
1824
1825         vi->vf = vf;
1826         vi->max_tx_rate = vf_cfg->tx_rate;
1827         vi->min_tx_rate = 0;
1828         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1829         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1830         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1831         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1832         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1833
1834         return 0;
1835 }
1836
1837 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1838 {
1839         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1840         u16 vids[BE_NUM_VLANS_SUPPORTED];
1841         int vf_if_id = vf_cfg->if_handle;
1842         int status;
1843
1844         /* Enable Transparent VLAN Tagging */
1845         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1846         if (status)
1847                 return status;
1848
1849         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1850         vids[0] = 0;
1851         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1852         if (!status)
1853                 dev_info(&adapter->pdev->dev,
1854                          "Cleared guest VLANs on VF%d", vf);
1855
1856         /* After TVT is enabled, disallow VFs to program VLAN filters */
1857         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1858                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1859                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1860                 if (!status)
1861                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1862         }
1863         return 0;
1864 }
1865
1866 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1867 {
1868         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1869         struct device *dev = &adapter->pdev->dev;
1870         int status;
1871
1872         /* Reset Transparent VLAN Tagging. */
1873         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1874                                        vf_cfg->if_handle, 0, 0);
1875         if (status)
1876                 return status;
1877
1878         /* Allow VFs to program VLAN filtering */
1879         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1880                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1881                                                   BE_PRIV_FILTMGMT, vf + 1);
1882                 if (!status) {
1883                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1884                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1885                 }
1886         }
1887
1888         dev_info(dev,
1889                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1890         return 0;
1891 }
1892
1893 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1894                           __be16 vlan_proto)
1895 {
1896         struct be_adapter *adapter = netdev_priv(netdev);
1897         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1898         int status;
1899
1900         if (!sriov_enabled(adapter))
1901                 return -EPERM;
1902
1903         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1904                 return -EINVAL;
1905
1906         if (vlan_proto != htons(ETH_P_8021Q))
1907                 return -EPROTONOSUPPORT;
1908
1909         if (vlan || qos) {
1910                 vlan |= qos << VLAN_PRIO_SHIFT;
1911                 status = be_set_vf_tvt(adapter, vf, vlan);
1912         } else {
1913                 status = be_clear_vf_tvt(adapter, vf);
1914         }
1915
1916         if (status) {
1917                 dev_err(&adapter->pdev->dev,
1918                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1919                         status);
1920                 return be_cmd_status(status);
1921         }
1922
1923         vf_cfg->vlan_tag = vlan;
1924         return 0;
1925 }
1926
1927 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1928                              int min_tx_rate, int max_tx_rate)
1929 {
1930         struct be_adapter *adapter = netdev_priv(netdev);
1931         struct device *dev = &adapter->pdev->dev;
1932         int percent_rate, status = 0;
1933         u16 link_speed = 0;
1934         u8 link_status;
1935
1936         if (!sriov_enabled(adapter))
1937                 return -EPERM;
1938
1939         if (vf >= adapter->num_vfs)
1940                 return -EINVAL;
1941
1942         if (min_tx_rate)
1943                 return -EINVAL;
1944
1945         if (!max_tx_rate)
1946                 goto config_qos;
1947
1948         status = be_cmd_link_status_query(adapter, &link_speed,
1949                                           &link_status, 0);
1950         if (status)
1951                 goto err;
1952
1953         if (!link_status) {
1954                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1955                 status = -ENETDOWN;
1956                 goto err;
1957         }
1958
1959         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1960                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1961                         link_speed);
1962                 status = -EINVAL;
1963                 goto err;
1964         }
1965
1966         /* On Skyhawk the QOS setting must be done only as a % value */
1967         percent_rate = link_speed / 100;
1968         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1969                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1970                         percent_rate);
1971                 status = -EINVAL;
1972                 goto err;
1973         }
1974
1975 config_qos:
1976         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1977         if (status)
1978                 goto err;
1979
1980         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1981         return 0;
1982
1983 err:
1984         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1985                 max_tx_rate, vf);
1986         return be_cmd_status(status);
1987 }
1988
1989 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1990                                 int link_state)
1991 {
1992         struct be_adapter *adapter = netdev_priv(netdev);
1993         int status;
1994
1995         if (!sriov_enabled(adapter))
1996                 return -EPERM;
1997
1998         if (vf >= adapter->num_vfs)
1999                 return -EINVAL;
2000
2001         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2002         if (status) {
2003                 dev_err(&adapter->pdev->dev,
2004                         "Link state change on VF %d failed: %#x\n", vf, status);
2005                 return be_cmd_status(status);
2006         }
2007
2008         adapter->vf_cfg[vf].plink_tracking = link_state;
2009
2010         return 0;
2011 }
2012
2013 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2014 {
2015         struct be_adapter *adapter = netdev_priv(netdev);
2016         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2017         u8 spoofchk;
2018         int status;
2019
2020         if (!sriov_enabled(adapter))
2021                 return -EPERM;
2022
2023         if (vf >= adapter->num_vfs)
2024                 return -EINVAL;
2025
2026         if (BEx_chip(adapter))
2027                 return -EOPNOTSUPP;
2028
2029         if (enable == vf_cfg->spoofchk)
2030                 return 0;
2031
2032         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2033
2034         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2035                                        0, spoofchk);
2036         if (status) {
2037                 dev_err(&adapter->pdev->dev,
2038                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2039                 return be_cmd_status(status);
2040         }
2041
2042         vf_cfg->spoofchk = enable;
2043         return 0;
2044 }
2045
2046 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2047                           ulong now)
2048 {
2049         aic->rx_pkts_prev = rx_pkts;
2050         aic->tx_reqs_prev = tx_pkts;
2051         aic->jiffies = now;
2052 }
2053
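/* Adaptive interrupt coalescing: derive a new EQ delay from the combined
 * RX + TX packet rate seen on this EQ since the last sample. For example,
 * ~300k pkts/s gives (300000 / 15000) << 2 = 80; values below 8 are treated
 * as 0 and the result is clamped to the [min_eqd, max_eqd] range.
 */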
2054 static int be_get_new_eqd(struct be_eq_obj *eqo)
2055 {
2056         struct be_adapter *adapter = eqo->adapter;
2057         int eqd, start;
2058         struct be_aic_obj *aic;
2059         struct be_rx_obj *rxo;
2060         struct be_tx_obj *txo;
2061         u64 rx_pkts = 0, tx_pkts = 0;
2062         ulong now;
2063         u32 pps, delta;
2064         int i;
2065
2066         aic = &adapter->aic_obj[eqo->idx];
2067         if (!aic->enable) {
2068                 if (aic->jiffies)
2069                         aic->jiffies = 0;
2070                 eqd = aic->et_eqd;
2071                 return eqd;
2072         }
2073
2074         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2075                 do {
2076                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2077                         rx_pkts += rxo->stats.rx_pkts;
2078                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2079         }
2080
2081         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2082                 do {
2083                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2084                         tx_pkts += txo->stats.tx_reqs;
2085                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2086         }
2087
2088         /* Skip if jiffies wrapped around or this is the first calculation */
2089         now = jiffies;
2090         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2091             rx_pkts < aic->rx_pkts_prev ||
2092             tx_pkts < aic->tx_reqs_prev) {
2093                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2094                 return aic->prev_eqd;
2095         }
2096
2097         delta = jiffies_to_msecs(now - aic->jiffies);
2098         if (delta == 0)
2099                 return aic->prev_eqd;
2100
2101         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2102                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2103         eqd = (pps / 15000) << 2;
2104
2105         if (eqd < 8)
2106                 eqd = 0;
2107         eqd = min_t(u32, eqd, aic->max_eqd);
2108         eqd = max_t(u32, eqd, aic->min_eqd);
2109
2110         be_aic_update(aic, rx_pkts, tx_pkts, now);
2111
2112         return eqd;
2113 }
2114
2115 /* For Skyhawk-R only */
2116 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2117 {
2118         struct be_adapter *adapter = eqo->adapter;
2119         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2120         ulong now = jiffies;
2121         int eqd;
2122         u32 mult_enc;
2123
2124         if (!aic->enable)
2125                 return 0;
2126
2127         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2128                 eqd = aic->prev_eqd;
2129         else
2130                 eqd = be_get_new_eqd(eqo);
2131
2132         if (eqd > 100)
2133                 mult_enc = R2I_DLY_ENC_1;
2134         else if (eqd > 60)
2135                 mult_enc = R2I_DLY_ENC_2;
2136         else if (eqd > 20)
2137                 mult_enc = R2I_DLY_ENC_3;
2138         else
2139                 mult_enc = R2I_DLY_ENC_0;
2140
2141         aic->prev_eqd = eqd;
2142
2143         return mult_enc;
2144 }
2145
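/* Recompute the delay for every EQ and push the values that changed (or all
 * of them when force_update is set) to the FW in one be_cmd_modify_eqd() call.
 */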
2146 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2147 {
2148         struct be_set_eqd set_eqd[MAX_EVT_QS];
2149         struct be_aic_obj *aic;
2150         struct be_eq_obj *eqo;
2151         int i, num = 0, eqd;
2152
2153         for_all_evt_queues(adapter, eqo, i) {
2154                 aic = &adapter->aic_obj[eqo->idx];
2155                 eqd = be_get_new_eqd(eqo);
2156                 if (force_update || eqd != aic->prev_eqd) {
2157                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2158                         set_eqd[num].eq_id = eqo->q.id;
2159                         aic->prev_eqd = eqd;
2160                         num++;
2161                 }
2162         }
2163
2164         if (num)
2165                 be_cmd_modify_eqd(adapter, set_eqd, num);
2166 }
2167
2168 static void be_rx_stats_update(struct be_rx_obj *rxo,
2169                                struct be_rx_compl_info *rxcp)
2170 {
2171         struct be_rx_stats *stats = rx_stats(rxo);
2172
2173         u64_stats_update_begin(&stats->sync);
2174         stats->rx_compl++;
2175         stats->rx_bytes += rxcp->pkt_size;
2176         stats->rx_pkts++;
2177         if (rxcp->tunneled)
2178                 stats->rx_vxlan_offload_pkts++;
2179         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2180                 stats->rx_mcast_pkts++;
2181         if (rxcp->err)
2182                 stats->rx_compl_err++;
2183         u64_stats_update_end(&stats->sync);
2184 }
2185
2186 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2187 {
2188         /* L4 checksum is not reliable for non TCP/UDP packets.
2189          * Also ignore ipcksm for ipv6 pkts
2190          */
2191         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2192                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2193 }
2194
2195 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2196 {
2197         struct be_adapter *adapter = rxo->adapter;
2198         struct be_rx_page_info *rx_page_info;
2199         struct be_queue_info *rxq = &rxo->q;
2200         u32 frag_idx = rxq->tail;
2201
2202         rx_page_info = &rxo->page_info_tbl[frag_idx];
2203         BUG_ON(!rx_page_info->page);
2204
2205         if (rx_page_info->last_frag) {
2206                 dma_unmap_page(&adapter->pdev->dev,
2207                                dma_unmap_addr(rx_page_info, bus),
2208                                adapter->big_page_size, DMA_FROM_DEVICE);
2209                 rx_page_info->last_frag = false;
2210         } else {
2211                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2212                                         dma_unmap_addr(rx_page_info, bus),
2213                                         rx_frag_size, DMA_FROM_DEVICE);
2214         }
2215
2216         queue_tail_inc(rxq);
2217         atomic_dec(&rxq->used);
2218         return rx_page_info;
2219 }
2220
2221 /* Throw away the data in the Rx completion */
2222 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2223                                 struct be_rx_compl_info *rxcp)
2224 {
2225         struct be_rx_page_info *page_info;
2226         u16 i, num_rcvd = rxcp->num_rcvd;
2227
2228         for (i = 0; i < num_rcvd; i++) {
2229                 page_info = get_rx_page_info(rxo);
2230                 put_page(page_info->page);
2231                 memset(page_info, 0, sizeof(*page_info));
2232         }
2233 }
2234
2235 /*
2236  * skb_fill_rx_data forms a complete skb for an ether frame
2237  * indicated by rxcp.
2238  */
2239 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2240                              struct be_rx_compl_info *rxcp)
2241 {
2242         struct be_rx_page_info *page_info;
2243         u16 i, j;
2244         u16 hdr_len, curr_frag_len, remaining;
2245         u8 *start;
2246
2247         page_info = get_rx_page_info(rxo);
2248         start = page_address(page_info->page) + page_info->page_offset;
2249         prefetch(start);
2250
2251         /* Copy data in the first descriptor of this completion */
2252         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2253
2254         skb->len = curr_frag_len;
2255         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2256                 memcpy(skb->data, start, curr_frag_len);
2257                 /* Complete packet has now been moved to data */
2258                 put_page(page_info->page);
2259                 skb->data_len = 0;
2260                 skb->tail += curr_frag_len;
2261         } else {
2262                 hdr_len = ETH_HLEN;
2263                 memcpy(skb->data, start, hdr_len);
2264                 skb_shinfo(skb)->nr_frags = 1;
2265                 skb_frag_set_page(skb, 0, page_info->page);
2266                 skb_shinfo(skb)->frags[0].page_offset =
2267                                         page_info->page_offset + hdr_len;
2268                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2269                                   curr_frag_len - hdr_len);
2270                 skb->data_len = curr_frag_len - hdr_len;
2271                 skb->truesize += rx_frag_size;
2272                 skb->tail += hdr_len;
2273         }
2274         page_info->page = NULL;
2275
2276         if (rxcp->pkt_size <= rx_frag_size) {
2277                 BUG_ON(rxcp->num_rcvd != 1);
2278                 return;
2279         }
2280
2281         /* More frags present for this completion */
2282         remaining = rxcp->pkt_size - curr_frag_len;
2283         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2284                 page_info = get_rx_page_info(rxo);
2285                 curr_frag_len = min(remaining, rx_frag_size);
2286
2287                 /* Coalesce all frags from the same physical page in one slot */
2288                 if (page_info->page_offset == 0) {
2289                         /* Fresh page */
2290                         j++;
2291                         skb_frag_set_page(skb, j, page_info->page);
2292                         skb_shinfo(skb)->frags[j].page_offset =
2293                                                         page_info->page_offset;
2294                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2295                         skb_shinfo(skb)->nr_frags++;
2296                 } else {
2297                         put_page(page_info->page);
2298                 }
2299
2300                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2301                 skb->len += curr_frag_len;
2302                 skb->data_len += curr_frag_len;
2303                 skb->truesize += rx_frag_size;
2304                 remaining -= curr_frag_len;
2305                 page_info->page = NULL;
2306         }
2307         BUG_ON(j > MAX_SKB_FRAGS);
2308 }
2309
2310 /* Process the RX completion indicated by rxcp when GRO is disabled */
2311 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2312                                 struct be_rx_compl_info *rxcp)
2313 {
2314         struct be_adapter *adapter = rxo->adapter;
2315         struct net_device *netdev = adapter->netdev;
2316         struct sk_buff *skb;
2317
2318         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2319         if (unlikely(!skb)) {
2320                 rx_stats(rxo)->rx_drops_no_skbs++;
2321                 be_rx_compl_discard(rxo, rxcp);
2322                 return;
2323         }
2324
2325         skb_fill_rx_data(rxo, skb, rxcp);
2326
2327         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2328                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2329         else
2330                 skb_checksum_none_assert(skb);
2331
2332         skb->protocol = eth_type_trans(skb, netdev);
2333         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2334         if (netdev->features & NETIF_F_RXHASH)
2335                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2336
2337         skb->csum_level = rxcp->tunneled;
2338         skb_mark_napi_id(skb, napi);
2339
2340         if (rxcp->vlanf)
2341                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2342
2343         netif_receive_skb(skb);
2344 }
2345
2346 /* Process the RX completion indicated by rxcp when GRO is enabled */
2347 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2348                                     struct napi_struct *napi,
2349                                     struct be_rx_compl_info *rxcp)
2350 {
2351         struct be_adapter *adapter = rxo->adapter;
2352         struct be_rx_page_info *page_info;
2353         struct sk_buff *skb = NULL;
2354         u16 remaining, curr_frag_len;
2355         u16 i, j;
2356
2357         skb = napi_get_frags(napi);
2358         if (!skb) {
2359                 be_rx_compl_discard(rxo, rxcp);
2360                 return;
2361         }
2362
2363         remaining = rxcp->pkt_size;
2364         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2365                 page_info = get_rx_page_info(rxo);
2366
2367                 curr_frag_len = min(remaining, rx_frag_size);
2368
2369                 /* Coalesce all frags from the same physical page in one slot */
2370                 if (i == 0 || page_info->page_offset == 0) {
2371                         /* First frag or Fresh page */
2372                         j++;
2373                         skb_frag_set_page(skb, j, page_info->page);
2374                         skb_shinfo(skb)->frags[j].page_offset =
2375                                                         page_info->page_offset;
2376                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                 } else {
2378                         put_page(page_info->page);
2379                 }
2380                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2381                 skb->truesize += rx_frag_size;
2382                 remaining -= curr_frag_len;
2383                 memset(page_info, 0, sizeof(*page_info));
2384         }
2385         BUG_ON(j > MAX_SKB_FRAGS);
2386
2387         skb_shinfo(skb)->nr_frags = j + 1;
2388         skb->len = rxcp->pkt_size;
2389         skb->data_len = rxcp->pkt_size;
2390         skb->ip_summed = CHECKSUM_UNNECESSARY;
2391         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2392         if (adapter->netdev->features & NETIF_F_RXHASH)
2393                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2394
2395         skb->csum_level = rxcp->tunneled;
2396
2397         if (rxcp->vlanf)
2398                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2399
2400         napi_gro_frags(napi);
2401 }
2402
2403 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2404                                  struct be_rx_compl_info *rxcp)
2405 {
2406         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2407         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2408         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2409         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2410         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2411         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2412         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2413         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2414         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2415         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2416         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2417         if (rxcp->vlanf) {
2418                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2419                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2420         }
2421         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2422         rxcp->tunneled =
2423                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2424 }
2425
2426 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2427                                  struct be_rx_compl_info *rxcp)
2428 {
2429         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2430         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2431         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2432         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2433         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2434         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2435         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2436         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2437         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2438         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2439         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2440         if (rxcp->vlanf) {
2441                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2442                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2443         }
2444         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2445         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2446 }
2447
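/* Return the next valid RX completion on this RXQ's CQ, parsed into
 * rxo->rxcp, or NULL if none is pending. The entry's valid bit is cleared
 * so it is consumed exactly once.
 */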
2448 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2449 {
2450         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2451         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2452         struct be_adapter *adapter = rxo->adapter;
2453
2454         /* For checking the valid bit it is Ok to use either definition as the
2455          * valid bit is at the same position in both v0 and v1 Rx compl */
2456         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2457                 return NULL;
2458
2459         rmb();
2460         be_dws_le_to_cpu(compl, sizeof(*compl));
2461
2462         if (adapter->be3_native)
2463                 be_parse_rx_compl_v1(compl, rxcp);
2464         else
2465                 be_parse_rx_compl_v0(compl, rxcp);
2466
2467         if (rxcp->ip_frag)
2468                 rxcp->l4_csum = 0;
2469
2470         if (rxcp->vlanf) {
2471                 /* In QNQ modes, if qnq bit is not set, then the packet was
2472                  * tagged only with the transparent outer vlan-tag and must
2473                  * not be treated as a vlan packet by host
2474                  */
2475                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2476                         rxcp->vlanf = 0;
2477
2478                 if (!lancer_chip(adapter))
2479                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2480
2481                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2482                     !test_bit(rxcp->vlan_tag, adapter->vids))
2483                         rxcp->vlanf = 0;
2484         }
2485
2486         /* As the compl has been parsed, reset it; we won't touch it again */
2487         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2488
2489         queue_tail_inc(&rxo->cq);
2490         return rxcp;
2491 }
2492
2493 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2494 {
2495         u32 order = get_order(size);
2496
2497         if (order > 0)
2498                 gfp |= __GFP_COMP;
2499         return  alloc_pages(gfp, order);
2500 }
2501
2502 /*
2503  * Allocate a page, split it to fragments of size rx_frag_size and post as
2504  * receive buffers to BE
2505  */
2506 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2507 {
2508         struct be_adapter *adapter = rxo->adapter;
2509         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2510         struct be_queue_info *rxq = &rxo->q;
2511         struct page *pagep = NULL;
2512         struct device *dev = &adapter->pdev->dev;
2513         struct be_eth_rx_d *rxd;
2514         u64 page_dmaaddr = 0, frag_dmaaddr;
2515         u32 posted, page_offset = 0, notify = 0;
2516
2517         page_info = &rxo->page_info_tbl[rxq->head];
2518         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2519                 if (!pagep) {
2520                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2521                         if (unlikely(!pagep)) {
2522                                 rx_stats(rxo)->rx_post_fail++;
2523                                 break;
2524                         }
2525                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2526                                                     adapter->big_page_size,
2527                                                     DMA_FROM_DEVICE);
2528                         if (dma_mapping_error(dev, page_dmaaddr)) {
2529                                 put_page(pagep);
2530                                 pagep = NULL;
2531                                 adapter->drv_stats.dma_map_errors++;
2532                                 break;
2533                         }
2534                         page_offset = 0;
2535                 } else {
2536                         get_page(pagep);
2537                         page_offset += rx_frag_size;
2538                 }
2539                 page_info->page_offset = page_offset;
2540                 page_info->page = pagep;
2541
2542                 rxd = queue_head_node(rxq);
2543                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2544                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2545                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2546
2547                 /* Any space left in the current big page for another frag? */
2548                 if ((page_offset + rx_frag_size + rx_frag_size) >
2549                                         adapter->big_page_size) {
2550                         pagep = NULL;
2551                         page_info->last_frag = true;
2552                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2553                 } else {
2554                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2555                 }
2556
2557                 prev_page_info = page_info;
2558                 queue_head_inc(rxq);
2559                 page_info = &rxo->page_info_tbl[rxq->head];
2560         }
2561
2562         /* Mark the last frag of a page when we break out of the above loop
2563          * with no more slots available in the RXQ
2564          */
2565         if (pagep) {
2566                 prev_page_info->last_frag = true;
2567                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2568         }
2569
2570         if (posted) {
2571                 atomic_add(posted, &rxq->used);
2572                 if (rxo->rx_post_starved)
2573                         rxo->rx_post_starved = false;
2574                 do {
2575                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2576                         be_rxq_notify(adapter, rxq->id, notify);
2577                         posted -= notify;
2578                 } while (posted);
2579         } else if (atomic_read(&rxq->used) == 0) {
2580                 /* Let be_worker replenish when memory is available */
2581                 rxo->rx_post_starved = true;
2582         }
2583 }
2584
2585 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2586 {
2587         struct be_queue_info *tx_cq = &txo->cq;
2588         struct be_tx_compl_info *txcp = &txo->txcp;
2589         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2590
2591         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2592                 return NULL;
2593
2594         /* Ensure load ordering of valid bit dword and other dwords below */
2595         rmb();
2596         be_dws_le_to_cpu(compl, sizeof(*compl));
2597
2598         txcp->status = GET_TX_COMPL_BITS(status, compl);
2599         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2600
2601         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2602         queue_tail_inc(tx_cq);
2603         return txcp;
2604 }
2605
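/* Reclaim the WRBs covered by a TX completion: walk the TXQ from its tail up
 * to last_index, unmapping each fragment and freeing the completed skbs.
 * Returns the number of WRBs processed so the caller can adjust txq->used.
 */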
2606 static u16 be_tx_compl_process(struct be_adapter *adapter,
2607                                struct be_tx_obj *txo, u16 last_index)
2608 {
2609         struct sk_buff **sent_skbs = txo->sent_skb_list;
2610         struct be_queue_info *txq = &txo->q;
2611         struct sk_buff *skb = NULL;
2612         bool unmap_skb_hdr = false;
2613         struct be_eth_wrb *wrb;
2614         u16 num_wrbs = 0;
2615         u32 frag_index;
2616
2617         do {
2618                 if (sent_skbs[txq->tail]) {
2619                         /* Free skb from prev req */
2620                         if (skb)
2621                                 dev_consume_skb_any(skb);
2622                         skb = sent_skbs[txq->tail];
2623                         sent_skbs[txq->tail] = NULL;
2624                         queue_tail_inc(txq);  /* skip hdr wrb */
2625                         num_wrbs++;
2626                         unmap_skb_hdr = true;
2627                 }
2628                 wrb = queue_tail_node(txq);
2629                 frag_index = txq->tail;
2630                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2631                               (unmap_skb_hdr && skb_headlen(skb)));
2632                 unmap_skb_hdr = false;
2633                 queue_tail_inc(txq);
2634                 num_wrbs++;
2635         } while (frag_index != last_index);
2636         dev_consume_skb_any(skb);
2637
2638         return num_wrbs;
2639 }
2640
2641 /* Return the number of events in the event queue */
2642 static inline int events_get(struct be_eq_obj *eqo)
2643 {
2644         struct be_eq_entry *eqe;
2645         int num = 0;
2646
2647         do {
2648                 eqe = queue_tail_node(&eqo->q);
2649                 if (eqe->evt == 0)
2650                         break;
2651
2652                 rmb();
2653                 eqe->evt = 0;
2654                 num++;
2655                 queue_tail_inc(&eqo->q);
2656         } while (true);
2657
2658         return num;
2659 }
2660
2661 /* Leaves the EQ in disarmed state */
2662 static void be_eq_clean(struct be_eq_obj *eqo)
2663 {
2664         int num = events_get(eqo);
2665
2666         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2667 }
2668
2669 /* Free posted rx buffers that were not used */
2670 static void be_rxq_clean(struct be_rx_obj *rxo)
2671 {
2672         struct be_queue_info *rxq = &rxo->q;
2673         struct be_rx_page_info *page_info;
2674
2675         while (atomic_read(&rxq->used) > 0) {
2676                 page_info = get_rx_page_info(rxo);
2677                 put_page(page_info->page);
2678                 memset(page_info, 0, sizeof(*page_info));
2679         }
2680         BUG_ON(atomic_read(&rxq->used));
2681         rxq->tail = 0;
2682         rxq->head = 0;
2683 }
2684
2685 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2686 {
2687         struct be_queue_info *rx_cq = &rxo->cq;
2688         struct be_rx_compl_info *rxcp;
2689         struct be_adapter *adapter = rxo->adapter;
2690         int flush_wait = 0;
2691
2692         /* Consume pending rx completions.
2693          * Wait for the flush completion (identified by zero num_rcvd)
2694          * to arrive. Notify CQ even when there are no more CQ entries
2695          * for HW to flush partially coalesced CQ entries.
2696          * In Lancer, there is no need to wait for flush compl.
2697          */
2698         for (;;) {
2699                 rxcp = be_rx_compl_get(rxo);
2700                 if (!rxcp) {
2701                         if (lancer_chip(adapter))
2702                                 break;
2703
2704                         if (flush_wait++ > 50 ||
2705                             be_check_error(adapter,
2706                                            BE_ERROR_HW)) {
2707                                 dev_warn(&adapter->pdev->dev,
2708                                          "did not receive flush compl\n");
2709                                 break;
2710                         }
2711                         be_cq_notify(adapter, rx_cq->id, true, 0);
2712                         mdelay(1);
2713                 } else {
2714                         be_rx_compl_discard(rxo, rxcp);
2715                         be_cq_notify(adapter, rx_cq->id, false, 1);
2716                         if (rxcp->num_rcvd == 0)
2717                                 break;
2718                 }
2719         }
2720
2721         /* After cleanup, leave the CQ in unarmed state */
2722         be_cq_notify(adapter, rx_cq->id, false, 0);
2723 }
2724
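/* Drain TX completions at teardown time: poll each TXQ until the HW has been
 * silent for ~10ms, then reclaim any WRBs that were enqueued but never
 * notified to HW and reset those queues' indices.
 */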
2725 static void be_tx_compl_clean(struct be_adapter *adapter)
2726 {
2727         struct device *dev = &adapter->pdev->dev;
2728         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2729         struct be_tx_compl_info *txcp;
2730         struct be_queue_info *txq;
2731         u32 end_idx, notified_idx;
2732         struct be_tx_obj *txo;
2733         int i, pending_txqs;
2734
2735         /* Stop polling for compls when HW has been silent for 10ms */
2736         do {
2737                 pending_txqs = adapter->num_tx_qs;
2738
2739                 for_all_tx_queues(adapter, txo, i) {
2740                         cmpl = 0;
2741                         num_wrbs = 0;
2742                         txq = &txo->q;
2743                         while ((txcp = be_tx_compl_get(txo))) {
2744                                 num_wrbs +=
2745                                         be_tx_compl_process(adapter, txo,
2746                                                             txcp->end_index);
2747                                 cmpl++;
2748                         }
2749                         if (cmpl) {
2750                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2751                                 atomic_sub(num_wrbs, &txq->used);
2752                                 timeo = 0;
2753                         }
2754                         if (!be_is_tx_compl_pending(txo))
2755                                 pending_txqs--;
2756                 }
2757
2758                 if (pending_txqs == 0 || ++timeo > 10 ||
2759                     be_check_error(adapter, BE_ERROR_HW))
2760                         break;
2761
2762                 mdelay(1);
2763         } while (true);
2764
2765         /* Free enqueued TX that was never notified to HW */
2766         for_all_tx_queues(adapter, txo, i) {
2767                 txq = &txo->q;
2768
2769                 if (atomic_read(&txq->used)) {
2770                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2771                                  i, atomic_read(&txq->used));
2772                         notified_idx = txq->tail;
2773                         end_idx = txq->tail;
2774                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2775                                   txq->len);
2776                         /* Use the tx-compl process logic to handle requests
2777                          * that were not sent to the HW.
2778                          */
2779                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2780                         atomic_sub(num_wrbs, &txq->used);
2781                         BUG_ON(atomic_read(&txq->used));
2782                         txo->pend_wrb_cnt = 0;
2783                         /* Since hw was never notified of these requests,
2784                          * reset TXQ indices
2785                          */
2786                         txq->head = notified_idx;
2787                         txq->tail = notified_idx;
2788                 }
2789         }
2790 }
2791
2792 static void be_evt_queues_destroy(struct be_adapter *adapter)
2793 {
2794         struct be_eq_obj *eqo;
2795         int i;
2796
2797         for_all_evt_queues(adapter, eqo, i) {
2798                 if (eqo->q.created) {
2799                         be_eq_clean(eqo);
2800                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2801                         netif_napi_del(&eqo->napi);
2802                         free_cpumask_var(eqo->affinity_mask);
2803                 }
2804                 be_queue_free(adapter, &eqo->q);
2805         }
2806 }
2807
2808 static int be_evt_queues_create(struct be_adapter *adapter)
2809 {
2810         struct be_queue_info *eq;
2811         struct be_eq_obj *eqo;
2812         struct be_aic_obj *aic;
2813         int i, rc;
2814
2815         /* need enough EQs to service both RX and TX queues */
2816         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2817                                     max(adapter->cfg_num_rx_irqs,
2818                                         adapter->cfg_num_tx_irqs));
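        /* Worked example (illustrative numbers only): with 8 available
         * IRQs, 6 configured RX IRQs and 4 configured TX IRQs, this
         * evaluates to min(8, max(6, 4)) = 6 event queues.
         */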
2819
2820         for_all_evt_queues(adapter, eqo, i) {
2821                 int numa_node = dev_to_node(&adapter->pdev->dev);
2822
2823                 aic = &adapter->aic_obj[i];
2824                 eqo->adapter = adapter;
2825                 eqo->idx = i;
2826                 aic->max_eqd = BE_MAX_EQD;
2827                 aic->enable = true;
2828
2829                 eq = &eqo->q;
2830                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2831                                     sizeof(struct be_eq_entry));
2832                 if (rc)
2833                         return rc;
2834
2835                 rc = be_cmd_eq_create(adapter, eqo);
2836                 if (rc)
2837                         return rc;
2838
2839                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2840                         return -ENOMEM;
2841                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2842                                 eqo->affinity_mask);
2843                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2844                                BE_NAPI_WEIGHT);
2845         }
2846         return 0;
2847 }
2848
2849 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2850 {
2851         struct be_queue_info *q;
2852
2853         q = &adapter->mcc_obj.q;
2854         if (q->created)
2855                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2856         be_queue_free(adapter, q);
2857
2858         q = &adapter->mcc_obj.cq;
2859         if (q->created)
2860                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2861         be_queue_free(adapter, q);
2862 }
2863
2864 /* Must be called only after TX qs are created as MCC shares TX EQ */
2865 static int be_mcc_queues_create(struct be_adapter *adapter)
2866 {
2867         struct be_queue_info *q, *cq;
2868
2869         cq = &adapter->mcc_obj.cq;
2870         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2871                            sizeof(struct be_mcc_compl)))
2872                 goto err;
2873
2874         /* Use the default EQ for MCC completions */
2875         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2876                 goto mcc_cq_free;
2877
2878         q = &adapter->mcc_obj.q;
2879         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2880                 goto mcc_cq_destroy;
2881
2882         if (be_cmd_mccq_create(adapter, q, cq))
2883                 goto mcc_q_free;
2884
2885         return 0;
2886
2887 mcc_q_free:
2888         be_queue_free(adapter, q);
2889 mcc_cq_destroy:
2890         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2891 mcc_cq_free:
2892         be_queue_free(adapter, cq);
2893 err:
2894         return -1;
2895 }
2896
2897 static void be_tx_queues_destroy(struct be_adapter *adapter)
2898 {
2899         struct be_queue_info *q;
2900         struct be_tx_obj *txo;
2901         u8 i;
2902
2903         for_all_tx_queues(adapter, txo, i) {
2904                 q = &txo->q;
2905                 if (q->created)
2906                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2907                 be_queue_free(adapter, q);
2908
2909                 q = &txo->cq;
2910                 if (q->created)
2911                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2912                 be_queue_free(adapter, q);
2913         }
2914 }
2915
2916 static int be_tx_qs_create(struct be_adapter *adapter)
2917 {
2918         struct be_queue_info *cq;
2919         struct be_tx_obj *txo;
2920         struct be_eq_obj *eqo;
2921         int status, i;
2922
2923         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2924
2925         for_all_tx_queues(adapter, txo, i) {
2926                 cq = &txo->cq;
2927                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2928                                         sizeof(struct be_eth_tx_compl));
2929                 if (status)
2930                         return status;
2931
2932                 u64_stats_init(&txo->stats.sync);
2933                 u64_stats_init(&txo->stats.sync_compl);
2934
2935                 /* If num_evt_qs is less than num_tx_qs, then more than
2936                  * one txq shares an eq
2937                  */
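                /* e.g. with 4 EQs and 8 TX queues, txq0 and txq4 map to
                 * eq0, txq1 and txq5 to eq1, and so on (i % num_evt_qs);
                 * the numbers here are purely illustrative.
                 */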
2938                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2939                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2940                 if (status)
2941                         return status;
2942
2943                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2944                                         sizeof(struct be_eth_wrb));
2945                 if (status)
2946                         return status;
2947
2948                 status = be_cmd_txq_create(adapter, txo);
2949                 if (status)
2950                         return status;
2951
2952                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2953                                     eqo->idx);
2954         }
2955
2956         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2957                  adapter->num_tx_qs);
2958         return 0;
2959 }
2960
2961 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2962 {
2963         struct be_queue_info *q;
2964         struct be_rx_obj *rxo;
2965         int i;
2966
2967         for_all_rx_queues(adapter, rxo, i) {
2968                 q = &rxo->cq;
2969                 if (q->created)
2970                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2971                 be_queue_free(adapter, q);
2972         }
2973 }
2974
2975 static int be_rx_cqs_create(struct be_adapter *adapter)
2976 {
2977         struct be_queue_info *eq, *cq;
2978         struct be_rx_obj *rxo;
2979         int rc, i;
2980
2981         adapter->num_rss_qs =
2982                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2983
2984         /* We'll use RSS only if at least 2 RSS rings are supported. */
2985         if (adapter->num_rss_qs < 2)
2986                 adapter->num_rss_qs = 0;
2987
2988         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2989
2990         /* When the interface is not capable of RSS rings (and there is no
2991          * need to create a default RXQ) we'll still need one RXQ
2992          */
2993         if (adapter->num_rx_qs == 0)
2994                 adapter->num_rx_qs = 1;
2995
2996         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2997         for_all_rx_queues(adapter, rxo, i) {
2998                 rxo->adapter = adapter;
2999                 cq = &rxo->cq;
3000                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3001                                     sizeof(struct be_eth_rx_compl));
3002                 if (rc)
3003                         return rc;
3004
3005                 u64_stats_init(&rxo->stats.sync);
3006                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3007                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3008                 if (rc)
3009                         return rc;
3010         }
3011
3012         dev_info(&adapter->pdev->dev,
3013                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3014         return 0;
3015 }
3016
3017 static irqreturn_t be_intx(int irq, void *dev)
3018 {
3019         struct be_eq_obj *eqo = dev;
3020         struct be_adapter *adapter = eqo->adapter;
3021         int num_evts = 0;
3022
3023         /* IRQ is not expected when NAPI is scheduled as the EQ
3024          * will not be armed.
3025          * But, this can happen on Lancer INTx where it takes
3026          * a while to de-assert INTx or in BE2 where occasionally
3027          * an interrupt may be raised even when EQ is unarmed.
3028          * If NAPI is already scheduled, then counting & notifying
3029          * events will orphan them.
3030          */
3031         if (napi_schedule_prep(&eqo->napi)) {
3032                 num_evts = events_get(eqo);
3033                 __napi_schedule(&eqo->napi);
3034                 if (num_evts)
3035                         eqo->spurious_intr = 0;
3036         }
3037         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3038
3039         /* Return IRQ_HANDLED only for the first spurious intr
3040          * after a valid intr to stop the kernel from branding
3041          * this irq as a bad one!
3042          */
3043         if (num_evts || eqo->spurious_intr++ == 0)
3044                 return IRQ_HANDLED;
3045         else
3046                 return IRQ_NONE;
3047 }
3048
3049 static irqreturn_t be_msix(int irq, void *dev)
3050 {
3051         struct be_eq_obj *eqo = dev;
3052
3053         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3054         napi_schedule(&eqo->napi);
3055         return IRQ_HANDLED;
3056 }
3057
3058 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3059 {
3060         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3061 }
3062
3063 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3064                          int budget, int polling)
3065 {
3066         struct be_adapter *adapter = rxo->adapter;
3067         struct be_queue_info *rx_cq = &rxo->cq;
3068         struct be_rx_compl_info *rxcp;
3069         u32 work_done;
3070         u32 frags_consumed = 0;
3071
3072         for (work_done = 0; work_done < budget; work_done++) {
3073                 rxcp = be_rx_compl_get(rxo);
3074                 if (!rxcp)
3075                         break;
3076
3077                 /* Is it a flush compl that has no data */
3078                 if (unlikely(rxcp->num_rcvd == 0))
3079                         goto loop_continue;
3080
3081                 /* Discard compl with partial DMA Lancer B0 */
3082                 if (unlikely(!rxcp->pkt_size)) {
3083                         be_rx_compl_discard(rxo, rxcp);
3084                         goto loop_continue;
3085                 }
3086
3087                 /* On BE drop pkts that arrive due to imperfect filtering in
3088                  * promiscuous mode on some SKUs
3089                  */
3090                 if (unlikely(rxcp->port != adapter->port_num &&
3091                              !lancer_chip(adapter))) {
3092                         be_rx_compl_discard(rxo, rxcp);
3093                         goto loop_continue;
3094                 }
3095
3096                 /* Don't do gro when we're busy_polling */
3097                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3098                         be_rx_compl_process_gro(rxo, napi, rxcp);
3099                 else
3100                         be_rx_compl_process(rxo, napi, rxcp);
3101
3102 loop_continue:
3103                 frags_consumed += rxcp->num_rcvd;
3104                 be_rx_stats_update(rxo, rxcp);
3105         }
3106
3107         if (work_done) {
3108                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3109
3110                 /* When an rx-obj gets into post_starved state, just
3111                  * let be_worker do the posting.
3112                  */
3113                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3114                     !rxo->rx_post_starved)
3115                         be_post_rx_frags(rxo, GFP_ATOMIC,
3116                                          max_t(u32, MAX_RX_POST,
3117                                                frags_consumed));
3118         }
3119
3120         return work_done;
3121 }
3122
3123 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3124 {
3125         switch (status) {
3126         case BE_TX_COMP_HDR_PARSE_ERR:
3127                 tx_stats(txo)->tx_hdr_parse_err++;
3128                 break;
3129         case BE_TX_COMP_NDMA_ERR:
3130                 tx_stats(txo)->tx_dma_err++;
3131                 break;
3132         case BE_TX_COMP_ACL_ERR:
3133                 tx_stats(txo)->tx_spoof_check_err++;
3134                 break;
3135         }
3136 }
3137
3138 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3139 {
3140         switch (status) {
3141         case LANCER_TX_COMP_LSO_ERR:
3142                 tx_stats(txo)->tx_tso_err++;
3143                 break;
3144         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3145         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3146                 tx_stats(txo)->tx_spoof_check_err++;
3147                 break;
3148         case LANCER_TX_COMP_QINQ_ERR:
3149                 tx_stats(txo)->tx_qinq_err++;
3150                 break;
3151         case LANCER_TX_COMP_PARITY_ERR:
3152                 tx_stats(txo)->tx_internal_parity_err++;
3153                 break;
3154         case LANCER_TX_COMP_DMA_ERR:
3155                 tx_stats(txo)->tx_dma_err++;
3156                 break;
3157         }
3158 }
3159
3160 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3161                           int idx)
3162 {
3163         int num_wrbs = 0, work_done = 0;
3164         struct be_tx_compl_info *txcp;
3165
3166         while ((txcp = be_tx_compl_get(txo))) {
3167                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3168                 work_done++;
3169
3170                 if (txcp->status) {
3171                         if (lancer_chip(adapter))
3172                                 lancer_update_tx_err(txo, txcp->status);
3173                         else
3174                                 be_update_tx_err(txo, txcp->status);
3175                 }
3176         }
3177
3178         if (work_done) {
3179                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3180                 atomic_sub(num_wrbs, &txo->q.used);
3181
3182                 /* As Tx wrbs have been freed up, wake up the netdev queue
3183                  * if it was stopped due to lack of tx wrbs.  */
3184                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3185                     be_can_txq_wake(txo)) {
3186                         netif_wake_subqueue(adapter->netdev, idx);
3187                 }
3188
3189                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3190                 tx_stats(txo)->tx_compl += work_done;
3191                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3192         }
3193 }
3194
3195 #ifdef CONFIG_NET_RX_BUSY_POLL
3196 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3197 {
3198         bool status = true;
3199
3200         spin_lock(&eqo->lock); /* BH is already disabled */
3201         if (eqo->state & BE_EQ_LOCKED) {
3202                 WARN_ON(eqo->state & BE_EQ_NAPI);
3203                 eqo->state |= BE_EQ_NAPI_YIELD;
3204                 status = false;
3205         } else {
3206                 eqo->state = BE_EQ_NAPI;
3207         }
3208         spin_unlock(&eqo->lock);
3209         return status;
3210 }
3211
3212 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3213 {
3214         spin_lock(&eqo->lock); /* BH is already disabled */
3215
3216         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3217         eqo->state = BE_EQ_IDLE;
3218
3219         spin_unlock(&eqo->lock);
3220 }
3221
3222 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3223 {
3224         bool status = true;
3225
3226         spin_lock_bh(&eqo->lock);
3227         if (eqo->state & BE_EQ_LOCKED) {
3228                 eqo->state |= BE_EQ_POLL_YIELD;
3229                 status = false;
3230         } else {
3231                 eqo->state |= BE_EQ_POLL;
3232         }
3233         spin_unlock_bh(&eqo->lock);
3234         return status;
3235 }
3236
3237 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3238 {
3239         spin_lock_bh(&eqo->lock);
3240
3241         WARN_ON(eqo->state & (BE_EQ_NAPI));
3242         eqo->state = BE_EQ_IDLE;
3243
3244         spin_unlock_bh(&eqo->lock);
3245 }
3246
3247 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3248 {
3249         spin_lock_init(&eqo->lock);
3250         eqo->state = BE_EQ_IDLE;
3251 }
3252
3253 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3254 {
3255         local_bh_disable();
3256
3257         /* It's enough to just acquire napi lock on the eqo to stop
3258          * be_busy_poll() from processing any queues.
3259          */
3260         while (!be_lock_napi(eqo))
3261                 mdelay(1);
3262
3263         local_bh_enable();
3264 }
3265
3266 #else /* CONFIG_NET_RX_BUSY_POLL */
3267
3268 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3269 {
3270         return true;
3271 }
3272
3273 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3274 {
3275 }
3276
3277 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3278 {
3279         return false;
3280 }
3281
3282 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3283 {
3284 }
3285
3286 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3287 {
3288 }
3289
3290 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3291 {
3292 }
3293 #endif /* CONFIG_NET_RX_BUSY_POLL */
3294
3295 int be_poll(struct napi_struct *napi, int budget)
3296 {
3297         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3298         struct be_adapter *adapter = eqo->adapter;
3299         int max_work = 0, work, i, num_evts;
3300         struct be_rx_obj *rxo;
3301         struct be_tx_obj *txo;
3302         u32 mult_enc = 0;
3303
3304         num_evts = events_get(eqo);
3305
3306         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3307                 be_process_tx(adapter, txo, i);
3308
3309         if (be_lock_napi(eqo)) {
3310                 /* This loop will iterate twice for EQ0 in which
3311                  * completions of the last RXQ (default one) are also processed
3312                  * For other EQs the loop iterates only once
3313                  */
3314                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3315                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3316                         max_work = max(work, max_work);
3317                 }
3318                 be_unlock_napi(eqo);
3319         } else {
3320                 max_work = budget;
3321         }
3322
3323         if (is_mcc_eqo(eqo))
3324                 be_process_mcc(adapter);
3325
3326         if (max_work < budget) {
3327                 napi_complete(napi);
3328
3329                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3330                  * delay via a delay multiplier encoding value
3331                  */
3332                 if (skyhawk_chip(adapter))
3333                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3334
3335                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3336                              mult_enc);
3337         } else {
3338                 /* As we'll continue in polling mode, count and clear events */
3339                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3340         }
3341         return max_work;
3342 }
3343
3344 #ifdef CONFIG_NET_RX_BUSY_POLL
3345 static int be_busy_poll(struct napi_struct *napi)
3346 {
3347         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3348         struct be_adapter *adapter = eqo->adapter;
3349         struct be_rx_obj *rxo;
3350         int i, work = 0;
3351
3352         if (!be_lock_busy_poll(eqo))
3353                 return LL_FLUSH_BUSY;
3354
3355         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3356                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3357                 if (work)
3358                         break;
3359         }
3360
3361         be_unlock_busy_poll(eqo);
3362         return work;
3363 }
3364 #endif
3365
3366 void be_detect_error(struct be_adapter *adapter)
3367 {
3368         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3369         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3370         u32 i;
3371         struct device *dev = &adapter->pdev->dev;
3372
3373         if (be_check_error(adapter, BE_ERROR_HW))
3374                 return;
3375
3376         if (lancer_chip(adapter)) {
3377                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3378                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3379                         be_set_error(adapter, BE_ERROR_UE);
3380                         sliport_err1 = ioread32(adapter->db +
3381                                                 SLIPORT_ERROR1_OFFSET);
3382                         sliport_err2 = ioread32(adapter->db +
3383                                                 SLIPORT_ERROR2_OFFSET);
3384                         /* Do not log error messages if it's a FW reset */
3385                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3386                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3387                                 dev_info(dev, "Firmware update in progress\n");
3388                         } else {
3389                                 dev_err(dev, "Error detected in the card\n");
3390                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3391                                         sliport_status);
3392                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3393                                         sliport_err1);
3394                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3395                                         sliport_err2);
3396                         }
3397                 }
3398         } else {
3399                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3400                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3401                 ue_lo_mask = ioread32(adapter->pcicfg +
3402                                       PCICFG_UE_STATUS_LOW_MASK);
3403                 ue_hi_mask = ioread32(adapter->pcicfg +
3404                                       PCICFG_UE_STATUS_HI_MASK);
3405
3406                 ue_lo = (ue_lo & ~ue_lo_mask);
3407                 ue_hi = (ue_hi & ~ue_hi_mask);
3408
3409                 /* On certain platforms BE hardware can report spurious UEs.
3410                  * In case of a real UE the HW will stop working on its own anyway,
3411                  * so the hw_error flag is not set merely on UE detection.
3412                  */
3413
3414                 if (ue_lo || ue_hi) {
3415                         dev_err(dev, "Error detected in the adapter");
3416                         if (skyhawk_chip(adapter))
3417                                 be_set_error(adapter, BE_ERROR_UE);
3418
3419                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3420                                 if (ue_lo & 1)
3421                                         dev_err(dev, "UE: %s bit set\n",
3422                                                 ue_status_low_desc[i]);
3423                         }
3424                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3425                                 if (ue_hi & 1)
3426                                         dev_err(dev, "UE: %s bit set\n",
3427                                                 ue_status_hi_desc[i]);
3428                         }
3429                 }
3430         }
3431 }
3432
3433 static void be_msix_disable(struct be_adapter *adapter)
3434 {
3435         if (msix_enabled(adapter)) {
3436                 pci_disable_msix(adapter->pdev);
3437                 adapter->num_msix_vec = 0;
3438                 adapter->num_msix_roce_vec = 0;
3439         }
3440 }
3441
3442 static int be_msix_enable(struct be_adapter *adapter)
3443 {
3444         unsigned int i, max_roce_eqs;
3445         struct device *dev = &adapter->pdev->dev;
3446         int num_vec;
3447
3448         /* If RoCE is supported, program the max number of vectors that
3449          * could be used for NIC and RoCE, else, just program the number
3450          * we'll use initially.
3451          */
3452         if (be_roce_supported(adapter)) {
3453                 max_roce_eqs =
3454                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3455                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3456                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3457         } else {
3458                 num_vec = max(adapter->cfg_num_rx_irqs,
3459                               adapter->cfg_num_tx_irqs);
3460         }
3461
3462         for (i = 0; i < num_vec; i++)
3463                 adapter->msix_entries[i].entry = i;
3464
3465         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3466                                         MIN_MSIX_VECTORS, num_vec);
3467         if (num_vec < 0)
3468                 goto fail;
3469
3470         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3471                 adapter->num_msix_roce_vec = num_vec / 2;
3472                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3473                          adapter->num_msix_roce_vec);
3474         }
3475
3476         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
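        /* Example split (illustrative): if 16 vectors were granted and RoCE
         * is supported, 8 (num_vec / 2) are reserved for RoCE and the
         * remaining 8 are used for NIC event queues.
         */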
3477
3478         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3479                  adapter->num_msix_vec);
3480         return 0;
3481
3482 fail:
3483         dev_warn(dev, "MSIx enable failed\n");
3484
3485         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3486         if (be_virtfn(adapter))
3487                 return num_vec;
3488         return 0;
3489 }
3490
3491 static inline int be_msix_vec_get(struct be_adapter *adapter,
3492                                   struct be_eq_obj *eqo)
3493 {
3494         return adapter->msix_entries[eqo->msix_idx].vector;
3495 }
3496
3497 static int be_msix_register(struct be_adapter *adapter)
3498 {
3499         struct net_device *netdev = adapter->netdev;
3500         struct be_eq_obj *eqo;
3501         int status, i, vec;
3502
3503         for_all_evt_queues(adapter, eqo, i) {
3504                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3505                 vec = be_msix_vec_get(adapter, eqo);
3506                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3507                 if (status)
3508                         goto err_msix;
3509
3510                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3511         }
3512
3513         return 0;
3514 err_msix:
3515         for (i--; i >= 0; i--) {
3516                 eqo = &adapter->eq_obj[i];
3517                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3518         }
3519         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3520                  status);
3521         be_msix_disable(adapter);
3522         return status;
3523 }
3524
3525 static int be_irq_register(struct be_adapter *adapter)
3526 {
3527         struct net_device *netdev = adapter->netdev;
3528         int status;
3529
3530         if (msix_enabled(adapter)) {
3531                 status = be_msix_register(adapter);
3532                 if (status == 0)
3533                         goto done;
3534                 /* INTx is not supported for VF */
3535                 if (be_virtfn(adapter))
3536                         return status;
3537         }
3538
3539         /* INTx: only the first EQ is used */
3540         netdev->irq = adapter->pdev->irq;
3541         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3542                              &adapter->eq_obj[0]);
3543         if (status) {
3544                 dev_err(&adapter->pdev->dev,
3545                         "INTx request IRQ failed - err %d\n", status);
3546                 return status;
3547         }
3548 done:
3549         adapter->isr_registered = true;
3550         return 0;
3551 }
3552
3553 static void be_irq_unregister(struct be_adapter *adapter)
3554 {
3555         struct net_device *netdev = adapter->netdev;
3556         struct be_eq_obj *eqo;
3557         int i, vec;
3558
3559         if (!adapter->isr_registered)
3560                 return;
3561
3562         /* INTx */
3563         if (!msix_enabled(adapter)) {
3564                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3565                 goto done;
3566         }
3567
3568         /* MSIx */
3569         for_all_evt_queues(adapter, eqo, i) {
3570                 vec = be_msix_vec_get(adapter, eqo);
3571                 irq_set_affinity_hint(vec, NULL);
3572                 free_irq(vec, eqo);
3573         }
3574
3575 done:
3576         adapter->isr_registered = false;
3577 }
3578
3579 static void be_rx_qs_destroy(struct be_adapter *adapter)
3580 {
3581         struct rss_info *rss = &adapter->rss_info;
3582         struct be_queue_info *q;
3583         struct be_rx_obj *rxo;
3584         int i;
3585
3586         for_all_rx_queues(adapter, rxo, i) {
3587                 q = &rxo->q;
3588                 if (q->created) {
3589                         /* If RXQs are destroyed while in an "out of buffer"
3590                          * state, there is a possibility of an HW stall on
3591                          * Lancer. So, post 64 buffers to each queue to relieve
3592                          * the "out of buffer" condition.
3593                          * Make sure there's space in the RXQ before posting.
3594                          */
3595                         if (lancer_chip(adapter)) {
3596                                 be_rx_cq_clean(rxo);
3597                                 if (atomic_read(&q->used) == 0)
3598                                         be_post_rx_frags(rxo, GFP_KERNEL,
3599                                                          MAX_RX_POST);
3600                         }
3601
3602                         be_cmd_rxq_destroy(adapter, q);
3603                         be_rx_cq_clean(rxo);
3604                         be_rxq_clean(rxo);
3605                 }
3606                 be_queue_free(adapter, q);
3607         }
3608
3609         if (rss->rss_flags) {
3610                 rss->rss_flags = RSS_ENABLE_NONE;
3611                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3612                                   128, rss->rss_hkey);
3613         }
3614 }
3615
3616 static void be_disable_if_filters(struct be_adapter *adapter)
3617 {
3618         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3619         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3620             check_privilege(adapter, BE_PRIV_FILTMGMT))
3621                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3622
3623         be_clear_uc_list(adapter);
3624         be_clear_mc_list(adapter);
3625
3626         /* The IFACE flags are enabled in the open path and cleared
3627          * in the close path. When a VF gets detached from the host and
3628          * assigned to a VM the following happens:
3629          *      - VF's IFACE flags get cleared in the detach path
3630          *      - IFACE create is issued by the VF in the attach path
3631          * Due to a bug in the BE3/Skyhawk-R FW
3632          * (Lancer FW doesn't have the bug), the IFACE capability flags
3633          * specified along with the IFACE create cmd issued by a VF are not
3634          * honoured by FW.  As a consequence, if a *new* driver
3635          * (that enables/disables IFACE flags in open/close)
3636          * is loaded in the host and an *old* driver is used by a VM/VF,
3637          * the IFACE gets created *without* the needed flags.
3638          * To avoid this, disable RX-filter flags only for Lancer.
3639          */
3640         if (lancer_chip(adapter)) {
3641                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3642                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3643         }
3644 }
3645
3646 static int be_close(struct net_device *netdev)
3647 {
3648         struct be_adapter *adapter = netdev_priv(netdev);
3649         struct be_eq_obj *eqo;
3650         int i;
3651
3652         /* This protection is needed as be_close() may be called even when the
3653          * adapter is in a cleared state (after an EEH perm failure)
3654          */
3655         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3656                 return 0;
3657
3658         /* Before attempting cleanup ensure all the pending cmds in the
3659          * config_wq have finished execution
3660          */
3661         flush_workqueue(be_wq);
3662
3663         be_disable_if_filters(adapter);
3664
3665         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3666                 for_all_evt_queues(adapter, eqo, i) {
3667                         napi_disable(&eqo->napi);
3668                         be_disable_busy_poll(eqo);
3669                 }
3670                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3671         }
3672
3673         be_async_mcc_disable(adapter);
3674
3675         /* Wait for all pending tx completions to arrive so that
3676          * all tx skbs are freed.
3677          */
3678         netif_tx_disable(netdev);
3679         be_tx_compl_clean(adapter);
3680
3681         be_rx_qs_destroy(adapter);
3682
3683         for_all_evt_queues(adapter, eqo, i) {
3684                 if (msix_enabled(adapter))
3685                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3686                 else
3687                         synchronize_irq(netdev->irq);
3688                 be_eq_clean(eqo);
3689         }
3690
3691         be_irq_unregister(adapter);
3692
3693         return 0;
3694 }
3695
3696 static int be_rx_qs_create(struct be_adapter *adapter)
3697 {
3698         struct rss_info *rss = &adapter->rss_info;
3699         u8 rss_key[RSS_HASH_KEY_LEN];
3700         struct be_rx_obj *rxo;
3701         int rc, i, j;
3702
3703         for_all_rx_queues(adapter, rxo, i) {
3704                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3705                                     sizeof(struct be_eth_rx_d));
3706                 if (rc)
3707                         return rc;
3708         }
3709
3710         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3711                 rxo = default_rxo(adapter);
3712                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3713                                        rx_frag_size, adapter->if_handle,
3714                                        false, &rxo->rss_id);
3715                 if (rc)
3716                         return rc;
3717         }
3718
3719         for_all_rss_queues(adapter, rxo, i) {
3720                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3721                                        rx_frag_size, adapter->if_handle,
3722                                        true, &rxo->rss_id);
3723                 if (rc)
3724                         return rc;
3725         }
3726
3727         if (be_multi_rxq(adapter)) {
3728                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3729                         for_all_rss_queues(adapter, rxo, i) {
3730                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3731                                         break;
3732                                 rss->rsstable[j + i] = rxo->rss_id;
3733                                 rss->rss_queue[j + i] = i;
3734                         }
3735                 }
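                /* Net effect: indirection-table entry n points at RSS queue
                 * n % num_rss_qs; e.g. with 4 RSS queues, entries 0,4,8,...
                 * hash to rxo0, entries 1,5,9,... to rxo1, and so on (the
                 * queue count here is only an example).
                 */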
3736                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3737                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3738
3739                 if (!BEx_chip(adapter))
3740                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3741                                 RSS_ENABLE_UDP_IPV6;
3742
3743                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3744                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3745                                        RSS_INDIR_TABLE_LEN, rss_key);
3746                 if (rc) {
3747                         rss->rss_flags = RSS_ENABLE_NONE;
3748                         return rc;
3749                 }
3750
3751                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3752         } else {
3753                 /* Disable RSS, if only default RX Q is created */
3754                 rss->rss_flags = RSS_ENABLE_NONE;
3755         }
3756
3757
3758         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3759          * which is a queue empty condition
3760          */
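        /* (A completely full ring would otherwise be indistinguishable from
         * an empty one, since both would have head == tail; keeping one
         * slot unused preserves the distinction.)
         */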
3761         for_all_rx_queues(adapter, rxo, i)
3762                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3763
3764         return 0;
3765 }
3766
3767 static int be_enable_if_filters(struct be_adapter *adapter)
3768 {
3769         int status;
3770
3771         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3772         if (status)
3773                 return status;
3774
3775         /* Don't add MAC on BE3 VFs without FILTMGMT privilege */
3776         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3777             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3778                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3779                 if (status)
3780                         return status;
3781                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782         }
3783
3784         if (adapter->vlans_added)
3785                 be_vid_config(adapter);
3786
3787         __be_set_rx_mode(adapter);
3788
3789         return 0;
3790 }
3791
3792 static int be_open(struct net_device *netdev)
3793 {
3794         struct be_adapter *adapter = netdev_priv(netdev);
3795         struct be_eq_obj *eqo;
3796         struct be_rx_obj *rxo;
3797         struct be_tx_obj *txo;
3798         u8 link_status;
3799         int status, i;
3800
3801         status = be_rx_qs_create(adapter);
3802         if (status)
3803                 goto err;
3804
3805         status = be_enable_if_filters(adapter);
3806         if (status)
3807                 goto err;
3808
3809         status = be_irq_register(adapter);
3810         if (status)
3811                 goto err;
3812
3813         for_all_rx_queues(adapter, rxo, i)
3814                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816         for_all_tx_queues(adapter, txo, i)
3817                 be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819         be_async_mcc_enable(adapter);
3820
3821         for_all_evt_queues(adapter, eqo, i) {
3822                 napi_enable(&eqo->napi);
3823                 be_enable_busy_poll(eqo);
3824                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825         }
3826         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829         if (!status)
3830                 be_link_status_update(adapter, link_status);
3831
3832         netif_tx_start_all_queues(netdev);
3833         if (skyhawk_chip(adapter))
3834                 udp_tunnel_get_rx_info(netdev);
3835
3836         return 0;
3837 err:
3838         be_close(adapter->netdev);
3839         return -EIO;
3840 }
3841
3842 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843 {
3844         u32 addr;
3845
3846         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848         mac[5] = (u8)(addr & 0xFF);
3849         mac[4] = (u8)((addr >> 8) & 0xFF);
3850         mac[3] = (u8)((addr >> 16) & 0xFF);
3851         /* Use the OUI from the current MAC address */
3852         memcpy(mac, adapter->netdev->dev_addr, 3);
3853 }
3854
3855 /*
3856  * Generate a seed MAC address from the PF MAC Address using jhash.
3857  * MAC addresses for VFs are assigned incrementally starting from the seed.
3858  * These addresses are programmed in the ASIC by the PF and the VF driver
3859  * queries for the MAC address during its probe.
3860  */
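/* Illustration (addresses and hash value below are made up): if the PF MAC
 * is 00:00:c9:12:34:56 and jhash() returns 0xa1b2c3d4, the seed VF MAC is
 * 00:00:c9:b2:c3:d4 (OUI kept, low 24 bits taken from the hash). The first
 * VF gets the seed address and each subsequent VF's last byte is
 * incremented by one.
 */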
3861 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862 {
3863         u32 vf;
3864         int status = 0;
3865         u8 mac[ETH_ALEN];
3866         struct be_vf_cfg *vf_cfg;
3867
3868         be_vf_eth_addr_generate(adapter, mac);
3869
3870         for_all_vfs(adapter, vf_cfg, vf) {
3871                 if (BEx_chip(adapter))
3872                         status = be_cmd_pmac_add(adapter, mac,
3873                                                  vf_cfg->if_handle,
3874                                                  &vf_cfg->pmac_id, vf + 1);
3875                 else
3876                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877                                                 vf + 1);
3878
3879                 if (status)
3880                         dev_err(&adapter->pdev->dev,
3881                                 "Mac address assignment failed for VF %d\n",
3882                                 vf);
3883                 else
3884                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886                 mac[5] += 1;
3887         }
3888         return status;
3889 }
3890
3891 static int be_vfs_mac_query(struct be_adapter *adapter)
3892 {
3893         int status, vf;
3894         u8 mac[ETH_ALEN];
3895         struct be_vf_cfg *vf_cfg;
3896
3897         for_all_vfs(adapter, vf_cfg, vf) {
3898                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899                                                mac, vf_cfg->if_handle,
3900                                                false, vf+1);
3901                 if (status)
3902                         return status;
3903                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904         }
3905         return 0;
3906 }
3907
3908 static void be_vf_clear(struct be_adapter *adapter)
3909 {
3910         struct be_vf_cfg *vf_cfg;
3911         u32 vf;
3912
3913         if (pci_vfs_assigned(adapter->pdev)) {
3914                 dev_warn(&adapter->pdev->dev,
3915                          "VFs are assigned to VMs: not disabling VFs\n");
3916                 goto done;
3917         }
3918
3919         pci_disable_sriov(adapter->pdev);
3920
3921         for_all_vfs(adapter, vf_cfg, vf) {
3922                 if (BEx_chip(adapter))
3923                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924                                         vf_cfg->pmac_id, vf + 1);
3925                 else
3926                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927                                        vf + 1);
3928
3929                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930         }
3931
3932         if (BE3_chip(adapter))
3933                 be_cmd_set_hsw_config(adapter, 0, 0,
3934                                       adapter->if_handle,
3935                                       PORT_FWD_TYPE_PASSTHRU, 0);
3936 done:
3937         kfree(adapter->vf_cfg);
3938         adapter->num_vfs = 0;
3939         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940 }
3941
3942 static void be_clear_queues(struct be_adapter *adapter)
3943 {
3944         be_mcc_queues_destroy(adapter);
3945         be_rx_cqs_destroy(adapter);
3946         be_tx_queues_destroy(adapter);
3947         be_evt_queues_destroy(adapter);
3948 }
3949
3950 static void be_cancel_worker(struct be_adapter *adapter)
3951 {
3952         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953                 cancel_delayed_work_sync(&adapter->work);
3954                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955         }
3956 }
3957
3958 static void be_cancel_err_detection(struct be_adapter *adapter)
3959 {
3960         struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962         if (!be_err_recovery_workq)
3963                 return;
3964
3965         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3967                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968         }
3969 }
3970
3971 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3972 {
3973         struct net_device *netdev = adapter->netdev;
3974
3975         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3976                 be_cmd_manage_iface(adapter, adapter->if_handle,
3977                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3978
3979         if (adapter->vxlan_port)
3980                 be_cmd_set_vxlan_port(adapter, 0);
3981
3982         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3983         adapter->vxlan_port = 0;
3984
3985         netdev->hw_enc_features = 0;
3986         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3987         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3988 }
3989
3990 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3991                                 struct be_resources *vft_res)
3992 {
3993         struct be_resources res = adapter->pool_res;
3994         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3995         struct be_resources res_mod = {0};
3996         u16 num_vf_qs = 1;
3997
3998         /* Distribute the queue resources among the PF and its VFs */
3999         if (num_vfs) {
4000                 /* Divide the rx queues evenly among the VFs and the PF, capped
4001                  * at VF-EQ-count. Any remainder queues belong to the PF.
4002                  */
4003                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4004                                 res.max_rss_qs / (num_vfs + 1));
4005
4006                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4007                  * RSS Tables per port. Provide RSS on VFs only if the number of
4008                  * VFs requested is less than its PF pool's RSS Tables limit.
4009                  */
4010                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4011                         num_vf_qs = 1;
4012         }
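        /* Illustrative arithmetic: with 15 VFs and 64 RSS queues in the
         * pool, each VF (and the PF) would be provisioned 64 / 16 = 4
         * RSS queues, subject to the SH_VF_MAX_NIC_EQS cap above.
         */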
4013
4014         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4015          * which are modifiable using SET_PROFILE_CONFIG cmd.
4016          */
4017         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4018                                   RESOURCE_MODIFIABLE, 0);
4019
4020         /* If RSS IFACE capability flags are modifiable for a VF, set the
4021          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4022          * more than 1 RSSQ is available for a VF.
4023          * Otherwise, provision only 1 queue pair for VF.
4024          */
4025         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4026                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4027                 if (num_vf_qs > 1) {
4028                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4029                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4030                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4031                 } else {
4032                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4033                                              BE_IF_FLAGS_DEFQ_RSS);
4034                 }
4035         } else {
4036                 num_vf_qs = 1;
4037         }
4038
4039         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4040                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4041                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4042         }
4043
4044         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4045         vft_res->max_rx_qs = num_vf_qs;
4046         vft_res->max_rss_qs = num_vf_qs;
4047         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4048         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4049
4050         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4051          * among the PF and its VFs, if the fields are changeable
4052          */
4053         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4054                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4055
4056         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4057                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4058
4059         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4060                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4061
4062         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4063                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4064 }
4065
4066 static void be_if_destroy(struct be_adapter *adapter)
4067 {
4068         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4069
4070         kfree(adapter->pmac_id);
4071         adapter->pmac_id = NULL;
4072
4073         kfree(adapter->mc_list);
4074         adapter->mc_list = NULL;
4075
4076         kfree(adapter->uc_list);
4077         adapter->uc_list = NULL;
4078 }
4079
4080 static int be_clear(struct be_adapter *adapter)
4081 {
4082         struct pci_dev *pdev = adapter->pdev;
4083         struct  be_resources vft_res = {0};
4084
4085         be_cancel_worker(adapter);
4086
4087         flush_workqueue(be_wq);
4088
4089         if (sriov_enabled(adapter))
4090                 be_vf_clear(adapter);
4091
4092         /* Re-configure FW to distribute resources evenly across max-supported
4093          * number of VFs, only when VFs are not already enabled.
4094          */
4095         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4096             !pci_vfs_assigned(pdev)) {
4097                 be_calculate_vf_res(adapter,
4098                                     pci_sriov_get_totalvfs(pdev),
4099                                     &vft_res);
4100                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4101                                         pci_sriov_get_totalvfs(pdev),
4102                                         &vft_res);
4103         }
4104
4105         be_disable_vxlan_offloads(adapter);
4106
4107         be_if_destroy(adapter);
4108
4109         be_clear_queues(adapter);
4110
4111         be_msix_disable(adapter);
4112         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4113         return 0;
4114 }
4115
4116 static int be_vfs_if_create(struct be_adapter *adapter)
4117 {
4118         struct be_resources res = {0};
4119         u32 cap_flags, en_flags, vf;
4120         struct be_vf_cfg *vf_cfg;
4121         int status;
4122
4123         /* If a FW profile exists, then cap_flags are updated */
4124         cap_flags = BE_VF_IF_EN_FLAGS;
4125
4126         for_all_vfs(adapter, vf_cfg, vf) {
4127                 if (!BE3_chip(adapter)) {
4128                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4129                                                            ACTIVE_PROFILE_TYPE,
4130                                                            RESOURCE_LIMITS,
4131                                                            vf + 1);
4132                         if (!status) {
4133                                 cap_flags = res.if_cap_flags;
4134                                 /* Prevent VFs from enabling VLAN promiscuous
4135                                  * mode
4136                                  */
4137                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4138                         }
4139                 }
4140
4141                 /* PF should enable IF flags during proxy if_create call */
4142                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4143                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4144                                           &vf_cfg->if_handle, vf + 1);
4145                 if (status)
4146                         return status;
4147         }
4148
4149         return 0;
4150 }
4151
4152 static int be_vf_setup_init(struct be_adapter *adapter)
4153 {
4154         struct be_vf_cfg *vf_cfg;
4155         int vf;
4156
4157         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4158                                   GFP_KERNEL);
4159         if (!adapter->vf_cfg)
4160                 return -ENOMEM;
4161
4162         for_all_vfs(adapter, vf_cfg, vf) {
4163                 vf_cfg->if_handle = -1;
4164                 vf_cfg->pmac_id = -1;
4165         }
4166         return 0;
4167 }
4168
4169 static int be_vf_setup(struct be_adapter *adapter)
4170 {
4171         struct device *dev = &adapter->pdev->dev;
4172         struct be_vf_cfg *vf_cfg;
4173         int status, old_vfs, vf;
4174         bool spoofchk;
4175
4176         old_vfs = pci_num_vf(adapter->pdev);
4177
4178         status = be_vf_setup_init(adapter);
4179         if (status)
4180                 goto err;
4181
4182         if (old_vfs) {
4183                 for_all_vfs(adapter, vf_cfg, vf) {
4184                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4185                         if (status)
4186                                 goto err;
4187                 }
4188
4189                 status = be_vfs_mac_query(adapter);
4190                 if (status)
4191                         goto err;
4192         } else {
4193                 status = be_vfs_if_create(adapter);
4194                 if (status)
4195                         goto err;
4196
4197                 status = be_vf_eth_addr_config(adapter);
4198                 if (status)
4199                         goto err;
4200         }
4201
4202         for_all_vfs(adapter, vf_cfg, vf) {
4203                 /* Allow VFs to program MAC/VLAN filters */
4204                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4205                                                   vf + 1);
4206                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4207                         status = be_cmd_set_fn_privileges(adapter,
4208                                                           vf_cfg->privileges |
4209                                                           BE_PRIV_FILTMGMT,
4210                                                           vf + 1);
4211                         if (!status) {
4212                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4213                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4214                                          vf);
4215                         }
4216                 }
4217
4218                 /* Allow full available bandwidth */
4219                 if (!old_vfs)
4220                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4221
4222                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4223                                                vf_cfg->if_handle, NULL,
4224                                                &spoofchk);
4225                 if (!status)
4226                         vf_cfg->spoofchk = spoofchk;
4227
4228                 if (!old_vfs) {
4229                         be_cmd_enable_vf(adapter, vf + 1);
4230                         be_cmd_set_logical_link_config(adapter,
4231                                                        IFLA_VF_LINK_STATE_AUTO,
4232                                                        vf+1);
4233                 }
4234         }
4235
4236         if (!old_vfs) {
4237                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4238                 if (status) {
4239                         dev_err(dev, "SRIOV enable failed\n");
4240                         adapter->num_vfs = 0;
4241                         goto err;
4242                 }
4243         }
4244
4245         if (BE3_chip(adapter)) {
4246                 /* On BE3, enable VEB only when SRIOV is enabled */
4247                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4248                                                adapter->if_handle,
4249                                                PORT_FWD_TYPE_VEB, 0);
4250                 if (status)
4251                         goto err;
4252         }
4253
4254         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4255         return 0;
4256 err:
4257         dev_err(dev, "VF setup failed\n");
4258         be_vf_clear(adapter);
4259         return status;
4260 }
4261
4262 /* Converting function_mode bits on BE3 to SH mc_type enums */
4263
4264 static u8 be_convert_mc_type(u32 function_mode)
4265 {
4266         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4267                 return vNIC1;
4268         else if (function_mode & QNQ_MODE)
4269                 return FLEX10;
4270         else if (function_mode & VNIC_MODE)
4271                 return vNIC2;
4272         else if (function_mode & UMC_ENABLED)
4273                 return UMC;
4274         else
4275                 return MC_NONE;
4276 }
4277
4278 /* On BE2/BE3 FW does not suggest the supported limits */
4279 static void BEx_get_resources(struct be_adapter *adapter,
4280                               struct be_resources *res)
4281 {
4282         bool use_sriov = adapter->num_vfs ? 1 : 0;
4283
4284         if (be_physfn(adapter))
4285                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4286         else
4287                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4288
4289         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4290
4291         if (be_is_mc(adapter)) {
4292                 /* Assuming that there are 4 channels per port
4293                  * when multi-channel is enabled
4294                  */
4295                 if (be_is_qnq_mode(adapter))
4296                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4297                 else
4298                         /* In a non-qnq multichannel mode, the pvid
4299                          * takes up one vlan entry
4300                          */
4301                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4302         } else {
4303                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4304         }
4305
4306         res->max_mcast_mac = BE_MAX_MC;
4307
4308         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4309          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4310          *    *only* if it is RSS-capable.
4311          */
4312         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4313             be_virtfn(adapter) ||
4314             (be_is_mc(adapter) &&
4315              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4316                 res->max_tx_qs = 1;
4317         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4318                 struct be_resources super_nic_res = {0};
4319
4320                 /* On a SuperNIC profile, the driver needs to use the
4321                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4322                  */
4323                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4324                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4325                                           0);
4326                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4327                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4328         } else {
4329                 res->max_tx_qs = BE3_MAX_TX_QS;
4330         }
4331
4332         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4333             !use_sriov && be_physfn(adapter))
4334                 res->max_rss_qs = (adapter->be3_native) ?
4335                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4336         res->max_rx_qs = res->max_rss_qs + 1;
4337
4338         if (be_physfn(adapter))
4339                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4340                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4341         else
4342                 res->max_evt_qs = 1;
4343
4344         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4345         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4346         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4347                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4348 }
4349
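/* Reset driver-maintained adapter state to sane defaults before be_setup()
 * (re)queries the FW configuration.
 */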
4350 static void be_setup_init(struct be_adapter *adapter)
4351 {
4352         adapter->vlan_prio_bmap = 0xff;
4353         adapter->phy.link_speed = -1;
4354         adapter->if_handle = -1;
4355         adapter->be3_native = false;
4356         adapter->if_flags = 0;
4357         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4358         if (be_physfn(adapter))
4359                 adapter->cmd_privileges = MAX_PRIVILEGES;
4360         else
4361                 adapter->cmd_privileges = MIN_PRIVILEGES;
4362 }
4363
4364 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4365  * However, this HW limitation is not exposed to the host via any SLI cmd.
4366  * As a result, in the case of SRIOV, and in particular multi-partition configs,
4367  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4368  * for distribution between the VFs. This self-imposed limit determines the
4369  * number of VFs for which RSS can be enabled.
4370  */
4371 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4372 {
4373         struct be_port_resources port_res = {0};
4374         u8 rss_tables_on_port;
4375         u16 max_vfs = be_max_vfs(adapter);
4376
4377         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4378                                   RESOURCE_LIMITS, 0);
4379
4380         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4381
4382         /* Each PF Pool's RSS Tables limit =
4383          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4384          */
4385         adapter->pool_res.max_rss_tables =
4386                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4387 }
4388
4389 static int be_get_sriov_config(struct be_adapter *adapter)
4390 {
4391         struct be_resources res = {0};
4392         int max_vfs, old_vfs;
4393
4394         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4395                                   RESOURCE_LIMITS, 0);
4396
4397         /* Some old versions of BE3 FW don't report max_vfs value */
4398         if (BE3_chip(adapter) && !res.max_vfs) {
4399                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4400                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4401         }
4402
4403         adapter->pool_res = res;
4404
4405         /* If the VFs were not disabled during the previous unload of the driver,
4406          * we cannot rely on the PF POOL limits for the TotalVFs value.
4407          * Instead, use the TotalVFs value stored in the pci-dev struct.
4408          */
4409         old_vfs = pci_num_vf(adapter->pdev);
4410         if (old_vfs) {
4411                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4412                          old_vfs);
4413
4414                 adapter->pool_res.max_vfs =
4415                         pci_sriov_get_totalvfs(adapter->pdev);
4416                 adapter->num_vfs = old_vfs;
4417         }
4418
4419         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4420                 be_calculate_pf_pool_rss_tables(adapter);
4421                 dev_info(&adapter->pdev->dev,
4422                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4423                          be_max_pf_pool_rss_tables(adapter));
4424         }
4425         return 0;
4426 }
4427
4428 static void be_alloc_sriov_res(struct be_adapter *adapter)
4429 {
4430         int old_vfs = pci_num_vf(adapter->pdev);
4431         struct  be_resources vft_res = {0};
4432         int status;
4433
4434         be_get_sriov_config(adapter);
4435
4436         if (!old_vfs)
4437                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4438
4439         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4440          * resources are given to the PF during driver load if there are no
4441          * old VFs. This facility is not available in BE3 FW.
4442          * Also, this is done by FW in the Lancer chip.
4443          */
4444         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4445                 be_calculate_vf_res(adapter, 0, &vft_res);
4446                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4447                                                  &vft_res);
4448                 if (status)
4449                         dev_err(&adapter->pdev->dev,
4450                                 "Failed to optimize SRIOV resources\n");
4451         }
4452 }
4453
4454 static int be_get_resources(struct be_adapter *adapter)
4455 {
4456         struct device *dev = &adapter->pdev->dev;
4457         struct be_resources res = {0};
4458         int status;
4459
4460         /* For Lancer, SH, etc. read per-function resource limits from FW.
4461          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4462          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4463          */
4464         if (BEx_chip(adapter)) {
4465                 BEx_get_resources(adapter, &res);
4466         } else {
4467                 status = be_cmd_get_func_config(adapter, &res);
4468                 if (status)
4469                         return status;
4470
4471                 /* If a default RXQ must be created, we'll use up one RSSQ */
4472                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4473                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4474                         res.max_rss_qs -= 1;
4475         }
4476
4477         /* If RoCE is supported, stash away half the EQs for RoCE */
4478         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4479                                 res.max_evt_qs / 2 : res.max_evt_qs;
4480         adapter->res = res;
4481
4482         /* If FW supports an RSS default queue, then skip creating a non-RSS
4483          * queue for non-IP traffic.
4484          */
4485         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4486                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4487
4488         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4489                  be_max_txqs(adapter), be_max_rxqs(adapter),
4490                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4491                  be_max_vfs(adapter));
4492         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4493                  be_max_uc(adapter), be_max_mc(adapter),
4494                  be_max_vlans(adapter));
4495
4496         /* Ensure RX and TX queues are created in pairs at init time */
4497         adapter->cfg_num_rx_irqs =
4498                                 min_t(u16, netif_get_num_default_rss_queues(),
4499                                       be_max_qp_irqs(adapter));
4500         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4501         return 0;
4502 }
4503
4504 static int be_get_config(struct be_adapter *adapter)
4505 {
4506         int status, level;
4507         u16 profile_id;
4508
4509         status = be_cmd_get_cntl_attributes(adapter);
4510         if (status)
4511                 return status;
4512
4513         status = be_cmd_query_fw_cfg(adapter);
4514         if (status)
4515                 return status;
4516
4517         if (!lancer_chip(adapter) && be_physfn(adapter))
4518                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4519
4520         if (BEx_chip(adapter)) {
4521                 level = be_cmd_get_fw_log_level(adapter);
4522                 adapter->msg_enable =
4523                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4524         }
4525
4526         be_cmd_get_acpi_wol_cap(adapter);
4527         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4528         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4529
4530         be_cmd_query_port_name(adapter);
4531
4532         if (be_physfn(adapter)) {
4533                 status = be_cmd_get_active_profile(adapter, &profile_id);
4534                 if (!status)
4535                         dev_info(&adapter->pdev->dev,
4536                                  "Using profile 0x%x\n", profile_id);
4537         }
4538
4539         return 0;
4540 }
4541
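/* If no MAC address has been assigned yet, read the permanent MAC from FW and
 * use it as both the current and permanent netdev address.
 */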
4542 static int be_mac_setup(struct be_adapter *adapter)
4543 {
4544         u8 mac[ETH_ALEN];
4545         int status;
4546
4547         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4548                 status = be_cmd_get_perm_mac(adapter, mac);
4549                 if (status)
4550                         return status;
4551
4552                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4553                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4554         }
4555
4556         return 0;
4557 }
4558
4559 static void be_schedule_worker(struct be_adapter *adapter)
4560 {
4561         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4562         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4563 }
4564
4565 static void be_destroy_err_recovery_workq(void)
4566 {
4567         if (!be_err_recovery_workq)
4568                 return;
4569
4570         flush_workqueue(be_err_recovery_workq);
4571         destroy_workqueue(be_err_recovery_workq);
4572         be_err_recovery_workq = NULL;
4573 }
4574
4575 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4576 {
4577         struct be_error_recovery *err_rec = &adapter->error_recovery;
4578
4579         if (!be_err_recovery_workq)
4580                 return;
4581
4582         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4583                            msecs_to_jiffies(delay));
4584         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4585 }
4586
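/* Create the event, TX, RX and MCC queues, and report the actual RX/TX queue
 * counts to the stack.
 */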
4587 static int be_setup_queues(struct be_adapter *adapter)
4588 {
4589         struct net_device *netdev = adapter->netdev;
4590         int status;
4591
4592         status = be_evt_queues_create(adapter);
4593         if (status)
4594                 goto err;
4595
4596         status = be_tx_qs_create(adapter);
4597         if (status)
4598                 goto err;
4599
4600         status = be_rx_cqs_create(adapter);
4601         if (status)
4602                 goto err;
4603
4604         status = be_mcc_queues_create(adapter);
4605         if (status)
4606                 goto err;
4607
4608         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4609         if (status)
4610                 goto err;
4611
4612         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4613         if (status)
4614                 goto err;
4615
4616         return 0;
4617 err:
4618         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4619         return status;
4620 }
4621
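/* Allocate the MAC/multicast filter tables and create the interface
 * (if_handle) in FW with the capability flags this function supports.
 */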
4622 static int be_if_create(struct be_adapter *adapter)
4623 {
4624         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4625         u32 cap_flags = be_if_cap_flags(adapter);
4626         int status;
4627
4628         /* alloc required memory for other filtering fields */
4629         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4630                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4631         if (!adapter->pmac_id)
4632                 return -ENOMEM;
4633
4634         adapter->mc_list = kcalloc(be_max_mc(adapter),
4635                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4636         if (!adapter->mc_list)
4637                 return -ENOMEM;
4638
4639         adapter->uc_list = kcalloc(be_max_uc(adapter),
4640                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4641         if (!adapter->uc_list)
4642                 return -ENOMEM;
4643
4644         if (adapter->cfg_num_rx_irqs == 1)
4645                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4646
4647         en_flags &= cap_flags;
4648         /* will enable all the needed filter flags in be_open() */
4649         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4650                                   &adapter->if_handle, 0);
4651
4652         if (status)
4653                 return status;
4654
4655         return 0;
4656 }
4657
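/* Destroy and re-create the interface and all queues using the current
 * configuration (e.g. after the desired number of queues/MSI-X vectors has
 * changed).
 */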
4658 int be_update_queues(struct be_adapter *adapter)
4659 {
4660         struct net_device *netdev = adapter->netdev;
4661         int status;
4662
4663         if (netif_running(netdev))
4664                 be_close(netdev);
4665
4666         be_cancel_worker(adapter);
4667
4668         /* If any vectors have been shared with RoCE, we cannot re-program
4669          * the MSIx table.
4670          */
4671         if (!adapter->num_msix_roce_vec)
4672                 be_msix_disable(adapter);
4673
4674         be_clear_queues(adapter);
4675         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4676         if (status)
4677                 return status;
4678
4679         if (!msix_enabled(adapter)) {
4680                 status = be_msix_enable(adapter);
4681                 if (status)
4682                         return status;
4683         }
4684
4685         status = be_if_create(adapter);
4686         if (status)
4687                 return status;
4688
4689         status = be_setup_queues(adapter);
4690         if (status)
4691                 return status;
4692
4693         be_schedule_worker(adapter);
4694
4695         if (netif_running(netdev))
4696                 status = be_open(netdev);
4697
4698         return status;
4699 }
4700
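/* Extract the major number from a dotted FW version string; returns 0 if the
 * string cannot be parsed.
 */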
4701 static inline int fw_major_num(const char *fw_ver)
4702 {
4703         int fw_major = 0, i;
4704
4705         i = sscanf(fw_ver, "%d.", &fw_major);
4706         if (i != 1)
4707                 return 0;
4708
4709         return fw_major;
4710 }
4711
4712 /* If it is error recovery, FLR the PF.
4713  * Else, if any VFs are already enabled, don't FLR the PF.
4714  */
4715 static bool be_reset_required(struct be_adapter *adapter)
4716 {
4717         if (be_error_recovering(adapter))
4718                 return true;
4719         else
4720                 return pci_num_vf(adapter->pdev) == 0;
4721 }
4722
4723 /* Wait for the FW to be ready and perform the required initialization */
4724 static int be_func_init(struct be_adapter *adapter)
4725 {
4726         int status;
4727
4728         status = be_fw_wait_ready(adapter);
4729         if (status)
4730                 return status;
4731
4732         /* FW is now ready; clear errors to allow cmds/doorbell */
4733         be_clear_error(adapter, BE_CLEAR_ALL);
4734
4735         if (be_reset_required(adapter)) {
4736                 status = be_cmd_reset_function(adapter);
4737                 if (status)
4738                         return status;
4739
4740                 /* Wait for interrupts to quiesce after an FLR */
4741                 msleep(100);
4742         }
4743
4744         /* Tell FW we're ready to fire cmds */
4745         status = be_cmd_fw_init(adapter);
4746         if (status)
4747                 return status;
4748
4749         /* Allow interrupts for other ULPs running on NIC function */
4750         be_intr_set(adapter, true);
4751
4752         return 0;
4753 }
4754
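/* Main HW/SW setup path: wait for FW readiness, query configuration and
 * resource limits, enable MSI-X, create the interface and queues, program the
 * MAC and flow control, and start the periodic worker.
 */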
4755 static int be_setup(struct be_adapter *adapter)
4756 {
4757         struct device *dev = &adapter->pdev->dev;
4758         int status;
4759
4760         status = be_func_init(adapter);
4761         if (status)
4762                 return status;
4763
4764         be_setup_init(adapter);
4765
4766         if (!lancer_chip(adapter))
4767                 be_cmd_req_native_mode(adapter);
4768
4769         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4770          * for issuing profile-related cmds
4771          */
4772         if (!BEx_chip(adapter)) {
4773                 status = be_cmd_get_func_config(adapter, NULL);
4774                 if (status)
4775                         return status;
4776         }
4777
4778         status = be_get_config(adapter);
4779         if (status)
4780                 goto err;
4781
4782         if (!BE2_chip(adapter) && be_physfn(adapter))
4783                 be_alloc_sriov_res(adapter);
4784
4785         status = be_get_resources(adapter);
4786         if (status)
4787                 goto err;
4788
4789         status = be_msix_enable(adapter);
4790         if (status)
4791                 goto err;
4792
4793         /* will enable all the needed filter flags in be_open() */
4794         status = be_if_create(adapter);
4795         if (status)
4796                 goto err;
4797
4798         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4799         rtnl_lock();
4800         status = be_setup_queues(adapter);
4801         rtnl_unlock();
4802         if (status)
4803                 goto err;
4804
4805         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4806
4807         status = be_mac_setup(adapter);
4808         if (status)
4809                 goto err;
4810
4811         be_cmd_get_fw_ver(adapter);
4812         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4813
4814         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4815                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4816                         adapter->fw_ver);
4817                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4818         }
4819
4820         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4821                                          adapter->rx_fc);
4822         if (status)
4823                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4824                                         &adapter->rx_fc);
4825
4826         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4827                  adapter->tx_fc, adapter->rx_fc);
4828
4829         if (be_physfn(adapter))
4830                 be_cmd_set_logical_link_config(adapter,
4831                                                IFLA_VF_LINK_STATE_AUTO, 0);
4832
4833         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4834          * confusing a Linux bridge or OVS that it might be connected to.
4835          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4836          * when SRIOV is not enabled.
4837          */
4838         if (BE3_chip(adapter))
4839                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4840                                       PORT_FWD_TYPE_PASSTHRU, 0);
4841
4842         if (adapter->num_vfs)
4843                 be_vf_setup(adapter);
4844
4845         status = be_cmd_get_phy_info(adapter);
4846         if (!status && be_pause_supported(adapter))
4847                 adapter->phy.fc_autoneg = 1;
4848
4849         if (be_physfn(adapter) && !lancer_chip(adapter))
4850                 be_cmd_set_features(adapter);
4851
4852         be_schedule_worker(adapter);
4853         adapter->flags |= BE_FLAGS_SETUP_DONE;
4854         return 0;
4855 err:
4856         be_clear(adapter);
4857         return status;
4858 }
4859
4860 #ifdef CONFIG_NET_POLL_CONTROLLER
4861 static void be_netpoll(struct net_device *netdev)
4862 {
4863         struct be_adapter *adapter = netdev_priv(netdev);
4864         struct be_eq_obj *eqo;
4865         int i;
4866
4867         for_all_evt_queues(adapter, eqo, i) {
4868                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4869                 napi_schedule(&eqo->napi);
4870         }
4871 }
4872 #endif
4873
4874 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4875 {
4876         const struct firmware *fw;
4877         int status;
4878
4879         if (!netif_running(adapter->netdev)) {
4880                 dev_err(&adapter->pdev->dev,
4881                         "Firmware load not allowed (interface is down)\n");
4882                 return -ENETDOWN;
4883         }
4884
4885         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4886         if (status)
4887                 goto fw_exit;
4888
4889         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4890
4891         if (lancer_chip(adapter))
4892                 status = lancer_fw_download(adapter, fw);
4893         else
4894                 status = be_fw_download(adapter, fw);
4895
4896         if (!status)
4897                 be_cmd_get_fw_ver(adapter);
4898
4899 fw_exit:
4900         release_firmware(fw);
4901         return status;
4902 }
4903
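/* ndo_bridge_setlink: switch the adapter's embedded bridge between VEB and
 * VEPA forwarding modes (only when SR-IOV is enabled).
 */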
4904 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4905                                  u16 flags)
4906 {
4907         struct be_adapter *adapter = netdev_priv(dev);
4908         struct nlattr *attr, *br_spec;
4909         int rem;
4910         int status = 0;
4911         u16 mode = 0;
4912
4913         if (!sriov_enabled(adapter))
4914                 return -EOPNOTSUPP;
4915
4916         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4917         if (!br_spec)
4918                 return -EINVAL;
4919
4920         nla_for_each_nested(attr, br_spec, rem) {
4921                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4922                         continue;
4923
4924                 if (nla_len(attr) < sizeof(mode))
4925                         return -EINVAL;
4926
4927                 mode = nla_get_u16(attr);
4928                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4929                         return -EOPNOTSUPP;
4930
4931                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4932                         return -EINVAL;
4933
4934                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4935                                                adapter->if_handle,
4936                                                mode == BRIDGE_MODE_VEPA ?
4937                                                PORT_FWD_TYPE_VEPA :
4938                                                PORT_FWD_TYPE_VEB, 0);
4939                 if (status)
4940                         goto err;
4941
4942                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4943                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4944
4945                 return status;
4946         }
4947 err:
4948         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4949                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4950
4951         return status;
4952 }
4953
4954 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4955                                  struct net_device *dev, u32 filter_mask,
4956                                  int nlflags)
4957 {
4958         struct be_adapter *adapter = netdev_priv(dev);
4959         int status = 0;
4960         u8 hsw_mode;
4961
4962         /* BE and Lancer chips support VEB mode only */
4963         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4964                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4965                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4966                         return 0;
4967                 hsw_mode = PORT_FWD_TYPE_VEB;
4968         } else {
4969                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4970                                                adapter->if_handle, &hsw_mode,
4971                                                NULL);
4972                 if (status)
4973                         return 0;
4974
4975                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4976                         return 0;
4977         }
4978
4979         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4980                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4981                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4982                                        0, 0, nlflags, filter_mask, NULL);
4983 }
4984
4985 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4986                                          void (*func)(struct work_struct *))
4987 {
4988         struct be_cmd_work *work;
4989
4990         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4991         if (!work) {
4992                 dev_err(&adapter->pdev->dev,
4993                         "be_work memory allocation failed\n");
4994                 return NULL;
4995         }
4996
4997         INIT_WORK(&work->work, func);
4998         work->adapter = adapter;
4999         return work;
5000 }
5001
5002 /* VxLAN offload Notes:
5003  *
5004  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5005  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5006  * is expected to work across all types of IP tunnels once exported. Skyhawk
5007  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5008  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5009  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5010  * those other tunnels are unexported on the fly through ndo_features_check().
5011  *
5012  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5013  * adds more than one port, disable offloads and don't re-enable them
5014  * until all the tunnels are removed.
5015  */
5016 static void be_work_add_vxlan_port(struct work_struct *work)
5017 {
5018         struct be_cmd_work *cmd_work =
5019                                 container_of(work, struct be_cmd_work, work);
5020         struct be_adapter *adapter = cmd_work->adapter;
5021         struct net_device *netdev = adapter->netdev;
5022         struct device *dev = &adapter->pdev->dev;
5023         __be16 port = cmd_work->info.vxlan_port;
5024         int status;
5025
5026         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5027                 adapter->vxlan_port_aliases++;
5028                 goto done;
5029         }
5030
5031         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5032                 dev_info(dev,
5033                          "Only one UDP port supported for VxLAN offloads\n");
5034                 dev_info(dev, "Disabling VxLAN offloads\n");
5035                 adapter->vxlan_port_count++;
5036                 goto err;
5037         }
5038
5039         if (adapter->vxlan_port_count++ >= 1)
5040                 goto done;
5041
5042         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5043                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5044         if (status) {
5045                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5046                 goto err;
5047         }
5048
5049         status = be_cmd_set_vxlan_port(adapter, port);
5050         if (status) {
5051                 dev_warn(dev, "Failed to add VxLAN port\n");
5052                 goto err;
5053         }
5054         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5055         adapter->vxlan_port = port;
5056
5057         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5058                                    NETIF_F_TSO | NETIF_F_TSO6 |
5059                                    NETIF_F_GSO_UDP_TUNNEL;
5060         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5061         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5062
5063         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5064                  be16_to_cpu(port));
5065         goto done;
5066 err:
5067         be_disable_vxlan_offloads(adapter);
5068 done:
5069         kfree(cmd_work);
5070 }
5071
5072 static void be_work_del_vxlan_port(struct work_struct *work)
5073 {
5074         struct be_cmd_work *cmd_work =
5075                                 container_of(work, struct be_cmd_work, work);
5076         struct be_adapter *adapter = cmd_work->adapter;
5077         __be16 port = cmd_work->info.vxlan_port;
5078
5079         if (adapter->vxlan_port != port)
5080                 goto done;
5081
5082         if (adapter->vxlan_port_aliases) {
5083                 adapter->vxlan_port_aliases--;
5084                 goto out;
5085         }
5086
5087         be_disable_vxlan_offloads(adapter);
5088
5089         dev_info(&adapter->pdev->dev,
5090                  "Disabled VxLAN offloads for UDP port %d\n",
5091                  be16_to_cpu(port));
5092 done:
5093         adapter->vxlan_port_count--;
5094 out:
5095         kfree(cmd_work);
5096 }
5097
5098 static void be_cfg_vxlan_port(struct net_device *netdev,
5099                               struct udp_tunnel_info *ti,
5100                               void (*func)(struct work_struct *))
5101 {
5102         struct be_adapter *adapter = netdev_priv(netdev);
5103         struct be_cmd_work *cmd_work;
5104
5105         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5106                 return;
5107
5108         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5109                 return;
5110
5111         cmd_work = be_alloc_work(adapter, func);
5112         if (cmd_work) {
5113                 cmd_work->info.vxlan_port = ti->port;
5114                 queue_work(be_wq, &cmd_work->work);
5115         }
5116 }
5117
5118 static void be_del_vxlan_port(struct net_device *netdev,
5119                               struct udp_tunnel_info *ti)
5120 {
5121         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5122 }
5123
5124 static void be_add_vxlan_port(struct net_device *netdev,
5125                               struct udp_tunnel_info *ti)
5126 {
5127         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5128 }
5129
5130 static netdev_features_t be_features_check(struct sk_buff *skb,
5131                                            struct net_device *dev,
5132                                            netdev_features_t features)
5133 {
5134         struct be_adapter *adapter = netdev_priv(dev);
5135         u8 l4_hdr = 0;
5136
5137         /* The code below restricts offload features for some tunneled packets.
5138          * Offload features for normal (non-tunnel) packets are unchanged.
5139          */
5140         if (!skb->encapsulation ||
5141             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5142                 return features;
5143
5144         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5145          * should disable tunnel offload features if it's not a VxLAN packet,
5146          * as tunnel offloads have been enabled only for VxLAN. This is done to
5147          * allow other tunneled traffic like GRE to work fine while VxLAN
5148          * offloads are configured in Skyhawk-R.
5149          */
5150         switch (vlan_get_protocol(skb)) {
5151         case htons(ETH_P_IP):
5152                 l4_hdr = ip_hdr(skb)->protocol;
5153                 break;
5154         case htons(ETH_P_IPV6):
5155                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5156                 break;
5157         default:
5158                 return features;
5159         }
5160
5161         if (l4_hdr != IPPROTO_UDP ||
5162             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5163             skb->inner_protocol != htons(ETH_P_TEB) ||
5164             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5165                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5166             !adapter->vxlan_port ||
5167             udp_hdr(skb)->dest != adapter->vxlan_port)
5168                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5169
5170         return features;
5171 }
5172
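/* Build a physical port id from the HBA port number followed by the
 * controller serial number (words copied in reverse order).
 */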
5173 static int be_get_phys_port_id(struct net_device *dev,
5174                                struct netdev_phys_item_id *ppid)
5175 {
5176         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5177         struct be_adapter *adapter = netdev_priv(dev);
5178         u8 *id;
5179
5180         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5181                 return -ENOSPC;
5182
5183         ppid->id[0] = adapter->hba_port_num + 1;
5184         id = &ppid->id[1];
5185         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5186              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5187                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5188
5189         ppid->id_len = id_len;
5190
5191         return 0;
5192 }
5193
5194 static void be_set_rx_mode(struct net_device *dev)
5195 {
5196         struct be_adapter *adapter = netdev_priv(dev);
5197         struct be_cmd_work *work;
5198
5199         work = be_alloc_work(adapter, be_work_set_rx_mode);
5200         if (work)
5201                 queue_work(be_wq, &work->work);
5202 }
5203
5204 static const struct net_device_ops be_netdev_ops = {
5205         .ndo_open               = be_open,
5206         .ndo_stop               = be_close,
5207         .ndo_start_xmit         = be_xmit,
5208         .ndo_set_rx_mode        = be_set_rx_mode,
5209         .ndo_set_mac_address    = be_mac_addr_set,
5210         .ndo_get_stats64        = be_get_stats64,
5211         .ndo_validate_addr      = eth_validate_addr,
5212         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5213         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5214         .ndo_set_vf_mac         = be_set_vf_mac,
5215         .ndo_set_vf_vlan        = be_set_vf_vlan,
5216         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5217         .ndo_get_vf_config      = be_get_vf_config,
5218         .ndo_set_vf_link_state  = be_set_vf_link_state,
5219         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5220 #ifdef CONFIG_NET_POLL_CONTROLLER
5221         .ndo_poll_controller    = be_netpoll,
5222 #endif
5223         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5224         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5225 #ifdef CONFIG_NET_RX_BUSY_POLL
5226         .ndo_busy_poll          = be_busy_poll,
5227 #endif
5228         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5229         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5230         .ndo_features_check     = be_features_check,
5231         .ndo_get_phys_port_id   = be_get_phys_port_id,
5232 };
5233
5234 static void be_netdev_init(struct net_device *netdev)
5235 {
5236         struct be_adapter *adapter = netdev_priv(netdev);
5237
5238         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5239                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5240                 NETIF_F_HW_VLAN_CTAG_TX;
5241         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5242                 netdev->hw_features |= NETIF_F_RXHASH;
5243
5244         netdev->features |= netdev->hw_features |
5245                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5246
5247         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5248                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5249
5250         netdev->priv_flags |= IFF_UNICAST_FLT;
5251
5252         netdev->flags |= IFF_MULTICAST;
5253
5254         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5255
5256         netdev->netdev_ops = &be_netdev_ops;
5257
5258         netdev->ethtool_ops = &be_ethtool_ops;
5259
5260         /* MTU range: 256 - 9000 */
5261         netdev->min_mtu = BE_MIN_MTU;
5262         netdev->max_mtu = BE_MAX_MTU;
5263 }
5264
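/* Detach and close the netdev and release all HW/SW resources held by the
 * function (e.g. on the error-recovery path).
 */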
5265 static void be_cleanup(struct be_adapter *adapter)
5266 {
5267         struct net_device *netdev = adapter->netdev;
5268
5269         rtnl_lock();
5270         netif_device_detach(netdev);
5271         if (netif_running(netdev))
5272                 be_close(netdev);
5273         rtnl_unlock();
5274
5275         be_clear(adapter);
5276 }
5277
5278 static int be_resume(struct be_adapter *adapter)
5279 {
5280         struct net_device *netdev = adapter->netdev;
5281         int status;
5282
5283         status = be_setup(adapter);
5284         if (status)
5285                 return status;
5286
5287         rtnl_lock();
5288         if (netif_running(netdev))
5289                 status = be_open(netdev);
5290         rtnl_unlock();
5291
5292         if (status)
5293                 return status;
5294
5295         netif_device_attach(netdev);
5296
5297         return 0;
5298 }
5299
5300 static void be_soft_reset(struct be_adapter *adapter)
5301 {
5302         u32 val;
5303
5304         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5305         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5306         val |= SLIPORT_SOFTRESET_SR_MASK;
5307         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5308 }
5309
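/* Decide whether recovery should be attempted for the reported HW error:
 * the POST stage must indicate a recoverable error, enough time must have
 * passed since probe and since the last recovery, and the error code must
 * differ from the previous one.
 */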
5310 static bool be_err_is_recoverable(struct be_adapter *adapter)
5311 {
5312         struct be_error_recovery *err_rec = &adapter->error_recovery;
5313         unsigned long initial_idle_time =
5314                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5315         unsigned long recovery_interval =
5316                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5317         u16 ue_err_code;
5318         u32 val;
5319
5320         val = be_POST_stage_get(adapter);
5321         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5322                 return false;
5323         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5324         if (ue_err_code == 0)
5325                 return false;
5326
5327         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5328                 ue_err_code);
5329
5330         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5331                 dev_err(&adapter->pdev->dev,
5332                         "Cannot recover within %lu sec from driver load\n",
5333                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5334                 return false;
5335         }
5336
5337         if (err_rec->last_recovery_time &&
5338             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5339                 dev_err(&adapter->pdev->dev,
5340                         "Cannot recover within %lu sec from last recovery\n",
5341                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5342                 return false;
5343         }
5344
5345         if (ue_err_code == err_rec->last_err_code) {
5346                 dev_err(&adapter->pdev->dev,
5347                         "Cannot recover from a consecutive TPE error\n");
5348                 return false;
5349         }
5350
5351         err_rec->last_recovery_time = jiffies;
5352         err_rec->last_err_code = ue_err_code;
5353         return true;
5354 }
5355
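/* State machine for BEx/SH TPE error recovery. Returns -EAGAIN while further
 * steps remain (rescheduled after err_rec->resched_delay), 0 once the chip is
 * ready to be re-initialized, or a negative error on failure.
 */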
5356 static int be_tpe_recover(struct be_adapter *adapter)
5357 {
5358         struct be_error_recovery *err_rec = &adapter->error_recovery;
5359         int status = -EAGAIN;
5360         u32 val;
5361
5362         switch (err_rec->recovery_state) {
5363         case ERR_RECOVERY_ST_NONE:
5364                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5365                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5366                 break;
5367
5368         case ERR_RECOVERY_ST_DETECT:
5369                 val = be_POST_stage_get(adapter);
5370                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5371                     POST_STAGE_RECOVERABLE_ERR) {
5372                         dev_err(&adapter->pdev->dev,
5373                                 "Unrecoverable HW error detected: 0x%x\n", val);
5374                         status = -EINVAL;
5375                         err_rec->resched_delay = 0;
5376                         break;
5377                 }
5378
5379                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5380
5381                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5382                  * milliseconds before it checks for final error status in
5383                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5384                  * If they are, PF0 initiates a Soft Reset.
5385                  */
5386                 if (adapter->pf_num == 0) {
5387                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5388                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5389                                         ERR_RECOVERY_UE_DETECT_DURATION;
5390                         break;
5391                 }
5392
5393                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5394                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5395                                         ERR_RECOVERY_UE_DETECT_DURATION;
5396                 break;
5397
5398         case ERR_RECOVERY_ST_RESET:
5399                 if (!be_err_is_recoverable(adapter)) {
5400                         dev_err(&adapter->pdev->dev,
5401                                 "Failed to meet recovery criteria\n");
5402                         status = -EIO;
5403                         err_rec->resched_delay = 0;
5404                         break;
5405                 }
5406                 be_soft_reset(adapter);
5407                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5408                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5409                                         err_rec->ue_to_reset_time;
5410                 break;
5411
5412         case ERR_RECOVERY_ST_PRE_POLL:
5413                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5414                 err_rec->resched_delay = 0;
5415                 status = 0;                     /* done */
5416                 break;
5417
5418         default:
5419                 status = -EINVAL;
5420                 err_rec->resched_delay = 0;
5421                 break;
5422         }
5423
5424         return status;
5425 }
5426
5427 static int be_err_recover(struct be_adapter *adapter)
5428 {
5429         int status;
5430
5431         if (!lancer_chip(adapter)) {
5432                 if (!adapter->error_recovery.recovery_supported ||
5433                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5434                         return -EIO;
5435                 status = be_tpe_recover(adapter);
5436                 if (status)
5437                         goto err;
5438         }
5439
5440         /* Wait for adapter to reach quiescent state before
5441          * destroying queues
5442          */
5443         status = be_fw_wait_ready(adapter);
5444         if (status)
5445                 goto err;
5446
5447         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5448
5449         be_cleanup(adapter);
5450
5451         status = be_resume(adapter);
5452         if (status)
5453                 goto err;
5454
5455         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5456
5457 err:
5458         return status;
5459 }
5460
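/* Delayed work that polls the adapter for HW errors and, when one is found,
 * drives recovery and retries, rescheduling itself as needed.
 */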
5461 static void be_err_detection_task(struct work_struct *work)
5462 {
5463         struct be_error_recovery *err_rec =
5464                         container_of(work, struct be_error_recovery,
5465                                      err_detection_work.work);
5466         struct be_adapter *adapter =
5467                         container_of(err_rec, struct be_adapter,
5468                                      error_recovery);
5469         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5470         struct device *dev = &adapter->pdev->dev;
5471         int recovery_status;
5472
5473         be_detect_error(adapter);
5474         if (!be_check_error(adapter, BE_ERROR_HW))
5475                 goto reschedule_task;
5476
5477         recovery_status = be_err_recover(adapter);
5478         if (!recovery_status) {
5479                 err_rec->recovery_retries = 0;
5480                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5481                 dev_info(dev, "Adapter recovery successful\n");
5482                 goto reschedule_task;
5483         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5484                 /* BEx/SH recovery state machine */
5485                 if (adapter->pf_num == 0 &&
5486                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5487                         dev_err(&adapter->pdev->dev,
5488                                 "Adapter recovery in progress\n");
5489                 resched_delay = err_rec->resched_delay;
5490                 goto reschedule_task;
5491         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5492                 /* For VFs, check every second whether the PF has
5493                  * allocated resources.
5494                  */
5495                 dev_err(dev, "Re-trying adapter recovery\n");
5496                 goto reschedule_task;
5497         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5498                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5499                 /* In case of another error during recovery, it takes 30 sec
5500                  * for the adapter to come out of error. Retry error recovery after
5501                  * this time interval.
5502                  */
5503                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5504                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5505                 goto reschedule_task;
5506         } else {
5507                 dev_err(dev, "Adapter recovery failed\n");
5508                 dev_err(dev, "Please reboot server to recover\n");
5509         }
5510
5511         return;
5512
5513 reschedule_task:
5514         be_schedule_err_detection(adapter, resched_delay);
5515 }
5516
5517 static void be_log_sfp_info(struct be_adapter *adapter)
5518 {
5519         int status;
5520
5521         status = be_cmd_query_sfp_info(adapter);
5522         if (!status) {
5523                 dev_err(&adapter->pdev->dev,
5524                         "Port %c: %s Vendor: %s part no: %s\n",
5525                         adapter->port_name,
5526                         be_misconfig_evt_port_state[adapter->phy_state],
5527                         adapter->phy.vendor_name,
5528                         adapter->phy.vendor_pn);
5529         }
5530         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5531 }
5532
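/* Periodic (1 second) housekeeping: periodically read the die temperature,
 * reap MCC completions while interrupts are disabled, refresh HW stats,
 * replenish starved RX queues and update EQ delays.
 */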
5533 static void be_worker(struct work_struct *work)
5534 {
5535         struct be_adapter *adapter =
5536                 container_of(work, struct be_adapter, work.work);
5537         struct be_rx_obj *rxo;
5538         int i;
5539
5540         if (be_physfn(adapter) &&
5541             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5542                 be_cmd_get_die_temperature(adapter);
5543
5544         /* when interrupts are not yet enabled, just reap any pending
5545          * mcc completions
5546          */
5547         if (!netif_running(adapter->netdev)) {
5548                 local_bh_disable();
5549                 be_process_mcc(adapter);
5550                 local_bh_enable();
5551                 goto reschedule;
5552         }
5553
5554         if (!adapter->stats_cmd_sent) {
5555                 if (lancer_chip(adapter))
5556                         lancer_cmd_get_pport_stats(adapter,
5557                                                    &adapter->stats_cmd);
5558                 else
5559                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5560         }
5561
5562         for_all_rx_queues(adapter, rxo, i) {
5563                 /* Replenish RX-queues starved due to memory
5564                  * allocation failures.
5565                  */
5566                 if (rxo->rx_post_starved)
5567                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5568         }
5569
5570         /* EQ-delay update for Skyhawk is done while notifying EQ */
5571         if (!skyhawk_chip(adapter))
5572                 be_eqd_update(adapter, false);
5573
5574         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5575                 be_log_sfp_info(adapter);
5576
5577 reschedule:
5578         adapter->work_counter++;
5579         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5580 }
5581
5582 static void be_unmap_pci_bars(struct be_adapter *adapter)
5583 {
5584         if (adapter->csr)
5585                 pci_iounmap(adapter->pdev, adapter->csr);
5586         if (adapter->db)
5587                 pci_iounmap(adapter->pdev, adapter->db);
5588         if (adapter->pcicfg && adapter->pcicfg_mapped)
5589                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5590 }
5591
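/* Doorbell BAR: BAR 0 on Lancer and on VFs, BAR 4 otherwise */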
5592 static int db_bar(struct be_adapter *adapter)
5593 {
5594         if (lancer_chip(adapter) || be_virtfn(adapter))
5595                 return 0;
5596         else
5597                 return 4;
5598 }
5599
5600 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5601 {
5602         if (skyhawk_chip(adapter)) {
5603                 adapter->roce_db.size = 4096;
5604                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5605                                                               db_bar(adapter));
5606                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5607                                                                db_bar(adapter));
5608         }
5609         return 0;
5610 }
5611
5612 static int be_map_pci_bars(struct be_adapter *adapter)
5613 {
5614         struct pci_dev *pdev = adapter->pdev;
5615         u8 __iomem *addr;
5616         u32 sli_intf;
5617
5618         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5619         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5620                                 SLI_INTF_FAMILY_SHIFT;
5621         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5622
5623         if (BEx_chip(adapter) && be_physfn(adapter)) {
5624                 adapter->csr = pci_iomap(pdev, 2, 0);
5625                 if (!adapter->csr)
5626                         return -ENOMEM;
5627         }
5628
5629         addr = pci_iomap(pdev, db_bar(adapter), 0);
5630         if (!addr)
5631                 goto pci_map_err;
5632         adapter->db = addr;
5633
5634         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5635                 if (be_physfn(adapter)) {
5636                         /* PCICFG is the 2nd BAR in BE2 */
5637                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5638                         if (!addr)
5639                                 goto pci_map_err;
5640                         adapter->pcicfg = addr;
5641                         adapter->pcicfg_mapped = true;
5642                 } else {
5643                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5644                         adapter->pcicfg_mapped = false;
5645                 }
5646         }
5647
5648         be_roce_map_pci_bars(adapter);
5649         return 0;
5650
5651 pci_map_err:
5652         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5653         be_unmap_pci_bars(adapter);
5654         return -ENOMEM;
5655 }
5656
5657 static void be_drv_cleanup(struct be_adapter *adapter)
5658 {
5659         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5660         struct device *dev = &adapter->pdev->dev;
5661
5662         if (mem->va)
5663                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664
5665         mem = &adapter->rx_filter;
5666         if (mem->va)
5667                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5668
5669         mem = &adapter->stats_cmd;
5670         if (mem->va)
5671                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5672 }
5673
/* Allocate and initialize various fields in be_adapter struct */
static int be_drv_init(struct be_adapter *adapter)
{
	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
	struct be_dma_mem *rx_filter = &adapter->rx_filter;
	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
	struct device *dev = &adapter->pdev->dev;
	int status = 0;

	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
						 &mbox_mem_alloc->dma,
						 GFP_KERNEL);
	if (!mbox_mem_alloc->va)
		return -ENOMEM;

	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);

	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
					    &rx_filter->dma, GFP_KERNEL);
	if (!rx_filter->va) {
		status = -ENOMEM;
		goto free_mbox;
	}

	if (lancer_chip(adapter))
		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
	else if (BE2_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
	else if (BE3_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
	else
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
					    &stats_cmd->dma, GFP_KERNEL);
	if (!stats_cmd->va) {
		status = -ENOMEM;
		goto free_rx_filter;
	}

	mutex_init(&adapter->mbox_lock);
	mutex_init(&adapter->mcc_lock);
	mutex_init(&adapter->rx_filter_lock);
	spin_lock_init(&adapter->mcc_cq_lock);
	init_completion(&adapter->et_cmd_compl);

	pci_save_state(adapter->pdev);

	INIT_DELAYED_WORK(&adapter->work, be_worker);

	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
	adapter->error_recovery.resched_delay = 0;
	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
			  be_err_detection_task);

	adapter->rx_fc = true;
	adapter->tx_fc = true;

	/* Must be a power of 2 or else MODULO will BUG_ON */
	adapter->be_get_temp_freq = 64;

	return 0;

free_rx_filter:
	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
free_mbox:
	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
			  mbox_mem_alloc->dma);
	return status;
}

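/* PCI ->remove() callback: unregister the netdev, tear down adapter state
 * and release all PCI resources held by the driver.
 */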
static void be_remove(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	if (!adapter)
		return;

	be_roce_dev_remove(adapter);
	be_intr_set(adapter, false);

	be_cancel_err_detection(adapter);

	unregister_netdev(adapter->netdev);

	be_clear(adapter);

	if (!pci_vfs_assigned(adapter->pdev))
		be_cmd_reset_function(adapter);

	/* Tell FW we are done issuing cmds */
	be_cmd_fw_clean(adapter);

	be_unmap_pci_bars(adapter);
	be_drv_cleanup(adapter);

	pci_disable_pcie_error_reporting(pdev);

	pci_release_regions(pdev);
	pci_disable_device(pdev);

	free_netdev(adapter->netdev);
}

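/* hwmon show() handler for temp1_input: report the cached on-die
 * temperature in millidegrees Celsius, or -EIO if no valid reading
 * is available.
 */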
static ssize_t be_hwmon_show_temp(struct device *dev,
				  struct device_attribute *dev_attr,
				  char *buf)
{
	struct be_adapter *adapter = dev_get_drvdata(dev);

	/* Unit: millidegree Celsius */
	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
		return -EIO;
	else
		return sprintf(buf, "%u\n",
			       adapter->hwmon_info.be_on_die_temp * 1000);
}

static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
			  be_hwmon_show_temp, NULL, 1);

static struct attribute *be_hwmon_attrs[] = {
	&sensor_dev_attr_temp1_input.dev_attr.attr,
	NULL
};

ATTRIBUTE_GROUPS(be_hwmon);

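/* Human-readable name for the adapter's multi-channel (mc_type) mode */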
static char *mc_name(struct be_adapter *adapter)
{
	char *str = "";	/* default */

	switch (adapter->mc_type) {
	case UMC:
		str = "UMC";
		break;
	case FLEX10:
		str = "FLEX10";
		break;
	case vNIC1:
		str = "vNIC-1";
		break;
	case nPAR:
		str = "nPAR";
		break;
	case UFP:
		str = "UFP";
		break;
	case vNIC2:
		str = "vNIC-2";
		break;
	default:
		str = "";
	}

	return str;
}

static inline char *func_name(struct be_adapter *adapter)
{
	return be_physfn(adapter) ? "PF" : "VF";
}

static inline char *nic_name(struct pci_dev *pdev)
{
	switch (pdev->device) {
	case OC_DEVICE_ID1:
		return OC_NAME;
	case OC_DEVICE_ID2:
		return OC_NAME_BE;
	case OC_DEVICE_ID3:
	case OC_DEVICE_ID4:
		return OC_NAME_LANCER;
	case BE_DEVICE_ID2:
		return BE3_NAME;
	case OC_DEVICE_ID5:
	case OC_DEVICE_ID6:
		return OC_NAME_SH;
	default:
		return BE_NAME;
	}
}

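/* PCI ->probe() callback: enable the device, map its BARs, allocate the
 * netdev, bring the adapter up and register it with the network stack.
 * Error-recovery work and (on PFs) the hwmon sensor are also set up here.
 */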
static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
{
	struct be_adapter *adapter;
	struct net_device *netdev;
	int status = 0;

	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);

	status = pci_enable_device(pdev);
	if (status)
		goto do_none;

	status = pci_request_regions(pdev, DRV_NAME);
	if (status)
		goto disable_dev;
	pci_set_master(pdev);

	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
	if (!netdev) {
		status = -ENOMEM;
		goto rel_reg;
	}
	adapter = netdev_priv(netdev);
	adapter->pdev = pdev;
	pci_set_drvdata(pdev, adapter);
	adapter->netdev = netdev;
	SET_NETDEV_DEV(netdev, &pdev->dev);

	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (!status) {
		netdev->features |= NETIF_F_HIGHDMA;
	} else {
		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (status) {
			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
			goto free_netdev;
		}
	}

	status = pci_enable_pcie_error_reporting(pdev);
	if (!status)
		dev_info(&pdev->dev, "PCIe error reporting enabled\n");

	status = be_map_pci_bars(adapter);
	if (status)
		goto free_netdev;

	status = be_drv_init(adapter);
	if (status)
		goto unmap_bars;

	status = be_setup(adapter);
	if (status)
		goto drv_cleanup;

	be_netdev_init(netdev);
	status = register_netdev(netdev);
	if (status != 0)
		goto unsetup;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	adapter->error_recovery.probe_time = jiffies;

	/* On-die temperature is not supported on VFs */
	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
		adapter->hwmon_info.hwmon_dev =
			devm_hwmon_device_register_with_groups(&pdev->dev,
							       DRV_NAME,
							       adapter,
							       be_hwmon_groups);
		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
	}

	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
		 func_name(adapter), mc_name(adapter), adapter->port_name);

	return 0;

unsetup:
	be_clear(adapter);
drv_cleanup:
	be_drv_cleanup(adapter);
unmap_bars:
	be_unmap_pci_bars(adapter);
free_netdev:
	free_netdev(netdev);
rel_reg:
	pci_release_regions(pdev);
disable_dev:
	pci_disable_device(pdev);
do_none:
	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
	return status;
}

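/* Legacy PCI PM suspend callback: quiesce the adapter, save config space
 * and move the device into the requested low-power state.
 */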
static int be_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	be_intr_set(adapter, false);
	be_cancel_err_detection(adapter);

	be_cleanup(adapter);

	pci_save_state(pdev);
	pci_disable_device(pdev);
	pci_set_power_state(pdev, pci_choose_state(pdev, state));
	return 0;
}

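/* Legacy PCI PM resume callback: re-enable the device, restore its config
 * space and bring the adapter back to its pre-suspend state.
 */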
static int be_pci_resume(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	int status = 0;

	status = pci_enable_device(pdev);
	if (status)
		return status;

	pci_restore_state(pdev);

	status = be_resume(adapter);
	if (status)
		return status;

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);

	return 0;
}

/* An FLR will stop BE from DMAing any data. */
static void be_shutdown(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	if (!adapter)
		return;

	be_roce_dev_shutdown(adapter);
	cancel_delayed_work_sync(&adapter->work);
	be_cancel_err_detection(adapter);

	netif_device_detach(adapter->netdev);

	be_cmd_reset_function(adapter);

	pci_disable_device(pdev);
}

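/* PCI error recovery (EEH/AER) "error_detected" hook: mark the adapter as
 * failed, quiesce it and tell the PCI core whether a slot reset may be able
 * to recover the device.
 */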
static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_err(&adapter->pdev->dev, "EEH error detected\n");

	be_roce_dev_remove(adapter);

	if (!be_check_error(adapter, BE_ERROR_EEH)) {
		be_set_error(adapter, BE_ERROR_EEH);

		be_cancel_err_detection(adapter);

		be_cleanup(adapter);
	}

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_disable_device(pdev);

	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while flash dump is in progress
	 * can cause it not to recover; wait for it to finish.
	 * Wait only for first function as it is needed only once per
	 * adapter.
	 */
	if (pdev->devfn == 0)
		ssleep(30);

	return PCI_ERS_RESULT_NEED_RESET;
}

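/* EEH/AER "slot_reset" hook: re-enable the freshly reset device and wait for
 * FW to become ready again before reporting the slot as recovered.
 */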
static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	int status;

	dev_info(&adapter->pdev->dev, "EEH reset\n");

	status = pci_enable_device(pdev);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_set_master(pdev);
	pci_restore_state(pdev);

	/* Check if card is ok and fw is ready */
	dev_info(&adapter->pdev->dev,
		 "Waiting for FW to be ready after EEH reset\n");
	status = be_fw_wait_ready(adapter);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_cleanup_aer_uncorrect_error_status(pdev);
	be_clear_error(adapter, BE_CLEAR_ALL);
	return PCI_ERS_RESULT_RECOVERED;
}

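/* EEH/AER "resume" hook: bring the adapter back into operation once the slot
 * has been successfully reset.
 */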
static void be_eeh_resume(struct pci_dev *pdev)
{
	int status = 0;
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_info(&adapter->pdev->dev, "EEH resume\n");

	pci_save_state(pdev);

	status = be_resume(adapter);
	if (status)
		goto err;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	return;
err:
	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
}

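/* PCI sriov_configure hook, invoked when VFs are enabled or disabled via
 * sysfs: (re)distribute PF-pool resources where the HW supports it and
 * create or destroy the requested number of VFs.
 */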
static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	struct be_resources vft_res = {0};
	int status;

	if (!num_vfs)
		be_vf_clear(adapter);

	adapter->num_vfs = num_vfs;

	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
		dev_warn(&pdev->dev,
			 "Cannot disable VFs while they are assigned\n");
		return -EBUSY;
	}

	/* When the HW is in an SR-IOV-capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of VFs.
	 * The user may request that only a subset of the max VFs be enabled.
	 * In that case, redistribute the resources across num_vfs so that each
	 * enabled VF gets a larger share of the resources.
	 * This facility is not available in BE3 FW.
	 * On Lancer chips the FW performs this redistribution itself.
	 */
	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
		be_calculate_vf_res(adapter, adapter->num_vfs,
				    &vft_res);
		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
						 adapter->num_vfs, &vft_res);
		if (status)
			dev_err(&pdev->dev,
				"Failed to optimize SR-IOV resources\n");
	}

	status = be_get_resources(adapter);
	if (status)
		return be_cmd_status(status);

	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
	rtnl_lock();
	status = be_update_queues(adapter);
	rtnl_unlock();
	if (status)
		return be_cmd_status(status);

	if (adapter->num_vfs)
		status = be_vf_setup(adapter);

	if (!status)
		return adapter->num_vfs;

	return 0;
}

static const struct pci_error_handlers be_eeh_handlers = {
	.error_detected = be_eeh_err_detected,
	.slot_reset = be_eeh_reset,
	.resume = be_eeh_resume,
};

static struct pci_driver be_driver = {
	.name = DRV_NAME,
	.id_table = be_dev_ids,
	.probe = be_probe,
	.remove = be_remove,
	.suspend = be_suspend,
	.resume = be_pci_resume,
	.shutdown = be_shutdown,
	.sriov_configure = be_pci_sriov_configure,
	.err_handler = &be_eeh_handlers
};

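/* Module init: validate module parameters, create the module-wide
 * workqueues and register the PCI driver.
 */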
static int __init be_init_module(void)
{
	int status;

	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
	    rx_frag_size != 2048) {
		printk(KERN_WARNING DRV_NAME
			" : Module param rx_frag_size must be 2048/4096/8192."
			" Using 2048\n");
		rx_frag_size = 2048;
	}

	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
	}

	be_wq = create_singlethread_workqueue("be_wq");
	if (!be_wq) {
		pr_warn(DRV_NAME " : workqueue creation failed\n");
		return -ENOMEM;
	}

	be_err_recovery_workq =
		create_singlethread_workqueue("be_err_recover");
	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");

	status = pci_register_driver(&be_driver);
	if (status) {
		destroy_workqueue(be_wq);
		be_destroy_err_recovery_workq();
	}
	return status;
}
module_init(be_init_module);

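/* Module exit: unregister the PCI driver and destroy the module-wide
 * workqueues.
 */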
static void __exit be_exit_module(void)
{
	pci_unregister_driver(&be_driver);

	be_destroy_err_recovery_workq();

	if (be_wq)
		destroy_workqueue(be_wq);
}
module_exit(be_exit_module);