1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
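/* Enable/disable host interrupts via the HOSTINTR bit in the PCI config-space MEMBAR interrupt control register */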
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
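/* Notify HW of newly posted RX buffer descriptors by ringing the RQ doorbell */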
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
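/* Notify HW of newly posted TX WRBs by ringing the TX doorbell */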
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
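/* Ring the EQ doorbell to return num_popped processed entries; optionally re-arm the EQ and clear the interrupt */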
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
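/* Ring the CQ doorbell to return num_popped processed entries; optionally re-arm the CQ */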
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* if device is not running, copy MAC to netdev->dev_addr */
322         if (!netif_running(netdev))
323                 goto done;
324
325         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
326          * privilege or if PF did not provision the new MAC address.
327          * On BE3, this cmd will always fail if the VF doesn't have the
328          * FILTMGMT privilege. This failure is OK, only if the PF programmed
329          * the MAC for the VF.
330          */
331         mutex_lock(&adapter->rx_filter_lock);
332         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
333         if (!status) {
334
335                 /* Delete the old programmed MAC. This call may fail if the
336                  * old MAC was already deleted by the PF driver.
337                  */
338                 if (adapter->pmac_id[0] != old_pmac_id)
339                         be_dev_mac_del(adapter, old_pmac_id);
340         }
341
342         mutex_unlock(&adapter->rx_filter_lock);
343         /* Decide if the new MAC is successfully activated only after
344          * querying the FW
345          */
346         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
347                                        adapter->if_handle, true, 0);
348         if (status)
349                 goto err;
350
351         /* The MAC change did not happen, either due to lack of privilege
352          * or PF didn't pre-provision.
353          */
354         if (!ether_addr_equal(addr->sa_data, mac)) {
355                 status = -EPERM;
356                 goto err;
357         }
358 done:
359         ether_addr_copy(adapter->dev_mac, addr->sa_data);
360         ether_addr_copy(netdev->dev_addr, addr->sa_data);
361         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
362         return 0;
363 err:
364         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
365         return status;
366 }
367
368 /* BE2 supports only v0 cmd */
369 static void *hw_stats_from_cmd(struct be_adapter *adapter)
370 {
371         if (BE2_chip(adapter)) {
372                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
373
374                 return &cmd->hw_stats;
375         } else if (BE3_chip(adapter)) {
376                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
377
378                 return &cmd->hw_stats;
379         } else {
380                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
381
382                 return &cmd->hw_stats;
383         }
384 }
385
386 /* BE2 supports only v0 cmd */
387 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
388 {
389         if (BE2_chip(adapter)) {
390                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
391
392                 return &hw_stats->erx;
393         } else if (BE3_chip(adapter)) {
394                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
395
396                 return &hw_stats->erx;
397         } else {
398                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
399
400                 return &hw_stats->erx;
401         }
402 }
403
404 static void populate_be_v0_stats(struct be_adapter *adapter)
405 {
406         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
407         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
408         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
409         struct be_port_rxf_stats_v0 *port_stats =
410                                         &rxf_stats->port[adapter->port_num];
411         struct be_drv_stats *drvs = &adapter->drv_stats;
412
413         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
414         drvs->rx_pause_frames = port_stats->rx_pause_frames;
415         drvs->rx_crc_errors = port_stats->rx_crc_errors;
416         drvs->rx_control_frames = port_stats->rx_control_frames;
417         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
418         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
419         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
420         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
421         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
422         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
423         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
424         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
425         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
426         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
427         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
428         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
429         drvs->rx_dropped_header_too_small =
430                 port_stats->rx_dropped_header_too_small;
431         drvs->rx_address_filtered =
432                                         port_stats->rx_address_filtered +
433                                         port_stats->rx_vlan_filtered;
434         drvs->rx_alignment_symbol_errors =
435                 port_stats->rx_alignment_symbol_errors;
436
437         drvs->tx_pauseframes = port_stats->tx_pauseframes;
438         drvs->tx_controlframes = port_stats->tx_controlframes;
439
440         if (adapter->port_num)
441                 drvs->jabber_events = rxf_stats->port1_jabber_events;
442         else
443                 drvs->jabber_events = rxf_stats->port0_jabber_events;
444         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
445         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
446         drvs->forwarded_packets = rxf_stats->forwarded_packets;
447         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
448         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
449         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
450         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
451 }
452
453 static void populate_be_v1_stats(struct be_adapter *adapter)
454 {
455         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
456         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
457         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
458         struct be_port_rxf_stats_v1 *port_stats =
459                                         &rxf_stats->port[adapter->port_num];
460         struct be_drv_stats *drvs = &adapter->drv_stats;
461
462         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
463         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
464         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
465         drvs->rx_pause_frames = port_stats->rx_pause_frames;
466         drvs->rx_crc_errors = port_stats->rx_crc_errors;
467         drvs->rx_control_frames = port_stats->rx_control_frames;
468         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
469         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
470         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
471         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
472         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
473         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
474         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
475         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
476         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
477         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
478         drvs->rx_dropped_header_too_small =
479                 port_stats->rx_dropped_header_too_small;
480         drvs->rx_input_fifo_overflow_drop =
481                 port_stats->rx_input_fifo_overflow_drop;
482         drvs->rx_address_filtered = port_stats->rx_address_filtered;
483         drvs->rx_alignment_symbol_errors =
484                 port_stats->rx_alignment_symbol_errors;
485         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
486         drvs->tx_pauseframes = port_stats->tx_pauseframes;
487         drvs->tx_controlframes = port_stats->tx_controlframes;
488         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
489         drvs->jabber_events = port_stats->jabber_events;
490         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
491         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
492         drvs->forwarded_packets = rxf_stats->forwarded_packets;
493         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
494         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
495         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
496         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
497 }
498
499 static void populate_be_v2_stats(struct be_adapter *adapter)
500 {
501         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
502         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
503         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
504         struct be_port_rxf_stats_v2 *port_stats =
505                                         &rxf_stats->port[adapter->port_num];
506         struct be_drv_stats *drvs = &adapter->drv_stats;
507
508         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
509         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
510         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
511         drvs->rx_pause_frames = port_stats->rx_pause_frames;
512         drvs->rx_crc_errors = port_stats->rx_crc_errors;
513         drvs->rx_control_frames = port_stats->rx_control_frames;
514         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
515         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
516         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
517         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
518         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
519         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
520         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
521         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
522         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
523         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
524         drvs->rx_dropped_header_too_small =
525                 port_stats->rx_dropped_header_too_small;
526         drvs->rx_input_fifo_overflow_drop =
527                 port_stats->rx_input_fifo_overflow_drop;
528         drvs->rx_address_filtered = port_stats->rx_address_filtered;
529         drvs->rx_alignment_symbol_errors =
530                 port_stats->rx_alignment_symbol_errors;
531         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
532         drvs->tx_pauseframes = port_stats->tx_pauseframes;
533         drvs->tx_controlframes = port_stats->tx_controlframes;
534         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
535         drvs->jabber_events = port_stats->jabber_events;
536         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
537         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
538         drvs->forwarded_packets = rxf_stats->forwarded_packets;
539         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
540         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
541         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
542         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
543         if (be_roce_supported(adapter)) {
544                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
545                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
546                 drvs->rx_roce_frames = port_stats->roce_frames_received;
547                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
548                 drvs->roce_drops_payload_len =
549                         port_stats->roce_drops_payload_len;
550         }
551 }
552
553 static void populate_lancer_stats(struct be_adapter *adapter)
554 {
555         struct be_drv_stats *drvs = &adapter->drv_stats;
556         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
557
558         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
559         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
560         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
561         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
562         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
563         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
564         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
565         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
566         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
567         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
568         drvs->rx_dropped_tcp_length =
569                                 pport_stats->rx_dropped_invalid_tcp_length;
570         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
571         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
572         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
573         drvs->rx_dropped_header_too_small =
574                                 pport_stats->rx_dropped_header_too_small;
575         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
576         drvs->rx_address_filtered =
577                                         pport_stats->rx_address_filtered +
578                                         pport_stats->rx_vlan_filtered;
579         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
580         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
581         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
582         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
583         drvs->jabber_events = pport_stats->rx_jabbers;
584         drvs->forwarded_packets = pport_stats->num_forwards_lo;
585         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
586         drvs->rx_drops_too_many_frags =
587                                 pport_stats->rx_drops_too_many_frags_lo;
588 }
589
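/* Accumulate a 16-bit HW counter (which wraps at 65535) into a 32-bit driver counter */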
590 static void accumulate_16bit_val(u32 *acc, u16 val)
591 {
592 #define lo(x)                   (x & 0xFFFF)
593 #define hi(x)                   (x & 0xFFFF0000)
594         bool wrapped = val < lo(*acc);
595         u32 newacc = hi(*acc) + val;
596
597         if (wrapped)
598                 newacc += 65536;
599         ACCESS_ONCE(*acc) = newacc;
600 }
601
602 static void populate_erx_stats(struct be_adapter *adapter,
603                                struct be_rx_obj *rxo, u32 erx_stat)
604 {
605         if (!BEx_chip(adapter))
606                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
607         else
608                 /* the erx HW counter below can actually wrap around after
609                  * 65535; the driver accumulates it into a 32-bit value
610                  */
611                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
612                                      (u16)erx_stat);
613 }
614
615 void be_parse_stats(struct be_adapter *adapter)
616 {
617         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
618         struct be_rx_obj *rxo;
619         int i;
620         u32 erx_stat;
621
622         if (lancer_chip(adapter)) {
623                 populate_lancer_stats(adapter);
624         } else {
625                 if (BE2_chip(adapter))
626                         populate_be_v0_stats(adapter);
627                 else if (BE3_chip(adapter))
628                         /* for BE3 */
629                         populate_be_v1_stats(adapter);
630                 else
631                         populate_be_v2_stats(adapter);
632
633                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
634                 for_all_rx_queues(adapter, rxo, i) {
635                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
636                         populate_erx_stats(adapter, rxo, erx_stat);
637                 }
638         }
639 }
640
641 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
642                                                 struct rtnl_link_stats64 *stats)
643 {
644         struct be_adapter *adapter = netdev_priv(netdev);
645         struct be_drv_stats *drvs = &adapter->drv_stats;
646         struct be_rx_obj *rxo;
647         struct be_tx_obj *txo;
648         u64 pkts, bytes;
649         unsigned int start;
650         int i;
651
652         for_all_rx_queues(adapter, rxo, i) {
653                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
654
655                 do {
656                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
657                         pkts = rx_stats(rxo)->rx_pkts;
658                         bytes = rx_stats(rxo)->rx_bytes;
659                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
660                 stats->rx_packets += pkts;
661                 stats->rx_bytes += bytes;
662                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
663                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
664                                         rx_stats(rxo)->rx_drops_no_frags;
665         }
666
667         for_all_tx_queues(adapter, txo, i) {
668                 const struct be_tx_stats *tx_stats = tx_stats(txo);
669
670                 do {
671                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
672                         pkts = tx_stats(txo)->tx_pkts;
673                         bytes = tx_stats(txo)->tx_bytes;
674                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
675                 stats->tx_packets += pkts;
676                 stats->tx_bytes += bytes;
677         }
678
679         /* bad pkts received */
680         stats->rx_errors = drvs->rx_crc_errors +
681                 drvs->rx_alignment_symbol_errors +
682                 drvs->rx_in_range_errors +
683                 drvs->rx_out_range_errors +
684                 drvs->rx_frame_too_long +
685                 drvs->rx_dropped_too_small +
686                 drvs->rx_dropped_too_short +
687                 drvs->rx_dropped_header_too_small +
688                 drvs->rx_dropped_tcp_length +
689                 drvs->rx_dropped_runt;
690
691         /* detailed rx errors */
692         stats->rx_length_errors = drvs->rx_in_range_errors +
693                 drvs->rx_out_range_errors +
694                 drvs->rx_frame_too_long;
695
696         stats->rx_crc_errors = drvs->rx_crc_errors;
697
698         /* frame alignment errors */
699         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
700
701         /* receiver fifo overrun */
702         /* drops_no_pbuf is not per i/f, it's per BE card */
703         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
704                                 drvs->rx_input_fifo_overflow_drop +
705                                 drvs->rx_drops_no_pbuf;
706         return stats;
707 }
708
709 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
710 {
711         struct net_device *netdev = adapter->netdev;
712
713         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
714                 netif_carrier_off(netdev);
715                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
716         }
717
718         if (link_status)
719                 netif_carrier_on(netdev);
720         else
721                 netif_carrier_off(netdev);
722
723         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
724 }
725
726 static int be_gso_hdr_len(struct sk_buff *skb)
727 {
728         if (skb->encapsulation)
729                 return skb_inner_transport_offset(skb) +
730                        inner_tcp_hdrlen(skb);
731         return skb_transport_offset(skb) + tcp_hdrlen(skb);
732 }
733
734 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
735 {
736         struct be_tx_stats *stats = tx_stats(txo);
737         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
738         /* Account for headers which get duplicated in TSO pkt */
739         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
740
741         u64_stats_update_begin(&stats->sync);
742         stats->tx_reqs++;
743         stats->tx_bytes += skb->len + dup_hdr_len;
744         stats->tx_pkts += tx_pkts;
745         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
746                 stats->tx_vxlan_offload_pkts += tx_pkts;
747         u64_stats_update_end(&stats->sync);
748 }
749
750 /* Returns number of WRBs needed for the skb */
751 static u32 skb_wrb_cnt(struct sk_buff *skb)
752 {
753         /* +1 for the header wrb */
754         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
755 }
756
757 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
758 {
759         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
760         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
761         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
762         wrb->rsvd0 = 0;
763 }
764
765 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
766  * to avoid the swap and shift/mask operations in wrb_fill().
767  */
768 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
769 {
770         wrb->frag_pa_hi = 0;
771         wrb->frag_pa_lo = 0;
772         wrb->frag_len = 0;
773         wrb->rsvd0 = 0;
774 }
775
776 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
777                                      struct sk_buff *skb)
778 {
779         u8 vlan_prio;
780         u16 vlan_tag;
781
782         vlan_tag = skb_vlan_tag_get(skb);
783         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
784         /* If vlan priority provided by OS is NOT in available bmap */
785         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
786                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
787                                 adapter->recommended_prio_bits;
788
789         return vlan_tag;
790 }
791
792 /* Used only for IP tunnel packets */
793 static u16 skb_inner_ip_proto(struct sk_buff *skb)
794 {
795         return (inner_ip_hdr(skb)->version == 4) ?
796                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
797 }
798
799 static u16 skb_ip_proto(struct sk_buff *skb)
800 {
801         return (ip_hdr(skb)->version == 4) ?
802                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
803 }
804
805 static inline bool be_is_txq_full(struct be_tx_obj *txo)
806 {
807         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
808 }
809
810 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
811 {
812         return atomic_read(&txo->q.used) < txo->q.len / 2;
813 }
814
815 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
818 }
819
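/* Translate the skb's offload state (GSO, checksum, VLAN tag) into TX WRB header flags */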
820 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
821                                        struct sk_buff *skb,
822                                        struct be_wrb_params *wrb_params)
823 {
824         u16 proto;
825
826         if (skb_is_gso(skb)) {
827                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
828                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
829                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
830                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
831         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
832                 if (skb->encapsulation) {
833                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
834                         proto = skb_inner_ip_proto(skb);
835                 } else {
836                         proto = skb_ip_proto(skb);
837                 }
838                 if (proto == IPPROTO_TCP)
839                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
840                 else if (proto == IPPROTO_UDP)
841                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
842         }
843
844         if (skb_vlan_tag_present(skb)) {
845                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
846                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
847         }
848
849         BE_WRB_F_SET(wrb_params->features, CRC, 1);
850 }
851
852 static void wrb_fill_hdr(struct be_adapter *adapter,
853                          struct be_eth_hdr_wrb *hdr,
854                          struct be_wrb_params *wrb_params,
855                          struct sk_buff *skb)
856 {
857         memset(hdr, 0, sizeof(*hdr));
858
859         SET_TX_WRB_HDR_BITS(crc, hdr,
860                             BE_WRB_F_GET(wrb_params->features, CRC));
861         SET_TX_WRB_HDR_BITS(ipcs, hdr,
862                             BE_WRB_F_GET(wrb_params->features, IPCS));
863         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
864                             BE_WRB_F_GET(wrb_params->features, TCPCS));
865         SET_TX_WRB_HDR_BITS(udpcs, hdr,
866                             BE_WRB_F_GET(wrb_params->features, UDPCS));
867
868         SET_TX_WRB_HDR_BITS(lso, hdr,
869                             BE_WRB_F_GET(wrb_params->features, LSO));
870         SET_TX_WRB_HDR_BITS(lso6, hdr,
871                             BE_WRB_F_GET(wrb_params->features, LSO6));
872         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
873
874         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
875          * hack is not needed, the evt bit is set while ringing DB.
876          */
877         SET_TX_WRB_HDR_BITS(event, hdr,
878                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
879         SET_TX_WRB_HDR_BITS(vlan, hdr,
880                             BE_WRB_F_GET(wrb_params->features, VLAN));
881         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
882
883         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
884         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
885         SET_TX_WRB_HDR_BITS(mgmt, hdr,
886                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
887 }
888
889 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
890                           bool unmap_single)
891 {
892         dma_addr_t dma;
893         u32 frag_len = le32_to_cpu(wrb->frag_len);
894
895
896         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
897                 (u64)le32_to_cpu(wrb->frag_pa_lo);
898         if (frag_len) {
899                 if (unmap_single)
900                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
901                 else
902                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
903         }
904 }
905
906 /* Grab a WRB header for xmit */
907 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
908 {
909         u32 head = txo->q.head;
910
911         queue_head_inc(&txo->q);
912         return head;
913 }
914
915 /* Set up the WRB header for xmit */
916 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
917                                 struct be_tx_obj *txo,
918                                 struct be_wrb_params *wrb_params,
919                                 struct sk_buff *skb, u16 head)
920 {
921         u32 num_frags = skb_wrb_cnt(skb);
922         struct be_queue_info *txq = &txo->q;
923         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
924
925         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
926         be_dws_cpu_to_le(hdr, sizeof(*hdr));
927
928         BUG_ON(txo->sent_skb_list[head]);
929         txo->sent_skb_list[head] = skb;
930         txo->last_req_hdr = head;
931         atomic_add(num_frags, &txq->used);
932         txo->last_req_wrb_cnt = num_frags;
933         txo->pend_wrb_cnt += num_frags;
934 }
935
936 /* Setup a WRB fragment (buffer descriptor) for xmit */
937 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
938                                  int len)
939 {
940         struct be_eth_wrb *wrb;
941         struct be_queue_info *txq = &txo->q;
942
943         wrb = queue_head_node(txq);
944         wrb_fill(wrb, busaddr, len);
945         queue_head_inc(txq);
946 }
947
948 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
949  * was invoked. The producer index is restored to the previous packet and the
950  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
951  */
952 static void be_xmit_restore(struct be_adapter *adapter,
953                             struct be_tx_obj *txo, u32 head, bool map_single,
954                             u32 copied)
955 {
956         struct device *dev;
957         struct be_eth_wrb *wrb;
958         struct be_queue_info *txq = &txo->q;
959
960         dev = &adapter->pdev->dev;
961         txq->head = head;
962
963         /* skip the first wrb (hdr); it's not mapped */
964         queue_head_inc(txq);
965         while (copied) {
966                 wrb = queue_head_node(txq);
967                 unmap_tx_frag(dev, wrb, map_single);
968                 map_single = false;
969                 copied -= le32_to_cpu(wrb->frag_len);
970                 queue_head_inc(txq);
971         }
972
973         txq->head = head;
974 }
975
976 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
977  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
978  * of WRBs used up by the packet.
979  */
980 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
981                            struct sk_buff *skb,
982                            struct be_wrb_params *wrb_params)
983 {
984         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
985         struct device *dev = &adapter->pdev->dev;
986         struct be_queue_info *txq = &txo->q;
987         bool map_single = false;
988         u32 head = txq->head;
989         dma_addr_t busaddr;
990         int len;
991
992         head = be_tx_get_wrb_hdr(txo);
993
994         if (skb->len > skb->data_len) {
995                 len = skb_headlen(skb);
996
997                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
998                 if (dma_mapping_error(dev, busaddr))
999                         goto dma_err;
1000                 map_single = true;
1001                 be_tx_setup_wrb_frag(txo, busaddr, len);
1002                 copied += len;
1003         }
1004
1005         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1006                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1007                 len = skb_frag_size(frag);
1008
1009                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1010                 if (dma_mapping_error(dev, busaddr))
1011                         goto dma_err;
1012                 be_tx_setup_wrb_frag(txo, busaddr, len);
1013                 copied += len;
1014         }
1015
1016         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1017
1018         be_tx_stats_update(txo, skb);
1019         return wrb_cnt;
1020
1021 dma_err:
1022         adapter->drv_stats.dma_map_errors++;
1023         be_xmit_restore(adapter, txo, head, map_single, copied);
1024         return 0;
1025 }
1026
1027 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1028 {
1029         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1030 }
1031
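/* Software-insert the VLAN tag (and the outer QnQ tag, if any) into the packet instead of using HW VLAN tagging */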
1032 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1033                                              struct sk_buff *skb,
1034                                              struct be_wrb_params
1035                                              *wrb_params)
1036 {
1037         u16 vlan_tag = 0;
1038
1039         skb = skb_share_check(skb, GFP_ATOMIC);
1040         if (unlikely(!skb))
1041                 return skb;
1042
1043         if (skb_vlan_tag_present(skb))
1044                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1045
1046         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1047                 if (!vlan_tag)
1048                         vlan_tag = adapter->pvid;
1049                 /* f/w workaround: set skip_hw_vlan = 1 to inform the F/W to
1050                  * skip VLAN insertion
1051                  */
1052                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1053         }
1054
1055         if (vlan_tag) {
1056                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1057                                                 vlan_tag);
1058                 if (unlikely(!skb))
1059                         return skb;
1060                 skb->vlan_tci = 0;
1061         }
1062
1063         /* Insert the outer VLAN, if any */
1064         if (adapter->qnq_vid) {
1065                 vlan_tag = adapter->qnq_vid;
1066                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1067                                                 vlan_tag);
1068                 if (unlikely(!skb))
1069                         return skb;
1070                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071         }
1072
1073         return skb;
1074 }
1075
1076 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1077 {
1078         struct ethhdr *eh = (struct ethhdr *)skb->data;
1079         u16 offset = ETH_HLEN;
1080
1081         if (eh->h_proto == htons(ETH_P_IPV6)) {
1082                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1083
1084                 offset += sizeof(struct ipv6hdr);
1085                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1086                     ip6h->nexthdr != NEXTHDR_UDP) {
1087                         struct ipv6_opt_hdr *ehdr =
1088                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1089
1090                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1091                         if (ehdr->hdrlen == 0xff)
1092                                 return true;
1093                 }
1094         }
1095         return false;
1096 }
1097
1098 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1099 {
1100         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1101 }
1102
1103 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1104 {
1105         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1106 }
1107
1108 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1109                                                   struct sk_buff *skb,
1110                                                   struct be_wrb_params
1111                                                   *wrb_params)
1112 {
1113         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1114         unsigned int eth_hdr_len;
1115         struct iphdr *ip;
1116
1117         /* For padded packets, BE HW modifies tot_len field in IP header
1118          * incorrectly when VLAN tag is inserted by HW.
1119          * For padded packets, Lancer computes incorrect checksum.
1120          */
1121         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1122                                                 VLAN_ETH_HLEN : ETH_HLEN;
1123         if (skb->len <= 60 &&
1124             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1125             is_ipv4_pkt(skb)) {
1126                 ip = (struct iphdr *)ip_hdr(skb);
1127                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1128         }
1129
1130         /* If vlan tag is already inlined in the packet, skip HW VLAN
1131          * tagging in pvid-tagging mode
1132          */
1133         if (be_pvid_tagging_enabled(adapter) &&
1134             veh->h_vlan_proto == htons(ETH_P_8021Q))
1135                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1136
1137         /* HW has a bug wherein it will calculate CSUM for VLAN
1138          * pkts even though it is disabled.
1139          * Manually insert VLAN in pkt.
1140          */
1141         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1142             skb_vlan_tag_present(skb)) {
1143                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1144                 if (unlikely(!skb))
1145                         goto err;
1146         }
1147
1148         /* HW may lockup when VLAN HW tagging is requested on
1149          * certain ipv6 packets. Drop such pkts if the HW workaround to
1150          * skip HW tagging is not enabled by FW.
1151          */
1152         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1153                      (adapter->pvid || adapter->qnq_vid) &&
1154                      !qnq_async_evt_rcvd(adapter)))
1155                 goto tx_drop;
1156
1157         /* Manual VLAN tag insertion to prevent:
1158          * ASIC lockup when the ASIC inserts VLAN tag into
1159          * certain ipv6 packets. Insert VLAN tags in driver,
1160          * and set event, completion, vlan bits accordingly
1161          * in the Tx WRB.
1162          */
1163         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1164             be_vlan_tag_tx_chk(adapter, skb)) {
1165                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1166                 if (unlikely(!skb))
1167                         goto err;
1168         }
1169
1170         return skb;
1171 tx_drop:
1172         dev_kfree_skb_any(skb);
1173 err:
1174         return NULL;
1175 }
1176
1177 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1178                                            struct sk_buff *skb,
1179                                            struct be_wrb_params *wrb_params)
1180 {
1181         int err;
1182
1183         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1184          * packets that are 32b or less may cause a transmit stall
1185          * on that port. The workaround is to pad such packets
1186          * (len <= 32 bytes) to a minimum length of 36b.
1187          */
1188         if (skb->len <= 32) {
1189                 if (skb_put_padto(skb, 36))
1190                         return NULL;
1191         }
1192
1193         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1194                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1195                 if (!skb)
1196                         return NULL;
1197         }
1198
1199         /* The stack can send us skbs with length greater than
1200          * what the HW can handle. Trim the extra bytes.
1201          */
1202         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1203         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1204         WARN_ON(err);
1205
1206         return skb;
1207 }
1208
1209 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1210 {
1211         struct be_queue_info *txq = &txo->q;
1212         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1213
1214         /* Mark the last request eventable if it hasn't been marked already */
1215         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1216                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1217
1218         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1219         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1220                 wrb_fill_dummy(queue_head_node(txq));
1221                 queue_head_inc(txq);
1222                 atomic_inc(&txq->used);
1223                 txo->pend_wrb_cnt++;
1224                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1225                                            TX_HDR_WRB_NUM_SHIFT);
1226                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1227                                           TX_HDR_WRB_NUM_SHIFT);
1228         }
1229         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1230         txo->pend_wrb_cnt = 0;
1231 }
1232
1233 /* OS2BMC related */
1234
1235 #define DHCP_CLIENT_PORT        68
1236 #define DHCP_SERVER_PORT        67
1237 #define NET_BIOS_PORT1          137
1238 #define NET_BIOS_PORT2          138
1239 #define DHCPV6_RAS_PORT         547
1240
1241 #define is_mc_allowed_on_bmc(adapter, eh)       \
1242         (!is_multicast_filt_enabled(adapter) && \
1243          is_multicast_ether_addr(eh->h_dest) && \
1244          !is_broadcast_ether_addr(eh->h_dest))
1245
1246 #define is_bc_allowed_on_bmc(adapter, eh)       \
1247         (!is_broadcast_filt_enabled(adapter) && \
1248          is_broadcast_ether_addr(eh->h_dest))
1249
1250 #define is_arp_allowed_on_bmc(adapter, skb)     \
1251         (is_arp(skb) && is_arp_filt_enabled(adapter))
1252
1253 #define is_broadcast_packet(eh, adapter)        \
1254                 (is_multicast_ether_addr(eh->h_dest) && \
1255                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1256
1257 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1258
1259 #define is_arp_filt_enabled(adapter)    \
1260                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1261
1262 #define is_dhcp_client_filt_enabled(adapter)    \
1263                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1264
1265 #define is_dhcp_srvr_filt_enabled(adapter)      \
1266                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1267
1268 #define is_nbios_filt_enabled(adapter)  \
1269                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1270
1271 #define is_ipv6_na_filt_enabled(adapter)        \
1272                 (adapter->bmc_filt_mask &       \
1273                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1274
1275 #define is_ipv6_ra_filt_enabled(adapter)        \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1277
1278 #define is_ipv6_ras_filt_enabled(adapter)       \
1279                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1280
1281 #define is_broadcast_filt_enabled(adapter)      \
1282                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1283
1284 #define is_multicast_filt_enabled(adapter)      \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1286
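/* Decide whether this TX packet must also be sent to the BMC (OS2BMC), based on the configured BMC filters */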
1287 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1288                                struct sk_buff **skb)
1289 {
1290         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1291         bool os2bmc = false;
1292
1293         if (!be_is_os2bmc_enabled(adapter))
1294                 goto done;
1295
1296         if (!is_multicast_ether_addr(eh->h_dest))
1297                 goto done;
1298
1299         if (is_mc_allowed_on_bmc(adapter, eh) ||
1300             is_bc_allowed_on_bmc(adapter, eh) ||
1301             is_arp_allowed_on_bmc(adapter, (*skb))) {
1302                 os2bmc = true;
1303                 goto done;
1304         }
1305
1306         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1307                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1308                 u8 nexthdr = hdr->nexthdr;
1309
1310                 if (nexthdr == IPPROTO_ICMPV6) {
1311                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1312
1313                         switch (icmp6->icmp6_type) {
1314                         case NDISC_ROUTER_ADVERTISEMENT:
1315                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1316                                 goto done;
1317                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1318                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1319                                 goto done;
1320                         default:
1321                                 break;
1322                         }
1323                 }
1324         }
1325
1326         if (is_udp_pkt((*skb))) {
1327                 struct udphdr *udp = udp_hdr((*skb));
1328
1329                 switch (ntohs(udp->dest)) {
1330                 case DHCP_CLIENT_PORT:
1331                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1332                         goto done;
1333                 case DHCP_SERVER_PORT:
1334                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1335                         goto done;
1336                 case NET_BIOS_PORT1:
1337                 case NET_BIOS_PORT2:
1338                         os2bmc = is_nbios_filt_enabled(adapter);
1339                         goto done;
1340                 case DHCPV6_RAS_PORT:
1341                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1342                         goto done;
1343                 default:
1344                         break;
1345                 }
1346         }
1347 done:
1348         /* For packets over a vlan that are destined to the BMC,
1349          * the asic expects the vlan tag to be inline in the packet.
1350          */
1351         if (os2bmc)
1352                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1353
1354         return os2bmc;
1355 }
1356
1357 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1358 {
1359         struct be_adapter *adapter = netdev_priv(netdev);
1360         u16 q_idx = skb_get_queue_mapping(skb);
1361         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1362         struct be_wrb_params wrb_params = { 0 };
1363         bool flush = !skb->xmit_more;
1364         u16 wrb_cnt;
1365
1366         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1367         if (unlikely(!skb))
1368                 goto drop;
1369
1370         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1371
1372         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1373         if (unlikely(!wrb_cnt)) {
1374                 dev_kfree_skb_any(skb);
1375                 goto drop;
1376         }
1377
1378         /* If os2bmc is enabled and the pkt is destined to the BMC,
1379          * enqueue the pkt a second time with the mgmt bit set.
1380          */
1381         if (be_send_pkt_to_bmc(adapter, &skb)) {
1382                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1383                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384                 if (unlikely(!wrb_cnt))
1385                         goto drop;
1386                 else
1387                         skb_get(skb);
1388         }
1389
1390         if (be_is_txq_full(txo)) {
1391                 netif_stop_subqueue(netdev, q_idx);
1392                 tx_stats(txo)->tx_stops++;
1393         }
1394
1395         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1396                 be_xmit_flush(adapter, txo);
1397
1398         return NETDEV_TX_OK;
1399 drop:
1400         tx_stats(txo)->tx_drv_drops++;
1401         /* Flush the already enqueued tx requests */
1402         if (flush && txo->pend_wrb_cnt)
1403                 be_xmit_flush(adapter, txo);
1404
1405         return NETDEV_TX_OK;
1406 }
1407
1408 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1409 {
1410         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1411                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1412 }
1413
1414 static int be_set_vlan_promisc(struct be_adapter *adapter)
1415 {
1416         struct device *dev = &adapter->pdev->dev;
1417         int status;
1418
1419         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1420                 return 0;
1421
1422         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1423         if (!status) {
1424                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1425                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1426         } else {
1427                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1428         }
1429         return status;
1430 }
1431
1432 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1433 {
1434         struct device *dev = &adapter->pdev->dev;
1435         int status;
1436
1437         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1438         if (!status) {
1439                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1440                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1441         }
1442         return status;
1443 }
1444
1445 /*
1446  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1447  * If the user configures more, place BE in vlan promiscuous mode.
1448  */
1449 static int be_vid_config(struct be_adapter *adapter)
1450 {
1451         struct device *dev = &adapter->pdev->dev;
1452         u16 vids[BE_NUM_VLANS_SUPPORTED];
1453         u16 num = 0, i = 0;
1454         int status = 0;
1455
1456         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1457         if (adapter->netdev->flags & IFF_PROMISC)
1458                 return 0;
1459
1460         if (adapter->vlans_added > be_max_vlans(adapter))
1461                 return be_set_vlan_promisc(adapter);
1462
1463         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1464                 status = be_clear_vlan_promisc(adapter);
1465                 if (status)
1466                         return status;
1467         }
1468         /* Construct VLAN Table to give to HW */
1469         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1470                 vids[num++] = cpu_to_le16(i);
1471
1472         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1473         if (status) {
1474                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1475                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1476                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1477                     addl_status(status) ==
1478                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1479                         return be_set_vlan_promisc(adapter);
1480         }
1481         return status;
1482 }
1483
1484 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1485 {
1486         struct be_adapter *adapter = netdev_priv(netdev);
1487         int status = 0;
1488
1489         mutex_lock(&adapter->rx_filter_lock);
1490
1491         /* Packets with VID 0 are always received by Lancer by default */
1492         if (lancer_chip(adapter) && vid == 0)
1493                 goto done;
1494
1495         if (test_bit(vid, adapter->vids))
1496                 goto done;
1497
1498         set_bit(vid, adapter->vids);
1499         adapter->vlans_added++;
1500
1501         status = be_vid_config(adapter);
1502 done:
1503         mutex_unlock(&adapter->rx_filter_lock);
1504         return status;
1505 }
1506
1507 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1508 {
1509         struct be_adapter *adapter = netdev_priv(netdev);
1510         int status = 0;
1511
1512         mutex_lock(&adapter->rx_filter_lock);
1513
1514         /* Packets with VID 0 are always received by Lancer by default */
1515         if (lancer_chip(adapter) && vid == 0)
1516                 goto done;
1517
1518         if (!test_bit(vid, adapter->vids))
1519                 goto done;
1520
1521         clear_bit(vid, adapter->vids);
1522         adapter->vlans_added--;
1523
1524         status = be_vid_config(adapter);
1525 done:
1526         mutex_unlock(&adapter->rx_filter_lock);
1527         return status;
1528 }
1529
1530 static void be_set_all_promisc(struct be_adapter *adapter)
1531 {
1532         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1533         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1534 }
1535
1536 static void be_set_mc_promisc(struct be_adapter *adapter)
1537 {
1538         int status;
1539
1540         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1541                 return;
1542
1543         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1544         if (!status)
1545                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1546 }
1547
1548 static void be_set_uc_promisc(struct be_adapter *adapter)
1549 {
1550         int status;
1551
1552         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1553                 return;
1554
1555         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1556         if (!status)
1557                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1558 }
1559
1560 static void be_clear_uc_promisc(struct be_adapter *adapter)
1561 {
1562         int status;
1563
1564         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1565                 return;
1566
1567         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1568         if (!status)
1569                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1570 }
1571
1572 /* The two functions below are the callbacks passed to __dev_uc_sync() and
1573  * __dev_mc_sync(). A single callback handles both sync and unsync; it does
1574  * not add or remove addresses itself, but only flags that the uc/mc list has
1575  * changed. The entire uc/mc list is programmed in be_set_rx_mode().
1576  */
1577 static int be_uc_list_update(struct net_device *netdev,
1578                              const unsigned char *addr)
1579 {
1580         struct be_adapter *adapter = netdev_priv(netdev);
1581
1582         adapter->update_uc_list = true;
1583         return 0;
1584 }
1585
1586 static int be_mc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_mc_list = true;
1592         return 0;
1593 }
1594
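/* Sync the netdev mc-list and re-program the HW multicast filter. Falls back
 * to multicast-promiscuous mode when IFF_ALLMULTI is set or the list exceeds
 * what the interface supports.
 */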
1595 static void be_set_mc_list(struct be_adapter *adapter)
1596 {
1597         struct net_device *netdev = adapter->netdev;
1598         struct netdev_hw_addr *ha;
1599         bool mc_promisc = false;
1600         int status;
1601
1602         netif_addr_lock_bh(netdev);
1603         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1604
1605         if (netdev->flags & IFF_PROMISC) {
1606                 adapter->update_mc_list = false;
1607         } else if (netdev->flags & IFF_ALLMULTI ||
1608                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1609                 /* Enable multicast promisc if num configured exceeds
1610                  * what we support
1611                  */
1612                 mc_promisc = true;
1613                 adapter->update_mc_list = false;
1614         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1615                 /* Update mc-list unconditionally if the iface was previously
1616                  * in mc-promisc mode and now is out of that mode.
1617                  */
1618                 adapter->update_mc_list = true;
1619         }
1620
1621         if (adapter->update_mc_list) {
1622                 int i = 0;
1623
1624                 /* cache the mc-list in adapter */
1625                 netdev_for_each_mc_addr(ha, netdev) {
1626                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1627                         i++;
1628                 }
1629                 adapter->mc_count = netdev_mc_count(netdev);
1630         }
1631         netif_addr_unlock_bh(netdev);
1632
1633         if (mc_promisc) {
1634                 be_set_mc_promisc(adapter);
1635         } else if (adapter->update_mc_list) {
1636                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1637                 if (!status)
1638                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639                 else
1640                         be_set_mc_promisc(adapter);
1641
1642                 adapter->update_mc_list = false;
1643         }
1644 }
1645
1646 static void be_clear_mc_list(struct be_adapter *adapter)
1647 {
1648         struct net_device *netdev = adapter->netdev;
1649
1650         __dev_mc_unsync(netdev, NULL);
1651         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1652         adapter->mc_count = 0;
1653 }
1654
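/* Program uc_list[uc_idx] into the interface as a pmac entry. If it matches
 * the primary MAC (dev_mac), reuse pmac_id[0] instead of adding a duplicate.
 */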
1655 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1656 {
1657         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1658                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1659                 return 0;
1660         }
1661
1662         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1663                                adapter->if_handle,
1664                                &adapter->pmac_id[uc_idx + 1], 0);
1665 }
1666
1667 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1668 {
1669         if (pmac_id == adapter->pmac_id[0])
1670                 return;
1671
1672         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1673 }
1674
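/* Sync the netdev uc-list and re-program the unicast MAC filters. pmac_id[0]
 * is reserved for the primary MAC; entries added here use pmac_id[1..].
 * Falls back to unicast-promiscuous mode when the list exceeds the limit.
 */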
1675 static void be_set_uc_list(struct be_adapter *adapter)
1676 {
1677         struct net_device *netdev = adapter->netdev;
1678         struct netdev_hw_addr *ha;
1679         bool uc_promisc = false;
1680         int curr_uc_macs = 0, i;
1681
1682         netif_addr_lock_bh(netdev);
1683         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1684
1685         if (netdev->flags & IFF_PROMISC) {
1686                 adapter->update_uc_list = false;
1687         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1688                 uc_promisc = true;
1689                 adapter->update_uc_list = false;
1690         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1691                 /* Update uc-list unconditionally if the iface was previously
1692                  * in uc-promisc mode and now is out of that mode.
1693                  */
1694                 adapter->update_uc_list = true;
1695         }
1696
1697         if (adapter->update_uc_list) {
1698                 i = 0; /* uc_list[] does not include the primary MAC */
1699
1700                 /* cache the uc-list in adapter array */
1701                 netdev_for_each_uc_addr(ha, netdev) {
1702                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1703                         i++;
1704                 }
1705                 curr_uc_macs = netdev_uc_count(netdev);
1706         }
1707         netif_addr_unlock_bh(netdev);
1708
1709         if (uc_promisc) {
1710                 be_set_uc_promisc(adapter);
1711         } else if (adapter->update_uc_list) {
1712                 be_clear_uc_promisc(adapter);
1713
1714                 for (i = 0; i < adapter->uc_macs; i++)
1715                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1716
1717                 for (i = 0; i < curr_uc_macs; i++)
1718                         be_uc_mac_add(adapter, i);
1719                 adapter->uc_macs = curr_uc_macs;
1720                 adapter->update_uc_list = false;
1721         }
1722 }
1723
1724 static void be_clear_uc_list(struct be_adapter *adapter)
1725 {
1726         struct net_device *netdev = adapter->netdev;
1727         int i;
1728
1729         __dev_uc_unsync(netdev, NULL);
1730         for (i = 0; i < adapter->uc_macs; i++)
1731                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1732
1733         adapter->uc_macs = 0;
1734 }
1735
1736 static void __be_set_rx_mode(struct be_adapter *adapter)
1737 {
1738         struct net_device *netdev = adapter->netdev;
1739
1740         mutex_lock(&adapter->rx_filter_lock);
1741
1742         if (netdev->flags & IFF_PROMISC) {
1743                 if (!be_in_all_promisc(adapter))
1744                         be_set_all_promisc(adapter);
1745         } else if (be_in_all_promisc(adapter)) {
1746                 /* We need to re-program the vlan-list or clear
1747                  * vlan-promisc mode (if needed) when the interface
1748                  * comes out of promisc mode.
1749                  */
1750                 be_vid_config(adapter);
1751         }
1752
1753         be_set_uc_list(adapter);
1754         be_set_mc_list(adapter);
1755
1756         mutex_unlock(&adapter->rx_filter_lock);
1757 }
1758
1759 static void be_work_set_rx_mode(struct work_struct *work)
1760 {
1761         struct be_cmd_work *cmd_work =
1762                                 container_of(work, struct be_cmd_work, work);
1763
1764         __be_set_rx_mode(cmd_work->adapter);
1765         kfree(cmd_work);
1766 }
1767
1768 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1769 {
1770         struct be_adapter *adapter = netdev_priv(netdev);
1771         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1772         int status;
1773
1774         if (!sriov_enabled(adapter))
1775                 return -EPERM;
1776
1777         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1778                 return -EINVAL;
1779
1780         /* Proceed further only if the user-provided MAC is different
1781          * from the active MAC
1782          */
1783         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1784                 return 0;
1785
1786         if (BEx_chip(adapter)) {
1787                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1788                                 vf + 1);
1789
1790                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1791                                          &vf_cfg->pmac_id, vf + 1);
1792         } else {
1793                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1794                                         vf + 1);
1795         }
1796
1797         if (status) {
1798                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1799                         mac, vf, status);
1800                 return be_cmd_status(status);
1801         }
1802
1803         ether_addr_copy(vf_cfg->mac_addr, mac);
1804
1805         return 0;
1806 }
1807
1808 static int be_get_vf_config(struct net_device *netdev, int vf,
1809                             struct ifla_vf_info *vi)
1810 {
1811         struct be_adapter *adapter = netdev_priv(netdev);
1812         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1813
1814         if (!sriov_enabled(adapter))
1815                 return -EPERM;
1816
1817         if (vf >= adapter->num_vfs)
1818                 return -EINVAL;
1819
1820         vi->vf = vf;
1821         vi->max_tx_rate = vf_cfg->tx_rate;
1822         vi->min_tx_rate = 0;
1823         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1824         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1825         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1826         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1827         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1828
1829         return 0;
1830 }
1831
1832 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1833 {
1834         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1835         u16 vids[BE_NUM_VLANS_SUPPORTED];
1836         int vf_if_id = vf_cfg->if_handle;
1837         int status;
1838
1839         /* Enable Transparent VLAN Tagging */
1840         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1841         if (status)
1842                 return status;
1843
1844         /* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1845         vids[0] = 0;
1846         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1847         if (!status)
1848                 dev_info(&adapter->pdev->dev,
1849                          "Cleared guest VLANs on VF%d", vf);
1850
1851         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1852         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1853                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1854                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1855                 if (!status)
1856                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1857         }
1858         return 0;
1859 }
1860
1861 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1862 {
1863         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1864         struct device *dev = &adapter->pdev->dev;
1865         int status;
1866
1867         /* Reset Transparent VLAN Tagging. */
1868         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1869                                        vf_cfg->if_handle, 0, 0);
1870         if (status)
1871                 return status;
1872
1873         /* Allow VFs to program VLAN filters */
1874         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1875                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1876                                                   BE_PRIV_FILTMGMT, vf + 1);
1877                 if (!status) {
1878                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1879                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1880                 }
1881         }
1882
1883         dev_info(dev,
1884                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1885         return 0;
1886 }
1887
1888 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1889                           __be16 vlan_proto)
1890 {
1891         struct be_adapter *adapter = netdev_priv(netdev);
1892         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1893         int status;
1894
1895         if (!sriov_enabled(adapter))
1896                 return -EPERM;
1897
1898         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1899                 return -EINVAL;
1900
1901         if (vlan_proto != htons(ETH_P_8021Q))
1902                 return -EPROTONOSUPPORT;
1903
1904         if (vlan || qos) {
1905                 vlan |= qos << VLAN_PRIO_SHIFT;
1906                 status = be_set_vf_tvt(adapter, vf, vlan);
1907         } else {
1908                 status = be_clear_vf_tvt(adapter, vf);
1909         }
1910
1911         if (status) {
1912                 dev_err(&adapter->pdev->dev,
1913                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1914                         status);
1915                 return be_cmd_status(status);
1916         }
1917
1918         vf_cfg->vlan_tag = vlan;
1919         return 0;
1920 }
1921
1922 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1923                              int min_tx_rate, int max_tx_rate)
1924 {
1925         struct be_adapter *adapter = netdev_priv(netdev);
1926         struct device *dev = &adapter->pdev->dev;
1927         int percent_rate, status = 0;
1928         u16 link_speed = 0;
1929         u8 link_status;
1930
1931         if (!sriov_enabled(adapter))
1932                 return -EPERM;
1933
1934         if (vf >= adapter->num_vfs)
1935                 return -EINVAL;
1936
1937         if (min_tx_rate)
1938                 return -EINVAL;
1939
1940         if (!max_tx_rate)
1941                 goto config_qos;
1942
1943         status = be_cmd_link_status_query(adapter, &link_speed,
1944                                           &link_status, 0);
1945         if (status)
1946                 goto err;
1947
1948         if (!link_status) {
1949                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1950                 status = -ENETDOWN;
1951                 goto err;
1952         }
1953
1954         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1955                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1956                         link_speed);
1957                 status = -EINVAL;
1958                 goto err;
1959         }
1960
1961         /* On Skyhawk the QoS setting must be specified only as a % of link speed */
1962         percent_rate = link_speed / 100;
1963         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1964                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1965                         percent_rate);
1966                 status = -EINVAL;
1967                 goto err;
1968         }
1969
1970 config_qos:
1971         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1972         if (status)
1973                 goto err;
1974
1975         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1976         return 0;
1977
1978 err:
1979         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1980                 max_tx_rate, vf);
1981         return be_cmd_status(status);
1982 }
1983
1984 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1985                                 int link_state)
1986 {
1987         struct be_adapter *adapter = netdev_priv(netdev);
1988         int status;
1989
1990         if (!sriov_enabled(adapter))
1991                 return -EPERM;
1992
1993         if (vf >= adapter->num_vfs)
1994                 return -EINVAL;
1995
1996         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1997         if (status) {
1998                 dev_err(&adapter->pdev->dev,
1999                         "Link state change on VF %d failed: %#x\n", vf, status);
2000                 return be_cmd_status(status);
2001         }
2002
2003         adapter->vf_cfg[vf].plink_tracking = link_state;
2004
2005         return 0;
2006 }
2007
2008 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2009 {
2010         struct be_adapter *adapter = netdev_priv(netdev);
2011         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2012         u8 spoofchk;
2013         int status;
2014
2015         if (!sriov_enabled(adapter))
2016                 return -EPERM;
2017
2018         if (vf >= adapter->num_vfs)
2019                 return -EINVAL;
2020
2021         if (BEx_chip(adapter))
2022                 return -EOPNOTSUPP;
2023
2024         if (enable == vf_cfg->spoofchk)
2025                 return 0;
2026
2027         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2028
2029         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2030                                        0, spoofchk);
2031         if (status) {
2032                 dev_err(&adapter->pdev->dev,
2033                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2034                 return be_cmd_status(status);
2035         }
2036
2037         vf_cfg->spoofchk = enable;
2038         return 0;
2039 }
2040
2041 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2042                           ulong now)
2043 {
2044         aic->rx_pkts_prev = rx_pkts;
2045         aic->tx_reqs_prev = tx_pkts;
2046         aic->jiffies = now;
2047 }
2048
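/* Compute a new adaptive EQ delay for this EQ based on the rx/tx packet
 * rates seen since the last sample. Returns the static et_eqd value when
 * adaptive coalescing is disabled.
 */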
2049 static int be_get_new_eqd(struct be_eq_obj *eqo)
2050 {
2051         struct be_adapter *adapter = eqo->adapter;
2052         int eqd, start;
2053         struct be_aic_obj *aic;
2054         struct be_rx_obj *rxo;
2055         struct be_tx_obj *txo;
2056         u64 rx_pkts = 0, tx_pkts = 0;
2057         ulong now;
2058         u32 pps, delta;
2059         int i;
2060
2061         aic = &adapter->aic_obj[eqo->idx];
2062         if (!aic->enable) {
2063                 if (aic->jiffies)
2064                         aic->jiffies = 0;
2065                 eqd = aic->et_eqd;
2066                 return eqd;
2067         }
2068
2069         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2070                 do {
2071                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2072                         rx_pkts += rxo->stats.rx_pkts;
2073                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2074         }
2075
2076         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2077                 do {
2078                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2079                         tx_pkts += txo->stats.tx_reqs;
2080                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2081         }
2082
2083         /* Skip if counters wrapped around or this is the first calculation */
2084         now = jiffies;
2085         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2086             rx_pkts < aic->rx_pkts_prev ||
2087             tx_pkts < aic->tx_reqs_prev) {
2088                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2089                 return aic->prev_eqd;
2090         }
2091
2092         delta = jiffies_to_msecs(now - aic->jiffies);
2093         if (delta == 0)
2094                 return aic->prev_eqd;
2095
2096         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2097                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2098         eqd = (pps / 15000) << 2;
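        /* e.g. a combined rate of ~150000 pkts/sec gives pps/15000 = 10,
         * i.e. eqd = 40, which is then clamped to [min_eqd, max_eqd] below
         */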
2099
2100         if (eqd < 8)
2101                 eqd = 0;
2102         eqd = min_t(u32, eqd, aic->max_eqd);
2103         eqd = max_t(u32, eqd, aic->min_eqd);
2104
2105         be_aic_update(aic, rx_pkts, tx_pkts, now);
2106
2107         return eqd;
2108 }
2109
2110 /* For Skyhawk-R only */
2111 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2112 {
2113         struct be_adapter *adapter = eqo->adapter;
2114         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2115         ulong now = jiffies;
2116         int eqd;
2117         u32 mult_enc;
2118
2119         if (!aic->enable)
2120                 return 0;
2121
2122         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2123                 eqd = aic->prev_eqd;
2124         else
2125                 eqd = be_get_new_eqd(eqo);
2126
2127         if (eqd > 100)
2128                 mult_enc = R2I_DLY_ENC_1;
2129         else if (eqd > 60)
2130                 mult_enc = R2I_DLY_ENC_2;
2131         else if (eqd > 20)
2132                 mult_enc = R2I_DLY_ENC_3;
2133         else
2134                 mult_enc = R2I_DLY_ENC_0;
2135
2136         aic->prev_eqd = eqd;
2137
2138         return mult_enc;
2139 }
2140
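/* Recompute the adaptive EQ delay for each EQ and, for those that changed
 * (or when force_update is set), convert the value to the firmware's
 * delay-multiplier encoding ((eqd * 65) / 100) and push all updates in a
 * single MCC command.
 */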
2141 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2142 {
2143         struct be_set_eqd set_eqd[MAX_EVT_QS];
2144         struct be_aic_obj *aic;
2145         struct be_eq_obj *eqo;
2146         int i, num = 0, eqd;
2147
2148         for_all_evt_queues(adapter, eqo, i) {
2149                 aic = &adapter->aic_obj[eqo->idx];
2150                 eqd = be_get_new_eqd(eqo);
2151                 if (force_update || eqd != aic->prev_eqd) {
2152                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2153                         set_eqd[num].eq_id = eqo->q.id;
2154                         aic->prev_eqd = eqd;
2155                         num++;
2156                 }
2157         }
2158
2159         if (num)
2160                 be_cmd_modify_eqd(adapter, set_eqd, num);
2161 }
2162
2163 static void be_rx_stats_update(struct be_rx_obj *rxo,
2164                                struct be_rx_compl_info *rxcp)
2165 {
2166         struct be_rx_stats *stats = rx_stats(rxo);
2167
2168         u64_stats_update_begin(&stats->sync);
2169         stats->rx_compl++;
2170         stats->rx_bytes += rxcp->pkt_size;
2171         stats->rx_pkts++;
2172         if (rxcp->tunneled)
2173                 stats->rx_vxlan_offload_pkts++;
2174         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2175                 stats->rx_mcast_pkts++;
2176         if (rxcp->err)
2177                 stats->rx_compl_err++;
2178         u64_stats_update_end(&stats->sync);
2179 }
2180
2181 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2182 {
2183         /* L4 checksum is not reliable for non-TCP/UDP packets.
2184          * Also ignore ipcksm for IPv6 pkts
2185          */
2186         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2187                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2188 }
2189
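/* Pop the page_info entry at the RXQ tail. The page is DMA-unmapped when this
 * is its last fragment; otherwise only the fragment is synced for CPU access.
 */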
2190 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2191 {
2192         struct be_adapter *adapter = rxo->adapter;
2193         struct be_rx_page_info *rx_page_info;
2194         struct be_queue_info *rxq = &rxo->q;
2195         u32 frag_idx = rxq->tail;
2196
2197         rx_page_info = &rxo->page_info_tbl[frag_idx];
2198         BUG_ON(!rx_page_info->page);
2199
2200         if (rx_page_info->last_frag) {
2201                 dma_unmap_page(&adapter->pdev->dev,
2202                                dma_unmap_addr(rx_page_info, bus),
2203                                adapter->big_page_size, DMA_FROM_DEVICE);
2204                 rx_page_info->last_frag = false;
2205         } else {
2206                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2207                                         dma_unmap_addr(rx_page_info, bus),
2208                                         rx_frag_size, DMA_FROM_DEVICE);
2209         }
2210
2211         queue_tail_inc(rxq);
2212         atomic_dec(&rxq->used);
2213         return rx_page_info;
2214 }
2215
2216 /* Throw away the data in the Rx completion */
2217 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2218                                 struct be_rx_compl_info *rxcp)
2219 {
2220         struct be_rx_page_info *page_info;
2221         u16 i, num_rcvd = rxcp->num_rcvd;
2222
2223         for (i = 0; i < num_rcvd; i++) {
2224                 page_info = get_rx_page_info(rxo);
2225                 put_page(page_info->page);
2226                 memset(page_info, 0, sizeof(*page_info));
2227         }
2228 }
2229
2230 /*
2231  * skb_fill_rx_data forms a complete skb for an Ethernet frame
2232  * indicated by rxcp.
2233  */
2234 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2235                              struct be_rx_compl_info *rxcp)
2236 {
2237         struct be_rx_page_info *page_info;
2238         u16 i, j;
2239         u16 hdr_len, curr_frag_len, remaining;
2240         u8 *start;
2241
2242         page_info = get_rx_page_info(rxo);
2243         start = page_address(page_info->page) + page_info->page_offset;
2244         prefetch(start);
2245
2246         /* Copy data in the first descriptor of this completion */
2247         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2248
2249         skb->len = curr_frag_len;
2250         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2251                 memcpy(skb->data, start, curr_frag_len);
2252                 /* Complete packet has now been moved to data */
2253                 put_page(page_info->page);
2254                 skb->data_len = 0;
2255                 skb->tail += curr_frag_len;
2256         } else {
2257                 hdr_len = ETH_HLEN;
2258                 memcpy(skb->data, start, hdr_len);
2259                 skb_shinfo(skb)->nr_frags = 1;
2260                 skb_frag_set_page(skb, 0, page_info->page);
2261                 skb_shinfo(skb)->frags[0].page_offset =
2262                                         page_info->page_offset + hdr_len;
2263                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2264                                   curr_frag_len - hdr_len);
2265                 skb->data_len = curr_frag_len - hdr_len;
2266                 skb->truesize += rx_frag_size;
2267                 skb->tail += hdr_len;
2268         }
2269         page_info->page = NULL;
2270
2271         if (rxcp->pkt_size <= rx_frag_size) {
2272                 BUG_ON(rxcp->num_rcvd != 1);
2273                 return;
2274         }
2275
2276         /* More frags present for this completion */
2277         remaining = rxcp->pkt_size - curr_frag_len;
2278         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2279                 page_info = get_rx_page_info(rxo);
2280                 curr_frag_len = min(remaining, rx_frag_size);
2281
2282                 /* Coalesce all frags from the same physical page in one slot */
2283                 if (page_info->page_offset == 0) {
2284                         /* Fresh page */
2285                         j++;
2286                         skb_frag_set_page(skb, j, page_info->page);
2287                         skb_shinfo(skb)->frags[j].page_offset =
2288                                                         page_info->page_offset;
2289                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2290                         skb_shinfo(skb)->nr_frags++;
2291                 } else {
2292                         put_page(page_info->page);
2293                 }
2294
2295                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2296                 skb->len += curr_frag_len;
2297                 skb->data_len += curr_frag_len;
2298                 skb->truesize += rx_frag_size;
2299                 remaining -= curr_frag_len;
2300                 page_info->page = NULL;
2301         }
2302         BUG_ON(j > MAX_SKB_FRAGS);
2303 }
2304
2305 /* Process the RX completion indicated by rxcp when GRO is disabled */
2306 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2307                                 struct be_rx_compl_info *rxcp)
2308 {
2309         struct be_adapter *adapter = rxo->adapter;
2310         struct net_device *netdev = adapter->netdev;
2311         struct sk_buff *skb;
2312
2313         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2314         if (unlikely(!skb)) {
2315                 rx_stats(rxo)->rx_drops_no_skbs++;
2316                 be_rx_compl_discard(rxo, rxcp);
2317                 return;
2318         }
2319
2320         skb_fill_rx_data(rxo, skb, rxcp);
2321
2322         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2323                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2324         else
2325                 skb_checksum_none_assert(skb);
2326
2327         skb->protocol = eth_type_trans(skb, netdev);
2328         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2329         if (netdev->features & NETIF_F_RXHASH)
2330                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2331
2332         skb->csum_level = rxcp->tunneled;
2333         skb_mark_napi_id(skb, napi);
2334
2335         if (rxcp->vlanf)
2336                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2337
2338         netif_receive_skb(skb);
2339 }
2340
2341 /* Process the RX completion indicated by rxcp when GRO is enabled */
2342 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2343                                     struct napi_struct *napi,
2344                                     struct be_rx_compl_info *rxcp)
2345 {
2346         struct be_adapter *adapter = rxo->adapter;
2347         struct be_rx_page_info *page_info;
2348         struct sk_buff *skb = NULL;
2349         u16 remaining, curr_frag_len;
2350         u16 i, j;
2351
2352         skb = napi_get_frags(napi);
2353         if (!skb) {
2354                 be_rx_compl_discard(rxo, rxcp);
2355                 return;
2356         }
2357
2358         remaining = rxcp->pkt_size;
2359         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2360                 page_info = get_rx_page_info(rxo);
2361
2362                 curr_frag_len = min(remaining, rx_frag_size);
2363
2364                 /* Coalesce all frags from the same physical page in one slot */
2365                 if (i == 0 || page_info->page_offset == 0) {
2366                         /* First frag or Fresh page */
2367                         j++;
2368                         skb_frag_set_page(skb, j, page_info->page);
2369                         skb_shinfo(skb)->frags[j].page_offset =
2370                                                         page_info->page_offset;
2371                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2372                 } else {
2373                         put_page(page_info->page);
2374                 }
2375                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2376                 skb->truesize += rx_frag_size;
2377                 remaining -= curr_frag_len;
2378                 memset(page_info, 0, sizeof(*page_info));
2379         }
2380         BUG_ON(j > MAX_SKB_FRAGS);
2381
2382         skb_shinfo(skb)->nr_frags = j + 1;
2383         skb->len = rxcp->pkt_size;
2384         skb->data_len = rxcp->pkt_size;
2385         skb->ip_summed = CHECKSUM_UNNECESSARY;
2386         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2387         if (adapter->netdev->features & NETIF_F_RXHASH)
2388                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2389
2390         skb->csum_level = rxcp->tunneled;
2391
2392         if (rxcp->vlanf)
2393                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2394
2395         napi_gro_frags(napi);
2396 }
2397
2398 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2399                                  struct be_rx_compl_info *rxcp)
2400 {
2401         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2402         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2403         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2404         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2405         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2406         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2407         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2408         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2409         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2410         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2411         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2412         if (rxcp->vlanf) {
2413                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2414                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2415         }
2416         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2417         rxcp->tunneled =
2418                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2419 }
2420
2421 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2422                                  struct be_rx_compl_info *rxcp)
2423 {
2424         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2425         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2426         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2427         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2428         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2429         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2430         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2431         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2432         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2433         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2434         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2435         if (rxcp->vlanf) {
2436                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2437                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2438         }
2439         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2440         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2441 }
2442
2443 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2444 {
2445         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2446         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2447         struct be_adapter *adapter = rxo->adapter;
2448
2449         /* For checking the valid bit it is Ok to use either definition as the
2450          * valid bit is at the same position in both v0 and v1 Rx compl */
2451         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2452                 return NULL;
2453
2454         rmb();
2455         be_dws_le_to_cpu(compl, sizeof(*compl));
2456
2457         if (adapter->be3_native)
2458                 be_parse_rx_compl_v1(compl, rxcp);
2459         else
2460                 be_parse_rx_compl_v0(compl, rxcp);
2461
2462         if (rxcp->ip_frag)
2463                 rxcp->l4_csum = 0;
2464
2465         if (rxcp->vlanf) {
2466                 /* In QNQ modes, if qnq bit is not set, then the packet was
2467                  * tagged only with the transparent outer vlan-tag and must
2468                  * not be treated as a vlan packet by host
2469                  */
2470                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2471                         rxcp->vlanf = 0;
2472
2473                 if (!lancer_chip(adapter))
2474                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2475
2476                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2477                     !test_bit(rxcp->vlan_tag, adapter->vids))
2478                         rxcp->vlanf = 0;
2479         }
2480
2481         /* As the compl has been parsed, reset it; we won't touch it again */
2482         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2483
2484         queue_tail_inc(&rxo->cq);
2485         return rxcp;
2486 }
2487
2488 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2489 {
2490         u32 order = get_order(size);
2491
2492         if (order > 0)
2493                 gfp |= __GFP_COMP;
2494         return  alloc_pages(gfp, order);
2495 }
2496
2497 /*
2498  * Allocate a page, split it into fragments of size rx_frag_size and post as
2499  * receive buffers to BE
2500  */
2501 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2502 {
2503         struct be_adapter *adapter = rxo->adapter;
2504         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2505         struct be_queue_info *rxq = &rxo->q;
2506         struct page *pagep = NULL;
2507         struct device *dev = &adapter->pdev->dev;
2508         struct be_eth_rx_d *rxd;
2509         u64 page_dmaaddr = 0, frag_dmaaddr;
2510         u32 posted, page_offset = 0, notify = 0;
2511
2512         page_info = &rxo->page_info_tbl[rxq->head];
2513         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2514                 if (!pagep) {
2515                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2516                         if (unlikely(!pagep)) {
2517                                 rx_stats(rxo)->rx_post_fail++;
2518                                 break;
2519                         }
2520                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2521                                                     adapter->big_page_size,
2522                                                     DMA_FROM_DEVICE);
2523                         if (dma_mapping_error(dev, page_dmaaddr)) {
2524                                 put_page(pagep);
2525                                 pagep = NULL;
2526                                 adapter->drv_stats.dma_map_errors++;
2527                                 break;
2528                         }
2529                         page_offset = 0;
2530                 } else {
2531                         get_page(pagep);
2532                         page_offset += rx_frag_size;
2533                 }
2534                 page_info->page_offset = page_offset;
2535                 page_info->page = pagep;
2536
2537                 rxd = queue_head_node(rxq);
2538                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2539                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2540                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2541
2542                 /* Any space left in the current big page for another frag? */
2543                 if ((page_offset + rx_frag_size + rx_frag_size) >
2544                                         adapter->big_page_size) {
2545                         pagep = NULL;
2546                         page_info->last_frag = true;
2547                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2548                 } else {
2549                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2550                 }
2551
2552                 prev_page_info = page_info;
2553                 queue_head_inc(rxq);
2554                 page_info = &rxo->page_info_tbl[rxq->head];
2555         }
2556
2557         /* Mark the last frag of a page when we break out of the above loop
2558          * with no more slots available in the RXQ
2559          */
2560         if (pagep) {
2561                 prev_page_info->last_frag = true;
2562                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2563         }
2564
2565         if (posted) {
2566                 atomic_add(posted, &rxq->used);
2567                 if (rxo->rx_post_starved)
2568                         rxo->rx_post_starved = false;
2569                 do {
2570                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2571                         be_rxq_notify(adapter, rxq->id, notify);
2572                         posted -= notify;
2573                 } while (posted);
2574         } else if (atomic_read(&rxq->used) == 0) {
2575                 /* Let be_worker replenish when memory is available */
2576                 rxo->rx_post_starved = true;
2577         }
2578 }
2579
2580 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2581 {
2582         struct be_queue_info *tx_cq = &txo->cq;
2583         struct be_tx_compl_info *txcp = &txo->txcp;
2584         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2585
2586         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2587                 return NULL;
2588
2589         /* Ensure load ordering of valid bit dword and other dwords below */
2590         rmb();
2591         be_dws_le_to_cpu(compl, sizeof(*compl));
2592
2593         txcp->status = GET_TX_COMPL_BITS(status, compl);
2594         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2595
2596         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2597         queue_tail_inc(tx_cq);
2598         return txcp;
2599 }
2600
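/* Walk the TX queue from its tail up to last_index, unmapping each WRB and
 * freeing the skbs whose transmission has completed. Returns the number of
 * WRBs processed so the caller can adjust the queue's 'used' count.
 */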
2601 static u16 be_tx_compl_process(struct be_adapter *adapter,
2602                                struct be_tx_obj *txo, u16 last_index)
2603 {
2604         struct sk_buff **sent_skbs = txo->sent_skb_list;
2605         struct be_queue_info *txq = &txo->q;
2606         struct sk_buff *skb = NULL;
2607         bool unmap_skb_hdr = false;
2608         struct be_eth_wrb *wrb;
2609         u16 num_wrbs = 0;
2610         u32 frag_index;
2611
2612         do {
2613                 if (sent_skbs[txq->tail]) {
2614                         /* Free skb from prev req */
2615                         if (skb)
2616                                 dev_consume_skb_any(skb);
2617                         skb = sent_skbs[txq->tail];
2618                         sent_skbs[txq->tail] = NULL;
2619                         queue_tail_inc(txq);  /* skip hdr wrb */
2620                         num_wrbs++;
2621                         unmap_skb_hdr = true;
2622                 }
2623                 wrb = queue_tail_node(txq);
2624                 frag_index = txq->tail;
2625                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2626                               (unmap_skb_hdr && skb_headlen(skb)));
2627                 unmap_skb_hdr = false;
2628                 queue_tail_inc(txq);
2629                 num_wrbs++;
2630         } while (frag_index != last_index);
2631         dev_consume_skb_any(skb);
2632
2633         return num_wrbs;
2634 }
2635
2636 /* Return the number of events in the event queue */
2637 static inline int events_get(struct be_eq_obj *eqo)
2638 {
2639         struct be_eq_entry *eqe;
2640         int num = 0;
2641
2642         do {
2643                 eqe = queue_tail_node(&eqo->q);
2644                 if (eqe->evt == 0)
2645                         break;
2646
2647                 rmb();
2648                 eqe->evt = 0;
2649                 num++;
2650                 queue_tail_inc(&eqo->q);
2651         } while (true);
2652
2653         return num;
2654 }
2655
2656 /* Leaves the EQ in disarmed state */
2657 static void be_eq_clean(struct be_eq_obj *eqo)
2658 {
2659         int num = events_get(eqo);
2660
2661         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2662 }
2663
2664 /* Free posted rx buffers that were not used */
2665 static void be_rxq_clean(struct be_rx_obj *rxo)
2666 {
2667         struct be_queue_info *rxq = &rxo->q;
2668         struct be_rx_page_info *page_info;
2669
2670         while (atomic_read(&rxq->used) > 0) {
2671                 page_info = get_rx_page_info(rxo);
2672                 put_page(page_info->page);
2673                 memset(page_info, 0, sizeof(*page_info));
2674         }
2675         BUG_ON(atomic_read(&rxq->used));
2676         rxq->tail = 0;
2677         rxq->head = 0;
2678 }
2679
2680 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2681 {
2682         struct be_queue_info *rx_cq = &rxo->cq;
2683         struct be_rx_compl_info *rxcp;
2684         struct be_adapter *adapter = rxo->adapter;
2685         int flush_wait = 0;
2686
2687         /* Consume pending rx completions.
2688          * Wait for the flush completion (identified by zero num_rcvd)
2689          * to arrive. Notify the CQ even when there are no more CQ entries
2690          * so that HW can flush partially coalesced CQ entries.
2691          * In Lancer, there is no need to wait for the flush compl.
2692          */
2693         for (;;) {
2694                 rxcp = be_rx_compl_get(rxo);
2695                 if (!rxcp) {
2696                         if (lancer_chip(adapter))
2697                                 break;
2698
2699                         if (flush_wait++ > 50 ||
2700                             be_check_error(adapter,
2701                                            BE_ERROR_HW)) {
2702                                 dev_warn(&adapter->pdev->dev,
2703                                          "did not receive flush compl\n");
2704                                 break;
2705                         }
2706                         be_cq_notify(adapter, rx_cq->id, true, 0);
2707                         mdelay(1);
2708                 } else {
2709                         be_rx_compl_discard(rxo, rxcp);
2710                         be_cq_notify(adapter, rx_cq->id, false, 1);
2711                         if (rxcp->num_rcvd == 0)
2712                                 break;
2713                 }
2714         }
2715
2716         /* After cleanup, leave the CQ in unarmed state */
2717         be_cq_notify(adapter, rx_cq->id, false, 0);
2718 }
2719
2720 static void be_tx_compl_clean(struct be_adapter *adapter)
2721 {
2722         struct device *dev = &adapter->pdev->dev;
2723         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2724         struct be_tx_compl_info *txcp;
2725         struct be_queue_info *txq;
2726         u32 end_idx, notified_idx;
2727         struct be_tx_obj *txo;
2728         int i, pending_txqs;
2729
2730         /* Stop polling for compls when HW has been silent for 10ms */
2731         do {
2732                 pending_txqs = adapter->num_tx_qs;
2733
2734                 for_all_tx_queues(adapter, txo, i) {
2735                         cmpl = 0;
2736                         num_wrbs = 0;
2737                         txq = &txo->q;
2738                         while ((txcp = be_tx_compl_get(txo))) {
2739                                 num_wrbs +=
2740                                         be_tx_compl_process(adapter, txo,
2741                                                             txcp->end_index);
2742                                 cmpl++;
2743                         }
2744                         if (cmpl) {
2745                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2746                                 atomic_sub(num_wrbs, &txq->used);
2747                                 timeo = 0;
2748                         }
2749                         if (!be_is_tx_compl_pending(txo))
2750                                 pending_txqs--;
2751                 }
2752
2753                 if (pending_txqs == 0 || ++timeo > 10 ||
2754                     be_check_error(adapter, BE_ERROR_HW))
2755                         break;
2756
2757                 mdelay(1);
2758         } while (true);
2759
2760         /* Free enqueued TX that was never notified to HW */
2761         for_all_tx_queues(adapter, txo, i) {
2762                 txq = &txo->q;
2763
2764                 if (atomic_read(&txq->used)) {
2765                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2766                                  i, atomic_read(&txq->used));
2767                         notified_idx = txq->tail;
2768                         end_idx = txq->tail;
2769                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2770                                   txq->len);
2771                         /* Use the tx-compl process logic to handle requests
2772                          * that were not sent to the HW.
2773                          */
2774                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2775                         atomic_sub(num_wrbs, &txq->used);
2776                         BUG_ON(atomic_read(&txq->used));
2777                         txo->pend_wrb_cnt = 0;
2778                         /* Since hw was never notified of these requests,
2779                          * reset TXQ indices
2780                          */
2781                         txq->head = notified_idx;
2782                         txq->tail = notified_idx;
2783                 }
2784         }
2785 }
2786
2787 static void be_evt_queues_destroy(struct be_adapter *adapter)
2788 {
2789         struct be_eq_obj *eqo;
2790         int i;
2791
2792         for_all_evt_queues(adapter, eqo, i) {
2793                 if (eqo->q.created) {
2794                         be_eq_clean(eqo);
2795                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2796                         netif_napi_del(&eqo->napi);
2797                         free_cpumask_var(eqo->affinity_mask);
2798                 }
2799                 be_queue_free(adapter, &eqo->q);
2800         }
2801 }
2802
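/* Allocate and create the event queues. Each EQ is given a NAPI context and
 * a CPU affinity mask spread across the device's local NUMA node.
 */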
2803 static int be_evt_queues_create(struct be_adapter *adapter)
2804 {
2805         struct be_queue_info *eq;
2806         struct be_eq_obj *eqo;
2807         struct be_aic_obj *aic;
2808         int i, rc;
2809
2810         /* need enough EQs to service both RX and TX queues */
2811         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2812                                     max(adapter->cfg_num_rx_irqs,
2813                                         adapter->cfg_num_tx_irqs));
2814
2815         for_all_evt_queues(adapter, eqo, i) {
2816                 int numa_node = dev_to_node(&adapter->pdev->dev);
2817
2818                 aic = &adapter->aic_obj[i];
2819                 eqo->adapter = adapter;
2820                 eqo->idx = i;
2821                 aic->max_eqd = BE_MAX_EQD;
2822                 aic->enable = true;
2823
2824                 eq = &eqo->q;
2825                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2826                                     sizeof(struct be_eq_entry));
2827                 if (rc)
2828                         return rc;
2829
2830                 rc = be_cmd_eq_create(adapter, eqo);
2831                 if (rc)
2832                         return rc;
2833
2834                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2835                         return -ENOMEM;
2836                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2837                                 eqo->affinity_mask);
2838                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2839                                BE_NAPI_WEIGHT);
2840         }
2841         return 0;
2842 }
2843
2844 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2845 {
2846         struct be_queue_info *q;
2847
2848         q = &adapter->mcc_obj.q;
2849         if (q->created)
2850                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2851         be_queue_free(adapter, q);
2852
2853         q = &adapter->mcc_obj.cq;
2854         if (q->created)
2855                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2856         be_queue_free(adapter, q);
2857 }
2858
2859 /* Must be called only after TX qs are created as MCC shares TX EQ */
2860 static int be_mcc_queues_create(struct be_adapter *adapter)
2861 {
2862         struct be_queue_info *q, *cq;
2863
2864         cq = &adapter->mcc_obj.cq;
2865         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2866                            sizeof(struct be_mcc_compl)))
2867                 goto err;
2868
2869         /* Use the default EQ for MCC completions */
2870         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2871                 goto mcc_cq_free;
2872
2873         q = &adapter->mcc_obj.q;
2874         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2875                 goto mcc_cq_destroy;
2876
2877         if (be_cmd_mccq_create(adapter, q, cq))
2878                 goto mcc_q_free;
2879
2880         return 0;
2881
2882 mcc_q_free:
2883         be_queue_free(adapter, q);
2884 mcc_cq_destroy:
2885         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2886 mcc_cq_free:
2887         be_queue_free(adapter, cq);
2888 err:
2889         return -1;
2890 }
2891
2892 static void be_tx_queues_destroy(struct be_adapter *adapter)
2893 {
2894         struct be_queue_info *q;
2895         struct be_tx_obj *txo;
2896         u8 i;
2897
2898         for_all_tx_queues(adapter, txo, i) {
2899                 q = &txo->q;
2900                 if (q->created)
2901                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2902                 be_queue_free(adapter, q);
2903
2904                 q = &txo->cq;
2905                 if (q->created)
2906                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2907                 be_queue_free(adapter, q);
2908         }
2909 }
2910
2911 static int be_tx_qs_create(struct be_adapter *adapter)
2912 {
2913         struct be_queue_info *cq;
2914         struct be_tx_obj *txo;
2915         struct be_eq_obj *eqo;
2916         int status, i;
2917
2918         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2919
2920         for_all_tx_queues(adapter, txo, i) {
2921                 cq = &txo->cq;
2922                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2923                                         sizeof(struct be_eth_tx_compl));
2924                 if (status)
2925                         return status;
2926
2927                 u64_stats_init(&txo->stats.sync);
2928                 u64_stats_init(&txo->stats.sync_compl);
2929
2930                 /* If num_evt_qs is less than num_tx_qs, then more than
2931                  * one txq shares an eq
2932                  */
2933                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2934                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2935                 if (status)
2936                         return status;
2937
2938                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2939                                         sizeof(struct be_eth_wrb));
2940                 if (status)
2941                         return status;
2942
2943                 status = be_cmd_txq_create(adapter, txo);
2944                 if (status)
2945                         return status;
2946
2947                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2948                                     eqo->idx);
2949         }
2950
2951         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2952                  adapter->num_tx_qs);
2953         return 0;
2954 }
2955
2956 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2957 {
2958         struct be_queue_info *q;
2959         struct be_rx_obj *rxo;
2960         int i;
2961
2962         for_all_rx_queues(adapter, rxo, i) {
2963                 q = &rxo->cq;
2964                 if (q->created)
2965                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2966                 be_queue_free(adapter, q);
2967         }
2968 }
2969
2970 static int be_rx_cqs_create(struct be_adapter *adapter)
2971 {
2972         struct be_queue_info *eq, *cq;
2973         struct be_rx_obj *rxo;
2974         int rc, i;
2975
2976         adapter->num_rss_qs =
2977                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2978
2979         /* We'll use RSS only if at least 2 RSS rings are supported. */
2980         if (adapter->num_rss_qs < 2)
2981                 adapter->num_rss_qs = 0;
2982
2983         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2984
2985         /* When the interface is not capable of RSS rings (and there is no
2986          * need to create a default RXQ) we'll still need one RXQ
2987          */
2988         if (adapter->num_rx_qs == 0)
2989                 adapter->num_rx_qs = 1;
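        /* Worked example with hypothetical values: with num_evt_qs == 4,
         * cfg_num_rx_irqs == 6 and need_def_rxq == 1, this yields
         * num_rss_qs == 4 and num_rx_qs == 5.  With a single vector
         * (num_evt_qs == 1) RSS is dropped (num_rss_qs == 0) and the fallback
         * above still creates one RXQ.
         */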
2990
2991         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2992         for_all_rx_queues(adapter, rxo, i) {
2993                 rxo->adapter = adapter;
2994                 cq = &rxo->cq;
2995                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2996                                     sizeof(struct be_eth_rx_compl));
2997                 if (rc)
2998                         return rc;
2999
3000                 u64_stats_init(&rxo->stats.sync);
3001                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3002                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3003                 if (rc)
3004                         return rc;
3005         }
3006
3007         dev_info(&adapter->pdev->dev,
3008                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3009         return 0;
3010 }
3011
3012 static irqreturn_t be_intx(int irq, void *dev)
3013 {
3014         struct be_eq_obj *eqo = dev;
3015         struct be_adapter *adapter = eqo->adapter;
3016         int num_evts = 0;
3017
3018         /* IRQ is not expected when NAPI is scheduled as the EQ
3019          * will not be armed.
3020          * But, this can happen on Lancer INTx where it takes
3021          * a while to de-assert INTx or in BE2 where occasionally
3022          * an interrupt may be raised even when EQ is unarmed.
3023          * If NAPI is already scheduled, then counting & notifying
3024          * events will orphan them.
3025          */
3026         if (napi_schedule_prep(&eqo->napi)) {
3027                 num_evts = events_get(eqo);
3028                 __napi_schedule(&eqo->napi);
3029                 if (num_evts)
3030                         eqo->spurious_intr = 0;
3031         }
3032         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3033
3034         /* Return IRQ_HANDLED only for the first spurious intr
3035          * after a valid intr to stop the kernel from branding
3036          * this irq as a bad one!
3037          */
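        /* Example sequence (illustrative only): a valid intr returns
         * IRQ_HANDLED and clears spurious_intr; the next event-less intr
         * still returns IRQ_HANDLED (spurious_intr becomes 1); further
         * consecutive event-less intrs return IRQ_NONE.
         */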
3038         if (num_evts || eqo->spurious_intr++ == 0)
3039                 return IRQ_HANDLED;
3040         else
3041                 return IRQ_NONE;
3042 }
3043
3044 static irqreturn_t be_msix(int irq, void *dev)
3045 {
3046         struct be_eq_obj *eqo = dev;
3047
3048         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3049         napi_schedule(&eqo->napi);
3050         return IRQ_HANDLED;
3051 }
3052
3053 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3054 {
3055         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3056 }
3057
3058 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3059                          int budget, int polling)
3060 {
3061         struct be_adapter *adapter = rxo->adapter;
3062         struct be_queue_info *rx_cq = &rxo->cq;
3063         struct be_rx_compl_info *rxcp;
3064         u32 work_done;
3065         u32 frags_consumed = 0;
3066
3067         for (work_done = 0; work_done < budget; work_done++) {
3068                 rxcp = be_rx_compl_get(rxo);
3069                 if (!rxcp)
3070                         break;
3071
3072                 /* Is it a flush compl that has no data */
3073                 if (unlikely(rxcp->num_rcvd == 0))
3074                         goto loop_continue;
3075
3076                 /* Discard compls received with a partial DMA (Lancer B0) */
3077                 if (unlikely(!rxcp->pkt_size)) {
3078                         be_rx_compl_discard(rxo, rxcp);
3079                         goto loop_continue;
3080                 }
3081
3082                 /* On BE, drop pkts that arrive due to imperfect filtering in
3083                  * promiscuous mode on some SKUs
3084                  */
3085                 if (unlikely(rxcp->port != adapter->port_num &&
3086                              !lancer_chip(adapter))) {
3087                         be_rx_compl_discard(rxo, rxcp);
3088                         goto loop_continue;
3089                 }
3090
3091                 /* Don't do gro when we're busy_polling */
3092                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3093                         be_rx_compl_process_gro(rxo, napi, rxcp);
3094                 else
3095                         be_rx_compl_process(rxo, napi, rxcp);
3096
3097 loop_continue:
3098                 frags_consumed += rxcp->num_rcvd;
3099                 be_rx_stats_update(rxo, rxcp);
3100         }
3101
3102         if (work_done) {
3103                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3104
3105                 /* When an rx-obj gets into post_starved state, just
3106                  * let be_worker do the posting.
3107                  */
3108                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3109                     !rxo->rx_post_starved)
3110                         be_post_rx_frags(rxo, GFP_ATOMIC,
3111                                          max_t(u32, MAX_RX_POST,
3112                                                frags_consumed));
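                /* Replenish note (illustrative): at least MAX_RX_POST buffers
                 * are reposted, or more when the frames just completed
                 * consumed more fragments than that (e.g. jumbo frames
                 * spanning several rx frags), so the ring refills as fast as
                 * it drains.
                 */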
3113         }
3114
3115         return work_done;
3116 }
3117
3118 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3119 {
3120         switch (status) {
3121         case BE_TX_COMP_HDR_PARSE_ERR:
3122                 tx_stats(txo)->tx_hdr_parse_err++;
3123                 break;
3124         case BE_TX_COMP_NDMA_ERR:
3125                 tx_stats(txo)->tx_dma_err++;
3126                 break;
3127         case BE_TX_COMP_ACL_ERR:
3128                 tx_stats(txo)->tx_spoof_check_err++;
3129                 break;
3130         }
3131 }
3132
3133 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3134 {
3135         switch (status) {
3136         case LANCER_TX_COMP_LSO_ERR:
3137                 tx_stats(txo)->tx_tso_err++;
3138                 break;
3139         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3140         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3141                 tx_stats(txo)->tx_spoof_check_err++;
3142                 break;
3143         case LANCER_TX_COMP_QINQ_ERR:
3144                 tx_stats(txo)->tx_qinq_err++;
3145                 break;
3146         case LANCER_TX_COMP_PARITY_ERR:
3147                 tx_stats(txo)->tx_internal_parity_err++;
3148                 break;
3149         case LANCER_TX_COMP_DMA_ERR:
3150                 tx_stats(txo)->tx_dma_err++;
3151                 break;
3152         }
3153 }
3154
3155 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3156                           int idx)
3157 {
3158         int num_wrbs = 0, work_done = 0;
3159         struct be_tx_compl_info *txcp;
3160
3161         while ((txcp = be_tx_compl_get(txo))) {
3162                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3163                 work_done++;
3164
3165                 if (txcp->status) {
3166                         if (lancer_chip(adapter))
3167                                 lancer_update_tx_err(txo, txcp->status);
3168                         else
3169                                 be_update_tx_err(txo, txcp->status);
3170                 }
3171         }
3172
3173         if (work_done) {
3174                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3175                 atomic_sub(num_wrbs, &txo->q.used);
3176
3177                 /* As Tx wrbs have been freed up, wake up netdev queue
3178                  * if it was stopped due to lack of tx wrbs.  */
3179                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3180                     be_can_txq_wake(txo)) {
3181                         netif_wake_subqueue(adapter->netdev, idx);
3182                 }
3183
3184                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3185                 tx_stats(txo)->tx_compl += work_done;
3186                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3187         }
3188 }
3189
3190 #ifdef CONFIG_NET_RX_BUSY_POLL
3191 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3192 {
3193         bool status = true;
3194
3195         spin_lock(&eqo->lock); /* BH is already disabled */
3196         if (eqo->state & BE_EQ_LOCKED) {
3197                 WARN_ON(eqo->state & BE_EQ_NAPI);
3198                 eqo->state |= BE_EQ_NAPI_YIELD;
3199                 status = false;
3200         } else {
3201                 eqo->state = BE_EQ_NAPI;
3202         }
3203         spin_unlock(&eqo->lock);
3204         return status;
3205 }
3206
3207 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3208 {
3209         spin_lock(&eqo->lock); /* BH is already disabled */
3210
3211         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3212         eqo->state = BE_EQ_IDLE;
3213
3214         spin_unlock(&eqo->lock);
3215 }
3216
3217 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3218 {
3219         bool status = true;
3220
3221         spin_lock_bh(&eqo->lock);
3222         if (eqo->state & BE_EQ_LOCKED) {
3223                 eqo->state |= BE_EQ_POLL_YIELD;
3224                 status = false;
3225         } else {
3226                 eqo->state |= BE_EQ_POLL;
3227         }
3228         spin_unlock_bh(&eqo->lock);
3229         return status;
3230 }
3231
3232 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3233 {
3234         spin_lock_bh(&eqo->lock);
3235
3236         WARN_ON(eqo->state & (BE_EQ_NAPI));
3237         eqo->state = BE_EQ_IDLE;
3238
3239         spin_unlock_bh(&eqo->lock);
3240 }
3241
3242 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3243 {
3244         spin_lock_init(&eqo->lock);
3245         eqo->state = BE_EQ_IDLE;
3246 }
3247
3248 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3249 {
3250         local_bh_disable();
3251
3252         /* It's enough to just acquire napi lock on the eqo to stop
3253          * be_busy_poll() from processing any queues.
3254          */
3255         while (!be_lock_napi(eqo))
3256                 mdelay(1);
3257
3258         local_bh_enable();
3259 }
3260
3261 #else /* CONFIG_NET_RX_BUSY_POLL */
3262
3263 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3264 {
3265         return true;
3266 }
3267
3268 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3269 {
3270 }
3271
3272 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3273 {
3274         return false;
3275 }
3276
3277 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3278 {
3279 }
3280
3281 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3282 {
3283 }
3284
3285 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3286 {
3287 }
3288 #endif /* CONFIG_NET_RX_BUSY_POLL */
3289
3290 int be_poll(struct napi_struct *napi, int budget)
3291 {
3292         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3293         struct be_adapter *adapter = eqo->adapter;
3294         int max_work = 0, work, i, num_evts;
3295         struct be_rx_obj *rxo;
3296         struct be_tx_obj *txo;
3297         u32 mult_enc = 0;
3298
3299         num_evts = events_get(eqo);
3300
3301         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3302                 be_process_tx(adapter, txo, i);
3303
3304         if (be_lock_napi(eqo)) {
3305                 /* This loop will iterate twice for EQ0 in which
3306                  * completions of the last RXQ (default one) are also processed
3307                  * For other EQs the loop iterates only once
3308                  */
3309                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3310                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3311                         max_work = max(work, max_work);
3312                 }
3313                 be_unlock_napi(eqo);
3314         } else {
3315                 max_work = budget;
3316         }
3317
3318         if (is_mcc_eqo(eqo))
3319                 be_process_mcc(adapter);
3320
3321         if (max_work < budget) {
3322                 napi_complete(napi);
3323
3324                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3325                  * delay via a delay multiplier encoding value
3326                  */
3327                 if (skyhawk_chip(adapter))
3328                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3329
3330                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3331                              mult_enc);
3332         } else {
3333                 /* As we'll continue in polling mode, count and clear events */
3334                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3335         }
3336         return max_work;
3337 }
3338
3339 #ifdef CONFIG_NET_RX_BUSY_POLL
3340 static int be_busy_poll(struct napi_struct *napi)
3341 {
3342         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3343         struct be_adapter *adapter = eqo->adapter;
3344         struct be_rx_obj *rxo;
3345         int i, work = 0;
3346
3347         if (!be_lock_busy_poll(eqo))
3348                 return LL_FLUSH_BUSY;
3349
3350         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3351                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3352                 if (work)
3353                         break;
3354         }
3355
3356         be_unlock_busy_poll(eqo);
3357         return work;
3358 }
3359 #endif
3360
3361 void be_detect_error(struct be_adapter *adapter)
3362 {
3363         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3364         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3365         u32 i;
3366         struct device *dev = &adapter->pdev->dev;
3367
3368         if (be_check_error(adapter, BE_ERROR_HW))
3369                 return;
3370
3371         if (lancer_chip(adapter)) {
3372                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3373                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3374                         be_set_error(adapter, BE_ERROR_UE);
3375                         sliport_err1 = ioread32(adapter->db +
3376                                                 SLIPORT_ERROR1_OFFSET);
3377                         sliport_err2 = ioread32(adapter->db +
3378                                                 SLIPORT_ERROR2_OFFSET);
3379                         /* Do not log error messages if it's a FW reset */
3380                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3381                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3382                                 dev_info(dev, "Firmware update in progress\n");
3383                         } else {
3384                                 dev_err(dev, "Error detected in the card\n");
3385                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3386                                         sliport_status);
3387                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3388                                         sliport_err1);
3389                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3390                                         sliport_err2);
3391                         }
3392                 }
3393         } else {
3394                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3395                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3396                 ue_lo_mask = ioread32(adapter->pcicfg +
3397                                       PCICFG_UE_STATUS_LOW_MASK);
3398                 ue_hi_mask = ioread32(adapter->pcicfg +
3399                                       PCICFG_UE_STATUS_HI_MASK);
3400
3401                 ue_lo = (ue_lo & ~ue_lo_mask);
3402                 ue_hi = (ue_hi & ~ue_hi_mask);
3403
3404                 /* On certain platforms BE hardware can indicate spurious UEs.
3405                  * In case of a real UE the HW will stop working on its own anyway,
3406                  * so the hw_error state is not set here for UE detection.
3407                  */
3408
3409                 if (ue_lo || ue_hi) {
3410                         dev_err(dev, "Error detected in the adapter");
3411                         if (skyhawk_chip(adapter))
3412                                 be_set_error(adapter, BE_ERROR_UE);
3413
3414                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3415                                 if (ue_lo & 1)
3416                                         dev_err(dev, "UE: %s bit set\n",
3417                                                 ue_status_low_desc[i]);
3418                         }
3419                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3420                                 if (ue_hi & 1)
3421                                         dev_err(dev, "UE: %s bit set\n",
3422                                                 ue_status_hi_desc[i]);
3423                         }
3424                 }
3425         }
3426 }
3427
3428 static void be_msix_disable(struct be_adapter *adapter)
3429 {
3430         if (msix_enabled(adapter)) {
3431                 pci_disable_msix(adapter->pdev);
3432                 adapter->num_msix_vec = 0;
3433                 adapter->num_msix_roce_vec = 0;
3434         }
3435 }
3436
3437 static int be_msix_enable(struct be_adapter *adapter)
3438 {
3439         unsigned int i, max_roce_eqs;
3440         struct device *dev = &adapter->pdev->dev;
3441         int num_vec;
3442
3443         /* If RoCE is supported, program the max number of vectors that
3444          * could be used for NIC and RoCE, else, just program the number
3445          * we'll use initially.
3446          */
3447         if (be_roce_supported(adapter)) {
3448                 max_roce_eqs =
3449                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3450                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3451                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3452         } else {
3453                 num_vec = max(adapter->cfg_num_rx_irqs,
3454                               adapter->cfg_num_tx_irqs);
3455         }
3456
3457         for (i = 0; i < num_vec; i++)
3458                 adapter->msix_entries[i].entry = i;
3459
3460         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3461                                         MIN_MSIX_VECTORS, num_vec);
3462         if (num_vec < 0)
3463                 goto fail;
3464
3465         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3466                 adapter->num_msix_roce_vec = num_vec / 2;
3467                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3468                          adapter->num_msix_roce_vec);
3469         }
3470
3471         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
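        /* Worked example with a hypothetical grant: if pci_enable_msix_range()
         * returns 16 vectors on a RoCE-capable function, 8 are reserved for
         * RoCE and the remaining 8 are used for NIC EQs.  If only
         * MIN_MSIX_VECTORS are granted, RoCE gets none.
         */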
3472
3473         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3474                  adapter->num_msix_vec);
3475         return 0;
3476
3477 fail:
3478         dev_warn(dev, "MSIx enable failed\n");
3479
3480         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3481         if (be_virtfn(adapter))
3482                 return num_vec;
3483         return 0;
3484 }
3485
3486 static inline int be_msix_vec_get(struct be_adapter *adapter,
3487                                   struct be_eq_obj *eqo)
3488 {
3489         return adapter->msix_entries[eqo->msix_idx].vector;
3490 }
3491
3492 static int be_msix_register(struct be_adapter *adapter)
3493 {
3494         struct net_device *netdev = adapter->netdev;
3495         struct be_eq_obj *eqo;
3496         int status, i, vec;
3497
3498         for_all_evt_queues(adapter, eqo, i) {
3499                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3500                 vec = be_msix_vec_get(adapter, eqo);
3501                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3502                 if (status)
3503                         goto err_msix;
3504
3505                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3506         }
3507
3508         return 0;
3509 err_msix:
3510         for (i--; i >= 0; i--) {
3511                 eqo = &adapter->eq_obj[i];
3512                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3513         }
3514         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3515                  status);
3516         be_msix_disable(adapter);
3517         return status;
3518 }
3519
3520 static int be_irq_register(struct be_adapter *adapter)
3521 {
3522         struct net_device *netdev = adapter->netdev;
3523         int status;
3524
3525         if (msix_enabled(adapter)) {
3526                 status = be_msix_register(adapter);
3527                 if (status == 0)
3528                         goto done;
3529                 /* INTx is not supported for VF */
3530                 if (be_virtfn(adapter))
3531                         return status;
3532         }
3533
3534         /* INTx: only the first EQ is used */
3535         netdev->irq = adapter->pdev->irq;
3536         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3537                              &adapter->eq_obj[0]);
3538         if (status) {
3539                 dev_err(&adapter->pdev->dev,
3540                         "INTx request IRQ failed - err %d\n", status);
3541                 return status;
3542         }
3543 done:
3544         adapter->isr_registered = true;
3545         return 0;
3546 }
3547
3548 static void be_irq_unregister(struct be_adapter *adapter)
3549 {
3550         struct net_device *netdev = adapter->netdev;
3551         struct be_eq_obj *eqo;
3552         int i, vec;
3553
3554         if (!adapter->isr_registered)
3555                 return;
3556
3557         /* INTx */
3558         if (!msix_enabled(adapter)) {
3559                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3560                 goto done;
3561         }
3562
3563         /* MSIx */
3564         for_all_evt_queues(adapter, eqo, i) {
3565                 vec = be_msix_vec_get(adapter, eqo);
3566                 irq_set_affinity_hint(vec, NULL);
3567                 free_irq(vec, eqo);
3568         }
3569
3570 done:
3571         adapter->isr_registered = false;
3572 }
3573
3574 static void be_rx_qs_destroy(struct be_adapter *adapter)
3575 {
3576         struct rss_info *rss = &adapter->rss_info;
3577         struct be_queue_info *q;
3578         struct be_rx_obj *rxo;
3579         int i;
3580
3581         for_all_rx_queues(adapter, rxo, i) {
3582                 q = &rxo->q;
3583                 if (q->created) {
3584                         /* If RXQs are destroyed while in an "out of buffer"
3585                          * state, there is a possibility of an HW stall on
3586                          * Lancer. So, post 64 buffers to each queue to relieve
3587                          * the "out of buffer" condition.
3588                          * Make sure there's space in the RXQ before posting.
3589                          */
3590                         if (lancer_chip(adapter)) {
3591                                 be_rx_cq_clean(rxo);
3592                                 if (atomic_read(&q->used) == 0)
3593                                         be_post_rx_frags(rxo, GFP_KERNEL,
3594                                                          MAX_RX_POST);
3595                         }
3596
3597                         be_cmd_rxq_destroy(adapter, q);
3598                         be_rx_cq_clean(rxo);
3599                         be_rxq_clean(rxo);
3600                 }
3601                 be_queue_free(adapter, q);
3602         }
3603
3604         if (rss->rss_flags) {
3605                 rss->rss_flags = RSS_ENABLE_NONE;
3606                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3607                                   128, rss->rss_hkey);
3608         }
3609 }
3610
3611 static void be_disable_if_filters(struct be_adapter *adapter)
3612 {
3613         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3614         be_clear_uc_list(adapter);
3615         be_clear_mc_list(adapter);
3616
3617         /* The IFACE flags are enabled in the open path and cleared
3618          * in the close path. When a VF gets detached from the host and
3619          * assigned to a VM the following happens:
3620          *      - VF's IFACE flags get cleared in the detach path
3621          *      - IFACE create is issued by the VF in the attach path
3622          * Due to a bug in the BE3/Skyhawk-R FW
3623          * (Lancer FW doesn't have the bug), the IFACE capability flags
3624          * specified along with the IFACE create cmd issued by a VF are not
3625          * honoured by FW.  As a consequence, if a *new* driver
3626          * (that enables/disables IFACE flags in open/close)
3627          * is loaded in the host and an *old* driver is used by a VM/VF,
3628          * the IFACE gets created *without* the needed flags.
3629          * To avoid this, disable RX-filter flags only for Lancer.
3630          */
3631         if (lancer_chip(adapter)) {
3632                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3633                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3634         }
3635 }
3636
3637 static int be_close(struct net_device *netdev)
3638 {
3639         struct be_adapter *adapter = netdev_priv(netdev);
3640         struct be_eq_obj *eqo;
3641         int i;
3642
3643         /* This protection is needed as be_close() may be called even when the
3644          * adapter is in cleared state (after eeh perm failure)
3645          */
3646         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3647                 return 0;
3648
3649         /* Before attempting cleanup, ensure all the pending cmds queued on
3650          * be_wq have finished execution
3651          */
3652         flush_workqueue(be_wq);
3653
3654         be_disable_if_filters(adapter);
3655
3656         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3657                 for_all_evt_queues(adapter, eqo, i) {
3658                         napi_disable(&eqo->napi);
3659                         be_disable_busy_poll(eqo);
3660                 }
3661                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3662         }
3663
3664         be_async_mcc_disable(adapter);
3665
3666         /* Wait for all pending tx completions to arrive so that
3667          * all tx skbs are freed.
3668          */
3669         netif_tx_disable(netdev);
3670         be_tx_compl_clean(adapter);
3671
3672         be_rx_qs_destroy(adapter);
3673
3674         for_all_evt_queues(adapter, eqo, i) {
3675                 if (msix_enabled(adapter))
3676                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3677                 else
3678                         synchronize_irq(netdev->irq);
3679                 be_eq_clean(eqo);
3680         }
3681
3682         be_irq_unregister(adapter);
3683
3684         return 0;
3685 }
3686
3687 static int be_rx_qs_create(struct be_adapter *adapter)
3688 {
3689         struct rss_info *rss = &adapter->rss_info;
3690         u8 rss_key[RSS_HASH_KEY_LEN];
3691         struct be_rx_obj *rxo;
3692         int rc, i, j;
3693
3694         for_all_rx_queues(adapter, rxo, i) {
3695                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3696                                     sizeof(struct be_eth_rx_d));
3697                 if (rc)
3698                         return rc;
3699         }
3700
3701         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3702                 rxo = default_rxo(adapter);
3703                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3704                                        rx_frag_size, adapter->if_handle,
3705                                        false, &rxo->rss_id);
3706                 if (rc)
3707                         return rc;
3708         }
3709
3710         for_all_rss_queues(adapter, rxo, i) {
3711                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3712                                        rx_frag_size, adapter->if_handle,
3713                                        true, &rxo->rss_id);
3714                 if (rc)
3715                         return rc;
3716         }
3717
3718         if (be_multi_rxq(adapter)) {
3719                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3720                         for_all_rss_queues(adapter, rxo, i) {
3721                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3722                                         break;
3723                                 rss->rsstable[j + i] = rxo->rss_id;
3724                                 rss->rss_queue[j + i] = i;
3725                         }
3726                 }
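                /* Illustration: with 3 RSS queues the RSS_INDIR_TABLE_LEN
                 * indirection entries are filled round-robin as
                 * q0, q1, q2, q0, q1, q2, ...; the inner break only trims the
                 * final partial stride at the table boundary.
                 */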
3727                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3728                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3729
3730                 if (!BEx_chip(adapter))
3731                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3732                                 RSS_ENABLE_UDP_IPV6;
3733
3734                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3735                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3736                                        RSS_INDIR_TABLE_LEN, rss_key);
3737                 if (rc) {
3738                         rss->rss_flags = RSS_ENABLE_NONE;
3739                         return rc;
3740                 }
3741
3742                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3743         } else {
3744                 /* Disable RSS, if only default RX Q is created */
3745                 rss->rss_flags = RSS_ENABLE_NONE;
3746         }
3747
3748
3749         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3750          * which is a queue empty condition
3751          */
3752         for_all_rx_queues(adapter, rxo, i)
3753                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3754
3755         return 0;
3756 }
3757
3758 static int be_enable_if_filters(struct be_adapter *adapter)
3759 {
3760         int status;
3761
3762         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3763         if (status)
3764                 return status;
3765
3766         /* For BE3 VFs, the PF programs the initial MAC address */
3767         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3768                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3769                 if (status)
3770                         return status;
3771                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3772         }
3773
3774         if (adapter->vlans_added)
3775                 be_vid_config(adapter);
3776
3777         __be_set_rx_mode(adapter);
3778
3779         return 0;
3780 }
3781
3782 static int be_open(struct net_device *netdev)
3783 {
3784         struct be_adapter *adapter = netdev_priv(netdev);
3785         struct be_eq_obj *eqo;
3786         struct be_rx_obj *rxo;
3787         struct be_tx_obj *txo;
3788         u8 link_status;
3789         int status, i;
3790
3791         status = be_rx_qs_create(adapter);
3792         if (status)
3793                 goto err;
3794
3795         status = be_enable_if_filters(adapter);
3796         if (status)
3797                 goto err;
3798
3799         status = be_irq_register(adapter);
3800         if (status)
3801                 goto err;
3802
3803         for_all_rx_queues(adapter, rxo, i)
3804                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3805
3806         for_all_tx_queues(adapter, txo, i)
3807                 be_cq_notify(adapter, txo->cq.id, true, 0);
3808
3809         be_async_mcc_enable(adapter);
3810
3811         for_all_evt_queues(adapter, eqo, i) {
3812                 napi_enable(&eqo->napi);
3813                 be_enable_busy_poll(eqo);
3814                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3815         }
3816         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3817
3818         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3819         if (!status)
3820                 be_link_status_update(adapter, link_status);
3821
3822         netif_tx_start_all_queues(netdev);
3823         if (skyhawk_chip(adapter))
3824                 udp_tunnel_get_rx_info(netdev);
3825
3826         return 0;
3827 err:
3828         be_close(adapter->netdev);
3829         return -EIO;
3830 }
3831
3832 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3833 {
3834         u32 addr;
3835
3836         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3837
3838         mac[5] = (u8)(addr & 0xFF);
3839         mac[4] = (u8)((addr >> 8) & 0xFF);
3840         mac[3] = (u8)((addr >> 16) & 0xFF);
3841         /* Use the OUI from the current MAC address */
3842         memcpy(mac, adapter->netdev->dev_addr, 3);
3843 }
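
/* Illustration with hypothetical values: for a PF MAC of 00:90:fa:12:34:56
 * and a jhash result of 0x00abcdef, the seed MAC generated above would be
 * 00:90:fa:ab:cd:ef; be_vf_eth_addr_config() below then hands out
 * 00:90:fa:ab:cd:ef to VF0, 00:90:fa:ab:cd:f0 to VF1, and so on.
 */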
3844
3845 /*
3846  * Generate a seed MAC address from the PF MAC Address using jhash.
3847  * MAC addresses for VFs are assigned incrementally starting from the seed.
3848  * These addresses are programmed in the ASIC by the PF and the VF driver
3849  * queries for the MAC address during its probe.
3850  */
3851 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3852 {
3853         u32 vf;
3854         int status = 0;
3855         u8 mac[ETH_ALEN];
3856         struct be_vf_cfg *vf_cfg;
3857
3858         be_vf_eth_addr_generate(adapter, mac);
3859
3860         for_all_vfs(adapter, vf_cfg, vf) {
3861                 if (BEx_chip(adapter))
3862                         status = be_cmd_pmac_add(adapter, mac,
3863                                                  vf_cfg->if_handle,
3864                                                  &vf_cfg->pmac_id, vf + 1);
3865                 else
3866                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3867                                                 vf + 1);
3868
3869                 if (status)
3870                         dev_err(&adapter->pdev->dev,
3871                                 "Mac address assignment failed for VF %d\n",
3872                                 vf);
3873                 else
3874                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3875
3876                 mac[5] += 1;
3877         }
3878         return status;
3879 }
3880
3881 static int be_vfs_mac_query(struct be_adapter *adapter)
3882 {
3883         int status, vf;
3884         u8 mac[ETH_ALEN];
3885         struct be_vf_cfg *vf_cfg;
3886
3887         for_all_vfs(adapter, vf_cfg, vf) {
3888                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3889                                                mac, vf_cfg->if_handle,
3890                                                false, vf+1);
3891                 if (status)
3892                         return status;
3893                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3894         }
3895         return 0;
3896 }
3897
3898 static void be_vf_clear(struct be_adapter *adapter)
3899 {
3900         struct be_vf_cfg *vf_cfg;
3901         u32 vf;
3902
3903         if (pci_vfs_assigned(adapter->pdev)) {
3904                 dev_warn(&adapter->pdev->dev,
3905                          "VFs are assigned to VMs: not disabling VFs\n");
3906                 goto done;
3907         }
3908
3909         pci_disable_sriov(adapter->pdev);
3910
3911         for_all_vfs(adapter, vf_cfg, vf) {
3912                 if (BEx_chip(adapter))
3913                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3914                                         vf_cfg->pmac_id, vf + 1);
3915                 else
3916                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3917                                        vf + 1);
3918
3919                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3920         }
3921
3922         if (BE3_chip(adapter))
3923                 be_cmd_set_hsw_config(adapter, 0, 0,
3924                                       adapter->if_handle,
3925                                       PORT_FWD_TYPE_PASSTHRU, 0);
3926 done:
3927         kfree(adapter->vf_cfg);
3928         adapter->num_vfs = 0;
3929         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3930 }
3931
3932 static void be_clear_queues(struct be_adapter *adapter)
3933 {
3934         be_mcc_queues_destroy(adapter);
3935         be_rx_cqs_destroy(adapter);
3936         be_tx_queues_destroy(adapter);
3937         be_evt_queues_destroy(adapter);
3938 }
3939
3940 static void be_cancel_worker(struct be_adapter *adapter)
3941 {
3942         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3943                 cancel_delayed_work_sync(&adapter->work);
3944                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3945         }
3946 }
3947
3948 static void be_cancel_err_detection(struct be_adapter *adapter)
3949 {
3950         struct be_error_recovery *err_rec = &adapter->error_recovery;
3951
3952         if (!be_err_recovery_workq)
3953                 return;
3954
3955         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3956                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3957                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3958         }
3959 }
3960
3961 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3962 {
3963         struct net_device *netdev = adapter->netdev;
3964
3965         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3966                 be_cmd_manage_iface(adapter, adapter->if_handle,
3967                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3968
3969         if (adapter->vxlan_port)
3970                 be_cmd_set_vxlan_port(adapter, 0);
3971
3972         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3973         adapter->vxlan_port = 0;
3974
3975         netdev->hw_enc_features = 0;
3976         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3977         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3978 }
3979
3980 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3981                                 struct be_resources *vft_res)
3982 {
3983         struct be_resources res = adapter->pool_res;
3984         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3985         struct be_resources res_mod = {0};
3986         u16 num_vf_qs = 1;
3987
3988         /* Distribute the queue resources among the PF and its VFs */
3989         if (num_vfs) {
3990                 /* Divide the rx queues evenly among the VFs and the PF, capped
3991                  * at VF-EQ-count. Any remainder queues belong to the PF.
3992                  */
3993                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3994                                 res.max_rss_qs / (num_vfs + 1));
3995
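                /* Worked example with a hypothetical pool: with
                 * max_rss_qs == 32 and num_vfs == 7, each of the 8 functions
                 * (PF + 7 VFs) gets 32 / 8 == 4 queues, subject to the
                 * SH_VF_MAX_NIC_EQS cap.
                 */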
3996                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3997                  * RSS Tables per port. Provide RSS on VFs only if the number of
3998                  * VFs requested is less than its PF pool's RSS Tables limit.
3999                  */
4000                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4001                         num_vf_qs = 1;
4002         }
4003
4004         /* res_mod is filled by the GET_PROFILE_CONFIG cmd with all '1's in
4005          * the fields that are modifiable using the SET_PROFILE_CONFIG cmd.
4006          */
4007         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4008                                   RESOURCE_MODIFIABLE, 0);
4009
4010         /* If RSS IFACE capability flags are modifiable for a VF, set the
4011          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4012          * more than 1 RSSQ is available for a VF.
4013          * Otherwise, provision only 1 queue pair for VF.
4014          */
4015         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4016                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4017                 if (num_vf_qs > 1) {
4018                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4019                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4020                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4021                 } else {
4022                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4023                                              BE_IF_FLAGS_DEFQ_RSS);
4024                 }
4025         } else {
4026                 num_vf_qs = 1;
4027         }
4028
4029         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4030                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4031                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4032         }
4033
4034         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4035         vft_res->max_rx_qs = num_vf_qs;
4036         vft_res->max_rss_qs = num_vf_qs;
4037         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4038         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4039
4040         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4041          * among the PF and its VFs, if the fields are changeable
4042          */
4043         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4044                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4045
4046         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4047                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4048
4049         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4050                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4051
4052         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4053                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4054 }
4055
4056 static void be_if_destroy(struct be_adapter *adapter)
4057 {
4058         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4059
4060         kfree(adapter->pmac_id);
4061         adapter->pmac_id = NULL;
4062
4063         kfree(adapter->mc_list);
4064         adapter->mc_list = NULL;
4065
4066         kfree(adapter->uc_list);
4067         adapter->uc_list = NULL;
4068 }
4069
4070 static int be_clear(struct be_adapter *adapter)
4071 {
4072         struct pci_dev *pdev = adapter->pdev;
4073         struct  be_resources vft_res = {0};
4074
4075         be_cancel_worker(adapter);
4076
4077         flush_workqueue(be_wq);
4078
4079         if (sriov_enabled(adapter))
4080                 be_vf_clear(adapter);
4081
4082         /* Re-configure FW to distribute resources evenly across max-supported
4083          * number of VFs, only when VFs are not already enabled.
4084          */
4085         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4086             !pci_vfs_assigned(pdev)) {
4087                 be_calculate_vf_res(adapter,
4088                                     pci_sriov_get_totalvfs(pdev),
4089                                     &vft_res);
4090                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4091                                         pci_sriov_get_totalvfs(pdev),
4092                                         &vft_res);
4093         }
4094
4095         be_disable_vxlan_offloads(adapter);
4096
4097         be_if_destroy(adapter);
4098
4099         be_clear_queues(adapter);
4100
4101         be_msix_disable(adapter);
4102         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4103         return 0;
4104 }
4105
4106 static int be_vfs_if_create(struct be_adapter *adapter)
4107 {
4108         struct be_resources res = {0};
4109         u32 cap_flags, en_flags, vf;
4110         struct be_vf_cfg *vf_cfg;
4111         int status;
4112
4113         /* If a FW profile exists, then cap_flags are updated */
4114         cap_flags = BE_VF_IF_EN_FLAGS;
4115
4116         for_all_vfs(adapter, vf_cfg, vf) {
4117                 if (!BE3_chip(adapter)) {
4118                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4119                                                            ACTIVE_PROFILE_TYPE,
4120                                                            RESOURCE_LIMITS,
4121                                                            vf + 1);
4122                         if (!status) {
4123                                 cap_flags = res.if_cap_flags;
4124                                 /* Prevent VFs from enabling VLAN promiscuous
4125                                  * mode
4126                                  */
4127                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4128                         }
4129                 }
4130
4131                 /* PF should enable IF flags during proxy if_create call */
4132                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4133                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4134                                           &vf_cfg->if_handle, vf + 1);
4135                 if (status)
4136                         return status;
4137         }
4138
4139         return 0;
4140 }
4141
4142 static int be_vf_setup_init(struct be_adapter *adapter)
4143 {
4144         struct be_vf_cfg *vf_cfg;
4145         int vf;
4146
4147         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4148                                   GFP_KERNEL);
4149         if (!adapter->vf_cfg)
4150                 return -ENOMEM;
4151
4152         for_all_vfs(adapter, vf_cfg, vf) {
4153                 vf_cfg->if_handle = -1;
4154                 vf_cfg->pmac_id = -1;
4155         }
4156         return 0;
4157 }
4158
4159 static int be_vf_setup(struct be_adapter *adapter)
4160 {
4161         struct device *dev = &adapter->pdev->dev;
4162         struct be_vf_cfg *vf_cfg;
4163         int status, old_vfs, vf;
4164         bool spoofchk;
4165
4166         old_vfs = pci_num_vf(adapter->pdev);
4167
4168         status = be_vf_setup_init(adapter);
4169         if (status)
4170                 goto err;
4171
4172         if (old_vfs) {
4173                 for_all_vfs(adapter, vf_cfg, vf) {
4174                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4175                         if (status)
4176                                 goto err;
4177                 }
4178
4179                 status = be_vfs_mac_query(adapter);
4180                 if (status)
4181                         goto err;
4182         } else {
4183                 status = be_vfs_if_create(adapter);
4184                 if (status)
4185                         goto err;
4186
4187                 status = be_vf_eth_addr_config(adapter);
4188                 if (status)
4189                         goto err;
4190         }
4191
4192         for_all_vfs(adapter, vf_cfg, vf) {
4193                 /* Allow VFs to program MAC/VLAN filters */
4194                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4195                                                   vf + 1);
4196                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4197                         status = be_cmd_set_fn_privileges(adapter,
4198                                                           vf_cfg->privileges |
4199                                                           BE_PRIV_FILTMGMT,
4200                                                           vf + 1);
4201                         if (!status) {
4202                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4203                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4204                                          vf);
4205                         }
4206                 }
4207
4208                 /* Allow full available bandwidth */
4209                 if (!old_vfs)
4210                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4211
4212                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4213                                                vf_cfg->if_handle, NULL,
4214                                                &spoofchk);
4215                 if (!status)
4216                         vf_cfg->spoofchk = spoofchk;
4217
4218                 if (!old_vfs) {
4219                         be_cmd_enable_vf(adapter, vf + 1);
4220                         be_cmd_set_logical_link_config(adapter,
4221                                                        IFLA_VF_LINK_STATE_AUTO,
4222                                                        vf+1);
4223                 }
4224         }
4225
4226         if (!old_vfs) {
4227                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4228                 if (status) {
4229                         dev_err(dev, "SRIOV enable failed\n");
4230                         adapter->num_vfs = 0;
4231                         goto err;
4232                 }
4233         }
4234
4235         if (BE3_chip(adapter)) {
4236                 /* On BE3, enable VEB only when SRIOV is enabled */
4237                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4238                                                adapter->if_handle,
4239                                                PORT_FWD_TYPE_VEB, 0);
4240                 if (status)
4241                         goto err;
4242         }
4243
4244         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4245         return 0;
4246 err:
4247         dev_err(dev, "VF setup failed\n");
4248         be_vf_clear(adapter);
4249         return status;
4250 }
4251
4252 /* Converting function_mode bits on BE3 to SH mc_type enums */
4253
4254 static u8 be_convert_mc_type(u32 function_mode)
4255 {
4256         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4257                 return vNIC1;
4258         else if (function_mode & QNQ_MODE)
4259                 return FLEX10;
4260         else if (function_mode & VNIC_MODE)
4261                 return vNIC2;
4262         else if (function_mode & UMC_ENABLED)
4263                 return UMC;
4264         else
4265                 return MC_NONE;
4266 }
4267
4268 /* On BE2/BE3, FW does not report the supported limits */
4269 static void BEx_get_resources(struct be_adapter *adapter,
4270                               struct be_resources *res)
4271 {
4272         bool use_sriov = adapter->num_vfs ? 1 : 0;
4273
4274         if (be_physfn(adapter))
4275                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4276         else
4277                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4278
4279         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4280
4281         if (be_is_mc(adapter)) {
4282                 /* Assuming that there are 4 channels per port
4283                  * when multi-channel is enabled
4284                  */
4285                 if (be_is_qnq_mode(adapter))
4286                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4287                 else
4288                         /* In a non-qnq multichannel mode, the pvid
4289                          * takes up one vlan entry
4290                          */
4291                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4292         } else {
4293                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4294         }
4295
4296         res->max_mcast_mac = BE_MAX_MC;
4297
4298         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4299          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4300          *    *only* if it is RSS-capable.
4301          */
4302         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4303             be_virtfn(adapter) ||
4304             (be_is_mc(adapter) &&
4305              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4306                 res->max_tx_qs = 1;
4307         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4308                 struct be_resources super_nic_res = {0};
4309
4310                 /* On a SuperNIC profile, the driver needs to use the
4311                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4312                  */
4313                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4314                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4315                                           0);
4316                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4317                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4318         } else {
4319                 res->max_tx_qs = BE3_MAX_TX_QS;
4320         }
4321
4322         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4323             !use_sriov && be_physfn(adapter))
4324                 res->max_rss_qs = (adapter->be3_native) ?
4325                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4326         res->max_rx_qs = res->max_rss_qs + 1;
4327
4328         if (be_physfn(adapter))
4329                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4330                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4331         else
4332                 res->max_evt_qs = 1;
4333
4334         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4335         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4336         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4337                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4338 }
4339
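     /* Reset the per-function soft state (link speed, if_handle, VLAN priority
      * bitmap, PHY state, cmd privileges) to defaults before the adapter is
      * (re)configured.
      */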
4340 static void be_setup_init(struct be_adapter *adapter)
4341 {
4342         adapter->vlan_prio_bmap = 0xff;
4343         adapter->phy.link_speed = -1;
4344         adapter->if_handle = -1;
4345         adapter->be3_native = false;
4346         adapter->if_flags = 0;
4347         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4348         if (be_physfn(adapter))
4349                 adapter->cmd_privileges = MAX_PRIVILEGES;
4350         else
4351                 adapter->cmd_privileges = MIN_PRIVILEGES;
4352 }
4353
4354 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4355  * However, this HW limitation is not exposed to the host via any SLI cmd.
4356  * As a result, in the case of SRIOV and in particular multi-partition configs
4357  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4358  * for distribution between the VFs. This self-imposed limit determines the
4359  * number of VFs for which RSS can be enabled.
4360  */
4361 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4362 {
4363         struct be_port_resources port_res = {0};
4364         u8 rss_tables_on_port;
4365         u16 max_vfs = be_max_vfs(adapter);
4366
4367         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4368                                   RESOURCE_LIMITS, 0);
4369
4370         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4371
4372         /* Each PF Pool's RSS Tables limit =
4373          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4374          */
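             /* Illustrative numbers only: if 15 RSS tables remain on the port,
              * the port supports 64 VFs in total and this PF may own up to
              * 32 VFs, the PF pool is limited to 32 * 15 / 64 = 7 RSS tables.
              */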
4375         adapter->pool_res.max_rss_tables =
4376                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4377 }
4378
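     /* Read this PF-pool's SR-IOV resource limits from FW, falling back to the
      * PCI TotalVFs value on old BE3 FW or when VFs were left enabled by a
      * previous driver load.
      */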
4379 static int be_get_sriov_config(struct be_adapter *adapter)
4380 {
4381         struct be_resources res = {0};
4382         int max_vfs, old_vfs;
4383
4384         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4385                                   RESOURCE_LIMITS, 0);
4386
4387         /* Some old versions of BE3 FW don't report max_vfs value */
4388         if (BE3_chip(adapter) && !res.max_vfs) {
4389                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4390                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4391         }
4392
4393         adapter->pool_res = res;
4394
4395         /* If, during a previous unload of the driver, the VFs were not disabled,
4396          * then we cannot rely on the PF-pool limits for the TotalVFs value.
4397          * Instead, use the TotalVFs value stored in the pci-dev struct.
4398          */
4399         old_vfs = pci_num_vf(adapter->pdev);
4400         if (old_vfs) {
4401                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4402                          old_vfs);
4403
4404                 adapter->pool_res.max_vfs =
4405                         pci_sriov_get_totalvfs(adapter->pdev);
4406                 adapter->num_vfs = old_vfs;
4407         }
4408
4409         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4410                 be_calculate_pf_pool_rss_tables(adapter);
4411                 dev_info(&adapter->pdev->dev,
4412                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4413                          be_max_pf_pool_rss_tables(adapter));
4414         }
4415         return 0;
4416 }
4417
4418 static void be_alloc_sriov_res(struct be_adapter *adapter)
4419 {
4420         int old_vfs = pci_num_vf(adapter->pdev);
4421         struct  be_resources vft_res = {0};
4422         int status;
4423
4424         be_get_sriov_config(adapter);
4425
4426         if (!old_vfs)
4427                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4428
4429         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4430          * resources are given to the PF during driver load, provided there
4431          * are no old VFs. This facility is not available in BE3 FW.
4432          * On Lancer, the FW does this itself.
4433          */
4434         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4435                 be_calculate_vf_res(adapter, 0, &vft_res);
4436                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4437                                                  &vft_res);
4438                 if (status)
4439                         dev_err(&adapter->pdev->dev,
4440                                 "Failed to optimize SRIOV resources\n");
4441         }
4442 }
4443
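     /* Discover this function's queue and filter limits (computed by the
      * driver on BEx, queried from FW otherwise) and derive the initial
      * number of RX/TX IRQs to use.
      */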
4444 static int be_get_resources(struct be_adapter *adapter)
4445 {
4446         struct device *dev = &adapter->pdev->dev;
4447         struct be_resources res = {0};
4448         int status;
4449
4450         /* For Lancer, SH etc. read the per-function resource limits from FW.
4451          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4452          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4453          */
4454         if (BEx_chip(adapter)) {
4455                 BEx_get_resources(adapter, &res);
4456         } else {
4457                 status = be_cmd_get_func_config(adapter, &res);
4458                 if (status)
4459                         return status;
4460
4461                 /* If a default RXQ must be created, we'll use up one RSSQ */
4462                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4463                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4464                         res.max_rss_qs -= 1;
4465         }
4466
4467         /* If RoCE is supported stash away half the EQs for RoCE */
4468         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4469                                 res.max_evt_qs / 2 : res.max_evt_qs;
4470         adapter->res = res;
4471
4472         /* If FW supports RSS default queue, then skip creating non-RSS
4473          * queue for non-IP traffic.
4474          */
4475         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4476                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4477
4478         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4479                  be_max_txqs(adapter), be_max_rxqs(adapter),
4480                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4481                  be_max_vfs(adapter));
4482         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4483                  be_max_uc(adapter), be_max_mc(adapter),
4484                  be_max_vlans(adapter));
4485
4486         /* Ensure RX and TX queues are created in pairs at init time */
4487         adapter->cfg_num_rx_irqs =
4488                                 min_t(u16, netif_get_num_default_rss_queues(),
4489                                       be_max_qp_irqs(adapter));
4490         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4491         return 0;
4492 }
4493
4494 static int be_get_config(struct be_adapter *adapter)
4495 {
4496         int status, level;
4497         u16 profile_id;
4498
4499         status = be_cmd_get_cntl_attributes(adapter);
4500         if (status)
4501                 return status;
4502
4503         status = be_cmd_query_fw_cfg(adapter);
4504         if (status)
4505                 return status;
4506
4507         if (!lancer_chip(adapter) && be_physfn(adapter))
4508                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4509
4510         if (BEx_chip(adapter)) {
4511                 level = be_cmd_get_fw_log_level(adapter);
4512                 adapter->msg_enable =
4513                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4514         }
4515
4516         be_cmd_get_acpi_wol_cap(adapter);
4517         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4518         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4519
4520         be_cmd_query_port_name(adapter);
4521
4522         if (be_physfn(adapter)) {
4523                 status = be_cmd_get_active_profile(adapter, &profile_id);
4524                 if (!status)
4525                         dev_info(&adapter->pdev->dev,
4526                                  "Using profile 0x%x\n", profile_id);
4527         }
4528
4529         return 0;
4530 }
4531
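     /* If the netdev has no MAC address yet, read the permanent MAC from FW
      * and use it as both dev_addr and perm_addr.
      */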
4532 static int be_mac_setup(struct be_adapter *adapter)
4533 {
4534         u8 mac[ETH_ALEN];
4535         int status;
4536
4537         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4538                 status = be_cmd_get_perm_mac(adapter, mac);
4539                 if (status)
4540                         return status;
4541
4542                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4543                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4544         }
4545
4546         return 0;
4547 }
4548
4549 static void be_schedule_worker(struct be_adapter *adapter)
4550 {
4551         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4552         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4553 }
4554
4555 static void be_destroy_err_recovery_workq(void)
4556 {
4557         if (!be_err_recovery_workq)
4558                 return;
4559
4560         flush_workqueue(be_err_recovery_workq);
4561         destroy_workqueue(be_err_recovery_workq);
4562         be_err_recovery_workq = NULL;
4563 }
4564
4565 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4566 {
4567         struct be_error_recovery *err_rec = &adapter->error_recovery;
4568
4569         if (!be_err_recovery_workq)
4570                 return;
4571
4572         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4573                            msecs_to_jiffies(delay));
4574         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4575 }
4576
4577 static int be_setup_queues(struct be_adapter *adapter)
4578 {
4579         struct net_device *netdev = adapter->netdev;
4580         int status;
4581
4582         status = be_evt_queues_create(adapter);
4583         if (status)
4584                 goto err;
4585
4586         status = be_tx_qs_create(adapter);
4587         if (status)
4588                 goto err;
4589
4590         status = be_rx_cqs_create(adapter);
4591         if (status)
4592                 goto err;
4593
4594         status = be_mcc_queues_create(adapter);
4595         if (status)
4596                 goto err;
4597
4598         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4599         if (status)
4600                 goto err;
4601
4602         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4603         if (status)
4604                 goto err;
4605
4606         return 0;
4607 err:
4608         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4609         return status;
4610 }
4611
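     /* Allocate the UC/MC/PMAC filter tables and create the HW interface.
      * RSS capability is dropped when only a single RX IRQ is configured.
      */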
4612 static int be_if_create(struct be_adapter *adapter)
4613 {
4614         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4615         u32 cap_flags = be_if_cap_flags(adapter);
4616         int status;
4617
4618         /* alloc required memory for other filtering fields */
4619         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4620                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4621         if (!adapter->pmac_id)
4622                 return -ENOMEM;
4623
4624         adapter->mc_list = kcalloc(be_max_mc(adapter),
4625                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4626         if (!adapter->mc_list)
4627                 return -ENOMEM;
4628
4629         adapter->uc_list = kcalloc(be_max_uc(adapter),
4630                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4631         if (!adapter->uc_list)
4632                 return -ENOMEM;
4633
4634         if (adapter->cfg_num_rx_irqs == 1)
4635                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4636
4637         en_flags &= cap_flags;
4638         /* will enable all the needed filter flags in be_open() */
4639         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4640                                   &adapter->if_handle, 0);
4641
4642         if (status)
4643                 return status;
4644
4645         return 0;
4646 }
4647
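     /* Destroy and re-create the interface and all queues with the current
      * configuration; the netdev is closed and re-opened around the operation
      * if it was running.
      */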
4648 int be_update_queues(struct be_adapter *adapter)
4649 {
4650         struct net_device *netdev = adapter->netdev;
4651         int status;
4652
4653         if (netif_running(netdev))
4654                 be_close(netdev);
4655
4656         be_cancel_worker(adapter);
4657
4658         /* If any vectors have been shared with RoCE we cannot re-program
4659          * the MSIx table.
4660          */
4661         if (!adapter->num_msix_roce_vec)
4662                 be_msix_disable(adapter);
4663
4664         be_clear_queues(adapter);
4665         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4666         if (status)
4667                 return status;
4668
4669         if (!msix_enabled(adapter)) {
4670                 status = be_msix_enable(adapter);
4671                 if (status)
4672                         return status;
4673         }
4674
4675         status = be_if_create(adapter);
4676         if (status)
4677                 return status;
4678
4679         status = be_setup_queues(adapter);
4680         if (status)
4681                 return status;
4682
4683         be_schedule_worker(adapter);
4684
4685         if (netif_running(netdev))
4686                 status = be_open(netdev);
4687
4688         return status;
4689 }
4690
4691 static inline int fw_major_num(const char *fw_ver)
4692 {
4693         int fw_major = 0, i;
4694
4695         i = sscanf(fw_ver, "%d.", &fw_major);
4696         if (i != 1)
4697                 return 0;
4698
4699         return fw_major;
4700 }
4701
4702 /* If it is error recovery, FLR the PF.
4703  * Else, if any VFs are already enabled, don't FLR the PF.
4704  */
4705 static bool be_reset_required(struct be_adapter *adapter)
4706 {
4707         if (be_error_recovering(adapter))
4708                 return true;
4709         else
4710                 return pci_num_vf(adapter->pdev) == 0;
4711 }
4712
4713 /* Wait for the FW to be ready and perform the required initialization */
4714 static int be_func_init(struct be_adapter *adapter)
4715 {
4716         int status;
4717
4718         status = be_fw_wait_ready(adapter);
4719         if (status)
4720                 return status;
4721
4722         /* FW is now ready; clear errors to allow cmds/doorbell */
4723         be_clear_error(adapter, BE_CLEAR_ALL);
4724
4725         if (be_reset_required(adapter)) {
4726                 status = be_cmd_reset_function(adapter);
4727                 if (status)
4728                         return status;
4729
4730                 /* Wait for interrupts to quiesce after an FLR */
4731                 msleep(100);
4732         }
4733
4734         /* Tell FW we're ready to fire cmds */
4735         status = be_cmd_fw_init(adapter);
4736         if (status)
4737                 return status;
4738
4739         /* Allow interrupts for other ULPs running on NIC function */
4740         be_intr_set(adapter, true);
4741
4742         return 0;
4743 }
4744
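     /* Bring the function up from FW-ready state to a fully configured
      * adapter: query config and resources, enable MSI-X, create the
      * interface and queues, program MAC and flow control and, if requested,
      * set up SR-IOV VFs.
      */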
4745 static int be_setup(struct be_adapter *adapter)
4746 {
4747         struct device *dev = &adapter->pdev->dev;
4748         int status;
4749
4750         status = be_func_init(adapter);
4751         if (status)
4752                 return status;
4753
4754         be_setup_init(adapter);
4755
4756         if (!lancer_chip(adapter))
4757                 be_cmd_req_native_mode(adapter);
4758
4759         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4760          * for issuing profile-related cmds
4761          */
4762         if (!BEx_chip(adapter)) {
4763                 status = be_cmd_get_func_config(adapter, NULL);
4764                 if (status)
4765                         return status;
4766         }
4767
4768         status = be_get_config(adapter);
4769         if (status)
4770                 goto err;
4771
4772         if (!BE2_chip(adapter) && be_physfn(adapter))
4773                 be_alloc_sriov_res(adapter);
4774
4775         status = be_get_resources(adapter);
4776         if (status)
4777                 goto err;
4778
4779         status = be_msix_enable(adapter);
4780         if (status)
4781                 goto err;
4782
4783         /* will enable all the needed filter flags in be_open() */
4784         status = be_if_create(adapter);
4785         if (status)
4786                 goto err;
4787
4788         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4789         rtnl_lock();
4790         status = be_setup_queues(adapter);
4791         rtnl_unlock();
4792         if (status)
4793                 goto err;
4794
4795         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4796
4797         status = be_mac_setup(adapter);
4798         if (status)
4799                 goto err;
4800
4801         be_cmd_get_fw_ver(adapter);
4802         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4803
4804         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4805                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4806                         adapter->fw_ver);
4807                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4808         }
4809
4810         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4811                                          adapter->rx_fc);
4812         if (status)
4813                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4814                                         &adapter->rx_fc);
4815
4816         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4817                  adapter->tx_fc, adapter->rx_fc);
4818
4819         if (be_physfn(adapter))
4820                 be_cmd_set_logical_link_config(adapter,
4821                                                IFLA_VF_LINK_STATE_AUTO, 0);
4822
4823         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4824          * confusing a Linux bridge or OVS that it might be connected to.
4825          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4826          * when SRIOV is not enabled.
4827          */
4828         if (BE3_chip(adapter))
4829                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4830                                       PORT_FWD_TYPE_PASSTHRU, 0);
4831
4832         if (adapter->num_vfs)
4833                 be_vf_setup(adapter);
4834
4835         status = be_cmd_get_phy_info(adapter);
4836         if (!status && be_pause_supported(adapter))
4837                 adapter->phy.fc_autoneg = 1;
4838
4839         if (be_physfn(adapter) && !lancer_chip(adapter))
4840                 be_cmd_set_features(adapter);
4841
4842         be_schedule_worker(adapter);
4843         adapter->flags |= BE_FLAGS_SETUP_DONE;
4844         return 0;
4845 err:
4846         be_clear(adapter);
4847         return status;
4848 }
4849
4850 #ifdef CONFIG_NET_POLL_CONTROLLER
4851 static void be_netpoll(struct net_device *netdev)
4852 {
4853         struct be_adapter *adapter = netdev_priv(netdev);
4854         struct be_eq_obj *eqo;
4855         int i;
4856
4857         for_all_evt_queues(adapter, eqo, i) {
4858                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4859                 napi_schedule(&eqo->napi);
4860         }
4861 }
4862 #endif
4863
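     /* Flash a new firmware image (Lancer uses its own download path).
      * Refused while the interface is down.
      */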
4864 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4865 {
4866         const struct firmware *fw;
4867         int status;
4868
4869         if (!netif_running(adapter->netdev)) {
4870                 dev_err(&adapter->pdev->dev,
4871                         "Firmware load not allowed (interface is down)\n");
4872                 return -ENETDOWN;
4873         }
4874
4875         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4876         if (status)
4877                 goto fw_exit;
4878
4879         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4880
4881         if (lancer_chip(adapter))
4882                 status = lancer_fw_download(adapter, fw);
4883         else
4884                 status = be_fw_download(adapter, fw);
4885
4886         if (!status)
4887                 be_cmd_get_fw_ver(adapter);
4888
4889 fw_exit:
4890         release_firmware(fw);
4891         return status;
4892 }
4893
4894 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4895                                  u16 flags)
4896 {
4897         struct be_adapter *adapter = netdev_priv(dev);
4898         struct nlattr *attr, *br_spec;
4899         int rem;
4900         int status = 0;
4901         u16 mode = 0;
4902
4903         if (!sriov_enabled(adapter))
4904                 return -EOPNOTSUPP;
4905
4906         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4907         if (!br_spec)
4908                 return -EINVAL;
4909
4910         nla_for_each_nested(attr, br_spec, rem) {
4911                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4912                         continue;
4913
4914                 if (nla_len(attr) < sizeof(mode))
4915                         return -EINVAL;
4916
4917                 mode = nla_get_u16(attr);
4918                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4919                         return -EOPNOTSUPP;
4920
4921                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4922                         return -EINVAL;
4923
4924                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4925                                                adapter->if_handle,
4926                                                mode == BRIDGE_MODE_VEPA ?
4927                                                PORT_FWD_TYPE_VEPA :
4928                                                PORT_FWD_TYPE_VEB, 0);
4929                 if (status)
4930                         goto err;
4931
4932                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4933                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4934
4935                 return status;
4936         }
4937 err:
4938         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4939                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4940
4941         return status;
4942 }
4943
4944 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4945                                  struct net_device *dev, u32 filter_mask,
4946                                  int nlflags)
4947 {
4948         struct be_adapter *adapter = netdev_priv(dev);
4949         int status = 0;
4950         u8 hsw_mode;
4951
4952         /* BE and Lancer chips support VEB mode only */
4953         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4954                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4955                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4956                         return 0;
4957                 hsw_mode = PORT_FWD_TYPE_VEB;
4958         } else {
4959                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4960                                                adapter->if_handle, &hsw_mode,
4961                                                NULL);
4962                 if (status)
4963                         return 0;
4964
4965                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4966                         return 0;
4967         }
4968
4969         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4970                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4971                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4972                                        0, 0, nlflags, filter_mask, NULL);
4973 }
4974
4975 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4976                                          void (*func)(struct work_struct *))
4977 {
4978         struct be_cmd_work *work;
4979
4980         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4981         if (!work) {
4982                 dev_err(&adapter->pdev->dev,
4983                         "be_work memory allocation failed\n");
4984                 return NULL;
4985         }
4986
4987         INIT_WORK(&work->work, func);
4988         work->adapter = adapter;
4989         return work;
4990 }
4991
4992 /* VxLAN offload Notes:
4993  *
4994  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4995  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4996  * is expected to work across all types of IP tunnels once exported. Skyhawk
4997  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4998  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4999  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5000  * those other tunnels are unexported on the fly through ndo_features_check().
5001  *
5002  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5003  * adds more than one port, disable offloads and don't re-enable them
5004  * until after all the tunnels are removed.
5005  */
5006 static void be_work_add_vxlan_port(struct work_struct *work)
5007 {
5008         struct be_cmd_work *cmd_work =
5009                                 container_of(work, struct be_cmd_work, work);
5010         struct be_adapter *adapter = cmd_work->adapter;
5011         struct net_device *netdev = adapter->netdev;
5012         struct device *dev = &adapter->pdev->dev;
5013         __be16 port = cmd_work->info.vxlan_port;
5014         int status;
5015
5016         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5017                 adapter->vxlan_port_aliases++;
5018                 goto done;
5019         }
5020
5021         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5022                 dev_info(dev,
5023                          "Only one UDP port supported for VxLAN offloads\n");
5024                 dev_info(dev, "Disabling VxLAN offloads\n");
5025                 adapter->vxlan_port_count++;
5026                 goto err;
5027         }
5028
5029         if (adapter->vxlan_port_count++ >= 1)
5030                 goto done;
5031
5032         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5033                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5034         if (status) {
5035                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5036                 goto err;
5037         }
5038
5039         status = be_cmd_set_vxlan_port(adapter, port);
5040         if (status) {
5041                 dev_warn(dev, "Failed to add VxLAN port\n");
5042                 goto err;
5043         }
5044         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5045         adapter->vxlan_port = port;
5046
5047         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5048                                    NETIF_F_TSO | NETIF_F_TSO6 |
5049                                    NETIF_F_GSO_UDP_TUNNEL;
5050         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5051         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5052
5053         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5054                  be16_to_cpu(port));
5055         goto done;
5056 err:
5057         be_disable_vxlan_offloads(adapter);
5058 done:
5059         kfree(cmd_work);
5060 }
5061
5062 static void be_work_del_vxlan_port(struct work_struct *work)
5063 {
5064         struct be_cmd_work *cmd_work =
5065                                 container_of(work, struct be_cmd_work, work);
5066         struct be_adapter *adapter = cmd_work->adapter;
5067         __be16 port = cmd_work->info.vxlan_port;
5068
5069         if (adapter->vxlan_port != port)
5070                 goto done;
5071
5072         if (adapter->vxlan_port_aliases) {
5073                 adapter->vxlan_port_aliases--;
5074                 goto out;
5075         }
5076
5077         be_disable_vxlan_offloads(adapter);
5078
5079         dev_info(&adapter->pdev->dev,
5080                  "Disabled VxLAN offloads for UDP port %d\n",
5081                  be16_to_cpu(port));
5082 done:
5083         adapter->vxlan_port_count--;
5084 out:
5085         kfree(cmd_work);
5086 }
5087
5088 static void be_cfg_vxlan_port(struct net_device *netdev,
5089                               struct udp_tunnel_info *ti,
5090                               void (*func)(struct work_struct *))
5091 {
5092         struct be_adapter *adapter = netdev_priv(netdev);
5093         struct be_cmd_work *cmd_work;
5094
5095         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5096                 return;
5097
5098         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5099                 return;
5100
5101         cmd_work = be_alloc_work(adapter, func);
5102         if (cmd_work) {
5103                 cmd_work->info.vxlan_port = ti->port;
5104                 queue_work(be_wq, &cmd_work->work);
5105         }
5106 }
5107
5108 static void be_del_vxlan_port(struct net_device *netdev,
5109                               struct udp_tunnel_info *ti)
5110 {
5111         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5112 }
5113
5114 static void be_add_vxlan_port(struct net_device *netdev,
5115                               struct udp_tunnel_info *ti)
5116 {
5117         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5118 }
5119
5120 static netdev_features_t be_features_check(struct sk_buff *skb,
5121                                            struct net_device *dev,
5122                                            netdev_features_t features)
5123 {
5124         struct be_adapter *adapter = netdev_priv(dev);
5125         u8 l4_hdr = 0;
5126
5127         /* The code below restricts offload features for some tunneled packets.
5128          * Offload features for normal (non tunnel) packets are unchanged.
5129          */
5130         if (!skb->encapsulation ||
5131             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5132                 return features;
5133
5134         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5135          * should disable tunnel offload features if it's not a VxLAN packet,
5136          * as tunnel offloads have been enabled only for VxLAN. This is done to
5137          * allow other tunneled traffic like GRE to work fine while VxLAN
5138          * offloads are configured in Skyhawk-R.
5139          */
5140         switch (vlan_get_protocol(skb)) {
5141         case htons(ETH_P_IP):
5142                 l4_hdr = ip_hdr(skb)->protocol;
5143                 break;
5144         case htons(ETH_P_IPV6):
5145                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5146                 break;
5147         default:
5148                 return features;
5149         }
5150
5151         if (l4_hdr != IPPROTO_UDP ||
5152             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5153             skb->inner_protocol != htons(ETH_P_TEB) ||
5154             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5155                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5156             !adapter->vxlan_port ||
5157             udp_hdr(skb)->dest != adapter->vxlan_port)
5158                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5159
5160         return features;
5161 }
5162
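     /* Compose a unique physical port id from the HBA port number followed by
      * the controller serial number words (copied in reverse word order).
      */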
5163 static int be_get_phys_port_id(struct net_device *dev,
5164                                struct netdev_phys_item_id *ppid)
5165 {
5166         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5167         struct be_adapter *adapter = netdev_priv(dev);
5168         u8 *id;
5169
5170         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5171                 return -ENOSPC;
5172
5173         ppid->id[0] = adapter->hba_port_num + 1;
5174         id = &ppid->id[1];
5175         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5176              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5177                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5178
5179         ppid->id_len = id_len;
5180
5181         return 0;
5182 }
5183
5184 static void be_set_rx_mode(struct net_device *dev)
5185 {
5186         struct be_adapter *adapter = netdev_priv(dev);
5187         struct be_cmd_work *work;
5188
5189         work = be_alloc_work(adapter, be_work_set_rx_mode);
5190         if (work)
5191                 queue_work(be_wq, &work->work);
5192 }
5193
5194 static const struct net_device_ops be_netdev_ops = {
5195         .ndo_open               = be_open,
5196         .ndo_stop               = be_close,
5197         .ndo_start_xmit         = be_xmit,
5198         .ndo_set_rx_mode        = be_set_rx_mode,
5199         .ndo_set_mac_address    = be_mac_addr_set,
5200         .ndo_get_stats64        = be_get_stats64,
5201         .ndo_validate_addr      = eth_validate_addr,
5202         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5203         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5204         .ndo_set_vf_mac         = be_set_vf_mac,
5205         .ndo_set_vf_vlan        = be_set_vf_vlan,
5206         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5207         .ndo_get_vf_config      = be_get_vf_config,
5208         .ndo_set_vf_link_state  = be_set_vf_link_state,
5209         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5210 #ifdef CONFIG_NET_POLL_CONTROLLER
5211         .ndo_poll_controller    = be_netpoll,
5212 #endif
5213         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5214         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5215 #ifdef CONFIG_NET_RX_BUSY_POLL
5216         .ndo_busy_poll          = be_busy_poll,
5217 #endif
5218         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5219         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5220         .ndo_features_check     = be_features_check,
5221         .ndo_get_phys_port_id   = be_get_phys_port_id,
5222 };
5223
5224 static void be_netdev_init(struct net_device *netdev)
5225 {
5226         struct be_adapter *adapter = netdev_priv(netdev);
5227
5228         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5229                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5230                 NETIF_F_HW_VLAN_CTAG_TX;
5231         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5232                 netdev->hw_features |= NETIF_F_RXHASH;
5233
5234         netdev->features |= netdev->hw_features |
5235                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5236
5237         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5238                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5239
5240         netdev->priv_flags |= IFF_UNICAST_FLT;
5241
5242         netdev->flags |= IFF_MULTICAST;
5243
5244         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5245
5246         netdev->netdev_ops = &be_netdev_ops;
5247
5248         netdev->ethtool_ops = &be_ethtool_ops;
5249
5250         /* MTU range: 256 - 9000 */
5251         netdev->min_mtu = BE_MIN_MTU;
5252         netdev->max_mtu = BE_MAX_MTU;
5253 }
5254
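     /* Detach the netdev, close it if it was running and tear down all
      * adapter state; be_resume() performs the reverse.
      */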
5255 static void be_cleanup(struct be_adapter *adapter)
5256 {
5257         struct net_device *netdev = adapter->netdev;
5258
5259         rtnl_lock();
5260         netif_device_detach(netdev);
5261         if (netif_running(netdev))
5262                 be_close(netdev);
5263         rtnl_unlock();
5264
5265         be_clear(adapter);
5266 }
5267
5268 static int be_resume(struct be_adapter *adapter)
5269 {
5270         struct net_device *netdev = adapter->netdev;
5271         int status;
5272
5273         status = be_setup(adapter);
5274         if (status)
5275                 return status;
5276
5277         rtnl_lock();
5278         if (netif_running(netdev))
5279                 status = be_open(netdev);
5280         rtnl_unlock();
5281
5282         if (status)
5283                 return status;
5284
5285         netif_device_attach(netdev);
5286
5287         return 0;
5288 }
5289
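     /* Initiate a chip-level soft reset by setting the SR bit in the
      * SLIPORT_SOFTRESET register.
      */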
5290 static void be_soft_reset(struct be_adapter *adapter)
5291 {
5292         u32 val;
5293
5294         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5295         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5296         val |= SLIPORT_SOFTRESET_SR_MASK;
5297         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5298 }
5299
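     /* A UE is treated as recoverable only if the POST stage reports a
      * recoverable error with a non-zero error code, enough time has passed
      * since driver load and since the last recovery, and the error code
      * differs from the previous one.
      */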
5300 static bool be_err_is_recoverable(struct be_adapter *adapter)
5301 {
5302         struct be_error_recovery *err_rec = &adapter->error_recovery;
5303         unsigned long initial_idle_time =
5304                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5305         unsigned long recovery_interval =
5306                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5307         u16 ue_err_code;
5308         u32 val;
5309
5310         val = be_POST_stage_get(adapter);
5311         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5312                 return false;
5313         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5314         if (ue_err_code == 0)
5315                 return false;
5316
5317         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5318                 ue_err_code);
5319
5320         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5321                 dev_err(&adapter->pdev->dev,
5322                         "Cannot recover within %lu sec from driver load\n",
5323                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5324                 return false;
5325         }
5326
5327         if (err_rec->last_recovery_time &&
5328             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5329                 dev_err(&adapter->pdev->dev,
5330                         "Cannot recover within %lu sec from last recovery\n",
5331                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5332                 return false;
5333         }
5334
5335         if (ue_err_code == err_rec->last_err_code) {
5336                 dev_err(&adapter->pdev->dev,
5337                         "Cannot recover from a consecutive TPE error\n");
5338                 return false;
5339         }
5340
5341         err_rec->last_recovery_time = jiffies;
5342         err_rec->last_err_code = ue_err_code;
5343         return true;
5344 }
5345
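     /* Error recovery state machine for non-Lancer chips:
      * NONE -> DETECT -> (RESET, PF0 only) -> PRE_POLL -> re-init.
      * Each state sets resched_delay so the caller re-invokes this function
      * after the required wait; returns -EAGAIN until the sequence completes.
      */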
5346 static int be_tpe_recover(struct be_adapter *adapter)
5347 {
5348         struct be_error_recovery *err_rec = &adapter->error_recovery;
5349         int status = -EAGAIN;
5350         u32 val;
5351
5352         switch (err_rec->recovery_state) {
5353         case ERR_RECOVERY_ST_NONE:
5354                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5355                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5356                 break;
5357
5358         case ERR_RECOVERY_ST_DETECT:
5359                 val = be_POST_stage_get(adapter);
5360                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5361                     POST_STAGE_RECOVERABLE_ERR) {
5362                         dev_err(&adapter->pdev->dev,
5363                                 "Unrecoverable HW error detected: 0x%x\n", val);
5364                         status = -EINVAL;
5365                         err_rec->resched_delay = 0;
5366                         break;
5367                 }
5368
5369                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5370
5371                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5372                  * milliseconds before it checks for final error status in
5373          * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5374                  * If it does, then PF0 initiates a Soft Reset.
5375                  */
5376                 if (adapter->pf_num == 0) {
5377                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5378                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5379                                         ERR_RECOVERY_UE_DETECT_DURATION;
5380                         break;
5381                 }
5382
5383                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5384                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5385                                         ERR_RECOVERY_UE_DETECT_DURATION;
5386                 break;
5387
5388         case ERR_RECOVERY_ST_RESET:
5389                 if (!be_err_is_recoverable(adapter)) {
5390                         dev_err(&adapter->pdev->dev,
5391                                 "Failed to meet recovery criteria\n");
5392                         status = -EIO;
5393                         err_rec->resched_delay = 0;
5394                         break;
5395                 }
5396                 be_soft_reset(adapter);
5397                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5398                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5399                                         err_rec->ue_to_reset_time;
5400                 break;
5401
5402         case ERR_RECOVERY_ST_PRE_POLL:
5403                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5404                 err_rec->resched_delay = 0;
5405                 status = 0;                     /* done */
5406                 break;
5407
5408         default:
5409                 status = -EINVAL;
5410                 err_rec->resched_delay = 0;
5411                 break;
5412         }
5413
5414         return status;
5415 }
5416
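     /* Recover the function after a HW error: run the recovery state machine
      * on non-Lancer chips, wait for FW to become ready, then tear down and
      * re-initialize the function.
      */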
5417 static int be_err_recover(struct be_adapter *adapter)
5418 {
5419         int status;
5420
5421         if (!lancer_chip(adapter)) {
5422                 if (!adapter->error_recovery.recovery_supported ||
5423                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5424                         return -EIO;
5425                 status = be_tpe_recover(adapter);
5426                 if (status)
5427                         goto err;
5428         }
5429
5430         /* Wait for adapter to reach quiescent state before
5431          * destroying queues
5432          */
5433         status = be_fw_wait_ready(adapter);
5434         if (status)
5435                 goto err;
5436
5437         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5438
5439         be_cleanup(adapter);
5440
5441         status = be_resume(adapter);
5442         if (status)
5443                 goto err;
5444
5445         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5446
5447 err:
5448         return status;
5449 }
5450
5451 static void be_err_detection_task(struct work_struct *work)
5452 {
5453         struct be_error_recovery *err_rec =
5454                         container_of(work, struct be_error_recovery,
5455                                      err_detection_work.work);
5456         struct be_adapter *adapter =
5457                         container_of(err_rec, struct be_adapter,
5458                                      error_recovery);
5459         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5460         struct device *dev = &adapter->pdev->dev;
5461         int recovery_status;
5462
5463         be_detect_error(adapter);
5464         if (!be_check_error(adapter, BE_ERROR_HW))
5465                 goto reschedule_task;
5466
5467         recovery_status = be_err_recover(adapter);
5468         if (!recovery_status) {
5469                 err_rec->recovery_retries = 0;
5470                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5471                 dev_info(dev, "Adapter recovery successful\n");
5472                 goto reschedule_task;
5473         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5474                 /* BEx/SH recovery state machine */
5475                 if (adapter->pf_num == 0 &&
5476                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5477                         dev_err(&adapter->pdev->dev,
5478                                 "Adapter recovery in progress\n");
5479                 resched_delay = err_rec->resched_delay;
5480                 goto reschedule_task;
5481         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5482                 /* For VFs, check if the PF has allocated resources
5483                  * every second.
5484                  */
5485                 dev_err(dev, "Re-trying adapter recovery\n");
5486                 goto reschedule_task;
5487         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5488                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5489                 /* In case of another error during recovery, it takes 30 sec
5490                  * for the adapter to come out of error. Retry error recovery after
5491                  * this time interval.
5492                  */
5493                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5494                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5495                 goto reschedule_task;
5496         } else {
5497                 dev_err(dev, "Adapter recovery failed\n");
5498                 dev_err(dev, "Please reboot server to recover\n");
5499         }
5500
5501         return;
5502
5503 reschedule_task:
5504         be_schedule_err_detection(adapter, resched_delay);
5505 }
5506
5507 static void be_log_sfp_info(struct be_adapter *adapter)
5508 {
5509         int status;
5510
5511         status = be_cmd_query_sfp_info(adapter);
5512         if (!status) {
5513                 dev_err(&adapter->pdev->dev,
5514                         "Port %c: %s Vendor: %s part no: %s",
5515                         adapter->port_name,
5516                         be_misconfig_evt_port_state[adapter->phy_state],
5517                         adapter->phy.vendor_name,
5518                         adapter->phy.vendor_pn);
5519         }
5520         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5521 }
5522
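     /* Periodic (1 second) housekeeping: read the die temperature on the PF,
      * kick off stats collection, replenish starved RX queues, update EQ
      * delays on non-Skyhawk chips and log SFP info on a PHY misconfiguration
      * event. While the interface is down only MCC completions are reaped.
      */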
5523 static void be_worker(struct work_struct *work)
5524 {
5525         struct be_adapter *adapter =
5526                 container_of(work, struct be_adapter, work.work);
5527         struct be_rx_obj *rxo;
5528         int i;
5529
5530         if (be_physfn(adapter) &&
5531             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5532                 be_cmd_get_die_temperature(adapter);
5533
5534         /* when interrupts are not yet enabled, just reap any pending
5535          * mcc completions
5536          */
5537         if (!netif_running(adapter->netdev)) {
5538                 local_bh_disable();
5539                 be_process_mcc(adapter);
5540                 local_bh_enable();
5541                 goto reschedule;
5542         }
5543
5544         if (!adapter->stats_cmd_sent) {
5545                 if (lancer_chip(adapter))
5546                         lancer_cmd_get_pport_stats(adapter,
5547                                                    &adapter->stats_cmd);
5548                 else
5549                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5550         }
5551
5552         for_all_rx_queues(adapter, rxo, i) {
5553                 /* Replenish RX-queues starved due to memory
5554                  * allocation failures.
5555                  */
5556                 if (rxo->rx_post_starved)
5557                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5558         }
5559
5560         /* EQ-delay update for Skyhawk is done while notifying EQ */
5561         if (!skyhawk_chip(adapter))
5562                 be_eqd_update(adapter, false);
5563
5564         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5565                 be_log_sfp_info(adapter);
5566
5567 reschedule:
5568         adapter->work_counter++;
5569         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5570 }
5571
5572 static void be_unmap_pci_bars(struct be_adapter *adapter)
5573 {
5574         if (adapter->csr)
5575                 pci_iounmap(adapter->pdev, adapter->csr);
5576         if (adapter->db)
5577                 pci_iounmap(adapter->pdev, adapter->db);
5578         if (adapter->pcicfg && adapter->pcicfg_mapped)
5579                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5580 }
5581
5582 static int db_bar(struct be_adapter *adapter)
5583 {
5584         if (lancer_chip(adapter) || be_virtfn(adapter))
5585                 return 0;
5586         else
5587                 return 4;
5588 }
5589
5590 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5591 {
5592         if (skyhawk_chip(adapter)) {
5593                 adapter->roce_db.size = 4096;
5594                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5595                                                               db_bar(adapter));
5596                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5597                                                                db_bar(adapter));
5598         }
5599         return 0;
5600 }
5601
5602 static int be_map_pci_bars(struct be_adapter *adapter)
5603 {
5604         struct pci_dev *pdev = adapter->pdev;
5605         u8 __iomem *addr;
5606         u32 sli_intf;
5607
5608         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5609         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5610                                 SLI_INTF_FAMILY_SHIFT;
5611         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5612
5613         if (BEx_chip(adapter) && be_physfn(adapter)) {
5614                 adapter->csr = pci_iomap(pdev, 2, 0);
5615                 if (!adapter->csr)
5616                         return -ENOMEM;
5617         }
5618
5619         addr = pci_iomap(pdev, db_bar(adapter), 0);
5620         if (!addr)
5621                 goto pci_map_err;
5622         adapter->db = addr;
5623
5624         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5625                 if (be_physfn(adapter)) {
5626                         /* PCICFG is the 2nd BAR in BE2 */
5627                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5628                         if (!addr)
5629                                 goto pci_map_err;
5630                         adapter->pcicfg = addr;
5631                         adapter->pcicfg_mapped = true;
5632                 } else {
5633                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5634                         adapter->pcicfg_mapped = false;
5635                 }
5636         }
5637
5638         be_roce_map_pci_bars(adapter);
5639         return 0;
5640
5641 pci_map_err:
5642         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5643         be_unmap_pci_bars(adapter);
5644         return -ENOMEM;
5645 }
5646
5647 static void be_drv_cleanup(struct be_adapter *adapter)
5648 {
5649         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5650         struct device *dev = &adapter->pdev->dev;
5651
5652         if (mem->va)
5653                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5654
5655         mem = &adapter->rx_filter;
5656         if (mem->va)
5657                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5658
5659         mem = &adapter->stats_cmd;
5660         if (mem->va)
5661                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5662 }
5663
5664 /* Allocate and initialize various fields in be_adapter struct */
5665 static int be_drv_init(struct be_adapter *adapter)
5666 {
5667         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5668         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5669         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5670         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5671         struct device *dev = &adapter->pdev->dev;
5672         int status = 0;
5673
5674         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5675         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5676                                                  &mbox_mem_alloc->dma,
5677                                                  GFP_KERNEL);
5678         if (!mbox_mem_alloc->va)
5679                 return -ENOMEM;
5680
5681         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5682         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5683         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5684
5685         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5686         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5687                                             &rx_filter->dma, GFP_KERNEL);
5688         if (!rx_filter->va) {
5689                 status = -ENOMEM;
5690                 goto free_mbox;
5691         }
5692
5693         if (lancer_chip(adapter))
5694                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5695         else if (BE2_chip(adapter))
5696                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5697         else if (BE3_chip(adapter))
5698                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5699         else
5700                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5701         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5702                                             &stats_cmd->dma, GFP_KERNEL);
5703         if (!stats_cmd->va) {
5704                 status = -ENOMEM;
5705                 goto free_rx_filter;
5706         }
5707
5708         mutex_init(&adapter->mbox_lock);
5709         mutex_init(&adapter->mcc_lock);
5710         mutex_init(&adapter->rx_filter_lock);
5711         spin_lock_init(&adapter->mcc_cq_lock);
5712         init_completion(&adapter->et_cmd_compl);
5713
5714         pci_save_state(adapter->pdev);
5715
5716         INIT_DELAYED_WORK(&adapter->work, be_worker);
5717
5718         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5719         adapter->error_recovery.resched_delay = 0;
5720         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5721                           be_err_detection_task);
5722
5723         adapter->rx_fc = true;
5724         adapter->tx_fc = true;
5725
5726         /* Must be a power of 2 or else MODULO will BUG_ON */
5727         adapter->be_get_temp_freq = 64;
5728
5729         return 0;
5730
5731 free_rx_filter:
5732         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5733 free_mbox:
5734         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5735                           mbox_mem_alloc->dma);
5736         return status;
5737 }
5738
5739 static void be_remove(struct pci_dev *pdev)
5740 {
5741         struct be_adapter *adapter = pci_get_drvdata(pdev);
5742
5743         if (!adapter)
5744                 return;
5745
5746         be_roce_dev_remove(adapter);
5747         be_intr_set(adapter, false);
5748
5749         be_cancel_err_detection(adapter);
5750
5751         unregister_netdev(adapter->netdev);
5752
5753         be_clear(adapter);
5754
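             /* Reset the function only when no VFs are still assigned to a
              * guest; resetting the PF in that state could disrupt them.
              */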
5755         if (!pci_vfs_assigned(adapter->pdev))
5756                 be_cmd_reset_function(adapter);
5757
5758         /* Tell FW we're done issuing cmds */
5759         be_cmd_fw_clean(adapter);
5760
5761         be_unmap_pci_bars(adapter);
5762         be_drv_cleanup(adapter);
5763
5764         pci_disable_pcie_error_reporting(pdev);
5765
5766         pci_release_regions(pdev);
5767         pci_disable_device(pdev);
5768
5769         free_netdev(adapter->netdev);
5770 }
5771
5772 static ssize_t be_hwmon_show_temp(struct device *dev,
5773                                   struct device_attribute *dev_attr,
5774                                   char *buf)
5775 {
5776         struct be_adapter *adapter = dev_get_drvdata(dev);
5777
5778         /* Unit: millidegree Celsius */
5779         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5780                 return -EIO;
5781         else
5782                 return sprintf(buf, "%u\n",
5783                                adapter->hwmon_info.be_on_die_temp * 1000);
5784 }
5785
5786 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5787                           be_hwmon_show_temp, NULL, 1);
5788
5789 static struct attribute *be_hwmon_attrs[] = {
5790         &sensor_dev_attr_temp1_input.dev_attr.attr,
5791         NULL
5792 };
5793
5794 ATTRIBUTE_GROUPS(be_hwmon);
5795
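     /* Returns a printable name for the multi-channel mode this function is
      * operating in (UMC, FLEX10, vNIC, nPAR, UFP); an empty string means no
      * multi-channel mode.
      */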
5796 static char *mc_name(struct be_adapter *adapter)
5797 {
5798         char *str = ""; /* default */
5799
5800         switch (adapter->mc_type) {
5801         case UMC:
5802                 str = "UMC";
5803                 break;
5804         case FLEX10:
5805                 str = "FLEX10";
5806                 break;
5807         case vNIC1:
5808                 str = "vNIC-1";
5809                 break;
5810         case nPAR:
5811                 str = "nPAR";
5812                 break;
5813         case UFP:
5814                 str = "UFP";
5815                 break;
5816         case vNIC2:
5817                 str = "vNIC-2";
5818                 break;
5819         default:
5820                 str = "";
5821         }
5822
5823         return str;
5824 }
5825
5826 static inline char *func_name(struct be_adapter *adapter)
5827 {
5828         return be_physfn(adapter) ? "PF" : "VF";
5829 }
5830
5831 static inline char *nic_name(struct pci_dev *pdev)
5832 {
5833         switch (pdev->device) {
5834         case OC_DEVICE_ID1:
5835                 return OC_NAME;
5836         case OC_DEVICE_ID2:
5837                 return OC_NAME_BE;
5838         case OC_DEVICE_ID3:
5839         case OC_DEVICE_ID4:
5840                 return OC_NAME_LANCER;
5841         case BE_DEVICE_ID2:
5842                 return BE3_NAME;
5843         case OC_DEVICE_ID5:
5844         case OC_DEVICE_ID6:
5845                 return OC_NAME_SH;
5846         default:
5847                 return BE_NAME;
5848         }
5849 }
5850
5851 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5852 {
5853         struct be_adapter *adapter;
5854         struct net_device *netdev;
5855         int status = 0;
5856
5857         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5858
5859         status = pci_enable_device(pdev);
5860         if (status)
5861                 goto do_none;
5862
5863         status = pci_request_regions(pdev, DRV_NAME);
5864         if (status)
5865                 goto disable_dev;
5866         pci_set_master(pdev);
5867
5868         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5869         if (!netdev) {
5870                 status = -ENOMEM;
5871                 goto rel_reg;
5872         }
5873         adapter = netdev_priv(netdev);
5874         adapter->pdev = pdev;
5875         pci_set_drvdata(pdev, adapter);
5876         adapter->netdev = netdev;
5877         SET_NETDEV_DEV(netdev, &pdev->dev);
5878
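             /* Prefer 64-bit DMA addressing (and advertise NETIF_F_HIGHDMA);
              * fall back to a 32-bit mask if the platform cannot provide it.
              */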
5879         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5880         if (!status) {
5881                 netdev->features |= NETIF_F_HIGHDMA;
5882         } else {
5883                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5884                 if (status) {
5885                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5886                         goto free_netdev;
5887                 }
5888         }
5889
5890         status = pci_enable_pcie_error_reporting(pdev);
5891         if (!status)
5892                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5893
5894         status = be_map_pci_bars(adapter);
5895         if (status)
5896                 goto free_netdev;
5897
5898         status = be_drv_init(adapter);
5899         if (status)
5900                 goto unmap_bars;
5901
5902         status = be_setup(adapter);
5903         if (status)
5904                 goto drv_cleanup;
5905
5906         be_netdev_init(netdev);
5907         status = register_netdev(netdev);
5908         if (status != 0)
5909                 goto unsetup;
5910
5911         be_roce_dev_add(adapter);
5912
5913         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5914         adapter->error_recovery.probe_time = jiffies;
5915
5916         /* On-die temperature is not supported for VFs. */
5917         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5918                 adapter->hwmon_info.hwmon_dev =
5919                         devm_hwmon_device_register_with_groups(&pdev->dev,
5920                                                                DRV_NAME,
5921                                                                adapter,
5922                                                                be_hwmon_groups);
5923                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5924         }
5925
5926         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5927                  func_name(adapter), mc_name(adapter), adapter->port_name);
5928
5929         return 0;
5930
5931 unsetup:
5932         be_clear(adapter);
5933 drv_cleanup:
5934         be_drv_cleanup(adapter);
5935 unmap_bars:
5936         be_unmap_pci_bars(adapter);
5937 free_netdev:
5938         free_netdev(netdev);
5939 rel_reg:
5940         pci_release_regions(pdev);
5941 disable_dev:
5942         pci_disable_device(pdev);
5943 do_none:
5944         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5945         return status;
5946 }
5947
5948 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5949 {
5950         struct be_adapter *adapter = pci_get_drvdata(pdev);
5951
5952         be_intr_set(adapter, false);
5953         be_cancel_err_detection(adapter);
5954
5955         be_cleanup(adapter);
5956
5957         pci_save_state(pdev);
5958         pci_disable_device(pdev);
5959         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5960         return 0;
5961 }
5962
5963 static int be_pci_resume(struct pci_dev *pdev)
5964 {
5965         struct be_adapter *adapter = pci_get_drvdata(pdev);
5966         int status = 0;
5967
5968         status = pci_enable_device(pdev);
5969         if (status)
5970                 return status;
5971
5972         pci_restore_state(pdev);
5973
5974         status = be_resume(adapter);
5975         if (status)
5976                 return status;
5977
5978         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5979
5980         return 0;
5981 }
5982
5983 /*
5984  * A function-level reset (FLR) stops the BE adapter from DMAing any data.
5985  */
5986 static void be_shutdown(struct pci_dev *pdev)
5987 {
5988         struct be_adapter *adapter = pci_get_drvdata(pdev);
5989
5990         if (!adapter)
5991                 return;
5992
5993         be_roce_dev_shutdown(adapter);
5994         cancel_delayed_work_sync(&adapter->work);
5995         be_cancel_err_detection(adapter);
5996
5997         netif_device_detach(adapter->netdev);
5998
5999         be_cmd_reset_function(adapter);
6000
6001         pci_disable_device(pdev);
6002 }
6003
6004 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6005                                             pci_channel_state_t state)
6006 {
6007         struct be_adapter *adapter = pci_get_drvdata(pdev);
6008
6009         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6010
6011         be_roce_dev_remove(adapter);
6012
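             /* Quiesce only once: if BE_ERROR_EEH is already set, a previous
              * error_detected callback has already cancelled the recovery work
              * and cleaned up the interface.
              */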
6013         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6014                 be_set_error(adapter, BE_ERROR_EEH);
6015
6016                 be_cancel_err_detection(adapter);
6017
6018                 be_cleanup(adapter);
6019         }
6020
6021         if (state == pci_channel_io_perm_failure)
6022                 return PCI_ERS_RESULT_DISCONNECT;
6023
6024         pci_disable_device(pdev);
6025
6026         /* The error could cause the FW to trigger a flash debug dump.
6027          * Resetting the card while flash dump is in progress
6028          * can cause it not to recover; wait for it to finish.
6029          * Wait only on the first function, since this wait is needed only
6030          * once per adapter.
6031          */
6032         if (pdev->devfn == 0)
6033                 ssleep(30);
6034
6035         return PCI_ERS_RESULT_NEED_RESET;
6036 }
6037
6038 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6039 {
6040         struct be_adapter *adapter = pci_get_drvdata(pdev);
6041         int status;
6042
6043         dev_info(&adapter->pdev->dev, "EEH reset\n");
6044
6045         status = pci_enable_device(pdev);
6046         if (status)
6047                 return PCI_ERS_RESULT_DISCONNECT;
6048
6049         pci_set_master(pdev);
6050         pci_restore_state(pdev);
6051
6052         /* Check if card is ok and fw is ready */
6053         dev_info(&adapter->pdev->dev,
6054                  "Waiting for FW to be ready after EEH reset\n");
6055         status = be_fw_wait_ready(adapter);
6056         if (status)
6057                 return PCI_ERS_RESULT_DISCONNECT;
6058
6059         pci_cleanup_aer_uncorrect_error_status(pdev);
6060         be_clear_error(adapter, BE_CLEAR_ALL);
6061         return PCI_ERS_RESULT_RECOVERED;
6062 }
6063
6064 static void be_eeh_resume(struct pci_dev *pdev)
6065 {
6066         int status = 0;
6067         struct be_adapter *adapter = pci_get_drvdata(pdev);
6068
6069         dev_info(&adapter->pdev->dev, "EEH resume\n");
6070
6071         pci_save_state(pdev);
6072
6073         status = be_resume(adapter);
6074         if (status)
6075                 goto err;
6076
6077         be_roce_dev_add(adapter);
6078
6079         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6080         return;
6081 err:
6082         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6083 }
6084
6085 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6086 {
6087         struct be_adapter *adapter = pci_get_drvdata(pdev);
6088         struct be_resources vft_res = {0};
6089         int status;
6090
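             /* num_vfs == 0 is a request to disable SR-IOV: tear down any
              * existing VFs first (the case where VFs are still assigned to a
              * guest is rejected just below).
              */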
6091         if (!num_vfs)
6092                 be_vf_clear(adapter);
6093
6094         adapter->num_vfs = num_vfs;
6095
6096         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6097                 dev_warn(&pdev->dev,
6098                          "Cannot disable VFs while they are assigned\n");
6099                 return -EBUSY;
6100         }
6101
6102         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6103          * resources are distributed equally across the maximum number of VFs.
6104          * The user may request that only a subset of the max VFs be enabled.
6105          * Based on num_vfs, redistribute the resources across num_vfs so that
6106          * each VF gets a larger share of the resources.
6107          * This facility is not available in BE3 FW.
6108          * On Lancer chips this redistribution is done by the FW itself.
6109          */
6110         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6111                 be_calculate_vf_res(adapter, adapter->num_vfs,
6112                                     &vft_res);
6113                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6114                                                  adapter->num_vfs, &vft_res);
6115                 if (status)
6116                         dev_err(&pdev->dev,
6117                                 "Failed to optimize SR-IOV resources\n");
6118         }
6119
6120         status = be_get_resources(adapter);
6121         if (status)
6122                 return be_cmd_status(status);
6123
6124         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6125         rtnl_lock();
6126         status = be_update_queues(adapter);
6127         rtnl_unlock();
6128         if (status)
6129                 return be_cmd_status(status);
6130
6131         if (adapter->num_vfs)
6132                 status = be_vf_setup(adapter);
6133
6134         if (!status)
6135                 return adapter->num_vfs;
6136
6137         return 0;
6138 }
6139
6140 static const struct pci_error_handlers be_eeh_handlers = {
6141         .error_detected = be_eeh_err_detected,
6142         .slot_reset = be_eeh_reset,
6143         .resume = be_eeh_resume,
6144 };
6145
6146 static struct pci_driver be_driver = {
6147         .name = DRV_NAME,
6148         .id_table = be_dev_ids,
6149         .probe = be_probe,
6150         .remove = be_remove,
6151         .suspend = be_suspend,
6152         .resume = be_pci_resume,
6153         .shutdown = be_shutdown,
6154         .sriov_configure = be_pci_sriov_configure,
6155         .err_handler = &be_eeh_handlers
6156 };
6157
6158 static int __init be_init_module(void)
6159 {
6160         int status;
6161
6162         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6163             rx_frag_size != 2048) {
6164                 printk(KERN_WARNING DRV_NAME
6165                         " : Module param rx_frag_size must be 2048/4096/8192."
6166                         " Using 2048\n");
6167                 rx_frag_size = 2048;
6168         }
6169
6170         if (num_vfs > 0) {
6171                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6172                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6173         }
6174
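             /* be_wq is required for the driver to function, so fail module init
              * if it cannot be created; the error-recovery workqueue is optional
              * and its absence is only warned about below.
              */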
6175         be_wq = create_singlethread_workqueue("be_wq");
6176         if (!be_wq) {
6177                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6178                 return -ENOMEM;
6179         }
6180
6181         be_err_recovery_workq =
6182                 create_singlethread_workqueue("be_err_recover");
6183         if (!be_err_recovery_workq)
6184                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6185
6186         status = pci_register_driver(&be_driver);
6187         if (status) {
6188                 destroy_workqueue(be_wq);
6189                 be_destroy_err_recovery_workq();
6190         }
6191         return status;
6192 }
6193 module_init(be_init_module);
6194
6195 static void __exit be_exit_module(void)
6196 {
6197         pci_unregister_driver(&be_driver);
6198
6199         be_destroy_err_recovery_workq();
6200
6201         if (be_wq)
6202                 destroy_workqueue(be_wq);
6203 }
6204 module_exit(be_exit_module);