1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
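/* Example (a sketch of the standard PCI sysfs interface for enabling VFs):
 *   echo 4 > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs
 */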
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
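/* Enable/disable host interrupt delivery by toggling the HOSTINTR bit in the
 * MEMBAR interrupt-control register in PCI config space.
 */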
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
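/* Ring the RX queue doorbell to hand 'posted' newly refilled buffers to HW.
 * The wmb() ensures the RX descriptors are visible before the doorbell write.
 */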
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
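/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * and acknowledge 'num_popped' processed event entries.
 */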
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without the FILTMGMT privilege are not allowed to set
322          * their MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
333          * privilege or if the PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK only if the PF has
336          * programmed the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365 done:
366         ether_addr_copy(adapter->dev_mac, addr->sa_data);
367         ether_addr_copy(netdev->dev_addr, addr->sa_data);
368         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
369         return 0;
370 err:
371         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
372         return status;
373 }
374
375 /* BE2 supports only v0 cmd */
376 static void *hw_stats_from_cmd(struct be_adapter *adapter)
377 {
378         if (BE2_chip(adapter)) {
379                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
380
381                 return &cmd->hw_stats;
382         } else if (BE3_chip(adapter)) {
383                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
384
385                 return &cmd->hw_stats;
386         } else {
387                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
388
389                 return &cmd->hw_stats;
390         }
391 }
392
393 /* BE2 supports only v0 cmd */
394 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
395 {
396         if (BE2_chip(adapter)) {
397                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
398
399                 return &hw_stats->erx;
400         } else if (BE3_chip(adapter)) {
401                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
402
403                 return &hw_stats->erx;
404         } else {
405                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
406
407                 return &hw_stats->erx;
408         }
409 }
410
411 static void populate_be_v0_stats(struct be_adapter *adapter)
412 {
413         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
414         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
415         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
416         struct be_port_rxf_stats_v0 *port_stats =
417                                         &rxf_stats->port[adapter->port_num];
418         struct be_drv_stats *drvs = &adapter->drv_stats;
419
420         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
421         drvs->rx_pause_frames = port_stats->rx_pause_frames;
422         drvs->rx_crc_errors = port_stats->rx_crc_errors;
423         drvs->rx_control_frames = port_stats->rx_control_frames;
424         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
425         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
426         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
427         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
428         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
429         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
430         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
431         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
432         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
433         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
434         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
435         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
436         drvs->rx_dropped_header_too_small =
437                 port_stats->rx_dropped_header_too_small;
438         drvs->rx_address_filtered =
439                                         port_stats->rx_address_filtered +
440                                         port_stats->rx_vlan_filtered;
441         drvs->rx_alignment_symbol_errors =
442                 port_stats->rx_alignment_symbol_errors;
443
444         drvs->tx_pauseframes = port_stats->tx_pauseframes;
445         drvs->tx_controlframes = port_stats->tx_controlframes;
446
447         if (adapter->port_num)
448                 drvs->jabber_events = rxf_stats->port1_jabber_events;
449         else
450                 drvs->jabber_events = rxf_stats->port0_jabber_events;
451         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
452         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
453         drvs->forwarded_packets = rxf_stats->forwarded_packets;
454         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
455         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
456         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
457         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
458 }
459
460 static void populate_be_v1_stats(struct be_adapter *adapter)
461 {
462         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
463         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
464         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
465         struct be_port_rxf_stats_v1 *port_stats =
466                                         &rxf_stats->port[adapter->port_num];
467         struct be_drv_stats *drvs = &adapter->drv_stats;
468
469         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
470         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
471         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
472         drvs->rx_pause_frames = port_stats->rx_pause_frames;
473         drvs->rx_crc_errors = port_stats->rx_crc_errors;
474         drvs->rx_control_frames = port_stats->rx_control_frames;
475         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
476         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
477         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
478         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
479         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
480         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
481         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
482         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
483         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
484         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
485         drvs->rx_dropped_header_too_small =
486                 port_stats->rx_dropped_header_too_small;
487         drvs->rx_input_fifo_overflow_drop =
488                 port_stats->rx_input_fifo_overflow_drop;
489         drvs->rx_address_filtered = port_stats->rx_address_filtered;
490         drvs->rx_alignment_symbol_errors =
491                 port_stats->rx_alignment_symbol_errors;
492         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
493         drvs->tx_pauseframes = port_stats->tx_pauseframes;
494         drvs->tx_controlframes = port_stats->tx_controlframes;
495         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
496         drvs->jabber_events = port_stats->jabber_events;
497         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
498         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
499         drvs->forwarded_packets = rxf_stats->forwarded_packets;
500         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
501         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
502         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
503         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
504 }
505
506 static void populate_be_v2_stats(struct be_adapter *adapter)
507 {
508         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
509         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
510         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
511         struct be_port_rxf_stats_v2 *port_stats =
512                                         &rxf_stats->port[adapter->port_num];
513         struct be_drv_stats *drvs = &adapter->drv_stats;
514
515         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
516         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
517         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
518         drvs->rx_pause_frames = port_stats->rx_pause_frames;
519         drvs->rx_crc_errors = port_stats->rx_crc_errors;
520         drvs->rx_control_frames = port_stats->rx_control_frames;
521         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
522         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
523         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
524         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
525         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
526         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
527         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
528         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
529         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
530         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
531         drvs->rx_dropped_header_too_small =
532                 port_stats->rx_dropped_header_too_small;
533         drvs->rx_input_fifo_overflow_drop =
534                 port_stats->rx_input_fifo_overflow_drop;
535         drvs->rx_address_filtered = port_stats->rx_address_filtered;
536         drvs->rx_alignment_symbol_errors =
537                 port_stats->rx_alignment_symbol_errors;
538         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
539         drvs->tx_pauseframes = port_stats->tx_pauseframes;
540         drvs->tx_controlframes = port_stats->tx_controlframes;
541         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
542         drvs->jabber_events = port_stats->jabber_events;
543         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
544         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
545         drvs->forwarded_packets = rxf_stats->forwarded_packets;
546         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
547         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
548         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
549         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
550         if (be_roce_supported(adapter)) {
551                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
552                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
553                 drvs->rx_roce_frames = port_stats->roce_frames_received;
554                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
555                 drvs->roce_drops_payload_len =
556                         port_stats->roce_drops_payload_len;
557         }
558 }
559
560 static void populate_lancer_stats(struct be_adapter *adapter)
561 {
562         struct be_drv_stats *drvs = &adapter->drv_stats;
563         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
564
565         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
566         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
567         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
568         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
569         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
570         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
571         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
572         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
573         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
574         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
575         drvs->rx_dropped_tcp_length =
576                                 pport_stats->rx_dropped_invalid_tcp_length;
577         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
578         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
579         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
580         drvs->rx_dropped_header_too_small =
581                                 pport_stats->rx_dropped_header_too_small;
582         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
583         drvs->rx_address_filtered =
584                                         pport_stats->rx_address_filtered +
585                                         pport_stats->rx_vlan_filtered;
586         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
587         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
589         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
590         drvs->jabber_events = pport_stats->rx_jabbers;
591         drvs->forwarded_packets = pport_stats->num_forwards_lo;
592         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
593         drvs->rx_drops_too_many_frags =
594                                 pport_stats->rx_drops_too_many_frags_lo;
595 }
596
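/* Fold a 16-bit HW counter (which wraps at 65535) into a 32-bit SW
 * accumulator so that the driver-visible value does not wrap.
 */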
597 static void accumulate_16bit_val(u32 *acc, u16 val)
598 {
599 #define lo(x)                   (x & 0xFFFF)
600 #define hi(x)                   (x & 0xFFFF0000)
601         bool wrapped = val < lo(*acc);
602         u32 newacc = hi(*acc) + val;
603
604         if (wrapped)
605                 newacc += 65536;
606         ACCESS_ONCE(*acc) = newacc;
607 }
608
609 static void populate_erx_stats(struct be_adapter *adapter,
610                                struct be_rx_obj *rxo, u32 erx_stat)
611 {
612         if (!BEx_chip(adapter))
613                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
614         else
615                 /* the erx HW counter below can wrap around after
616                  * 65535; the driver accumulates it into a 32-bit value
617                  */
618                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
619                                      (u16)erx_stat);
620 }
621
622 void be_parse_stats(struct be_adapter *adapter)
623 {
624         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
625         struct be_rx_obj *rxo;
626         int i;
627         u32 erx_stat;
628
629         if (lancer_chip(adapter)) {
630                 populate_lancer_stats(adapter);
631         } else {
632                 if (BE2_chip(adapter))
633                         populate_be_v0_stats(adapter);
634                 else if (BE3_chip(adapter))
635                         /* for BE3 */
636                         populate_be_v1_stats(adapter);
637                 else
638                         populate_be_v2_stats(adapter);
639
640                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
641                 for_all_rx_queues(adapter, rxo, i) {
642                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
643                         populate_erx_stats(adapter, rxo, erx_stat);
644                 }
645         }
646 }
647
648 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
649                                                 struct rtnl_link_stats64 *stats)
650 {
651         struct be_adapter *adapter = netdev_priv(netdev);
652         struct be_drv_stats *drvs = &adapter->drv_stats;
653         struct be_rx_obj *rxo;
654         struct be_tx_obj *txo;
655         u64 pkts, bytes;
656         unsigned int start;
657         int i;
658
659         for_all_rx_queues(adapter, rxo, i) {
660                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
661
662                 do {
663                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
664                         pkts = rx_stats(rxo)->rx_pkts;
665                         bytes = rx_stats(rxo)->rx_bytes;
666                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
667                 stats->rx_packets += pkts;
668                 stats->rx_bytes += bytes;
669                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
670                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
671                                         rx_stats(rxo)->rx_drops_no_frags;
672         }
673
674         for_all_tx_queues(adapter, txo, i) {
675                 const struct be_tx_stats *tx_stats = tx_stats(txo);
676
677                 do {
678                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
679                         pkts = tx_stats(txo)->tx_pkts;
680                         bytes = tx_stats(txo)->tx_bytes;
681                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
682                 stats->tx_packets += pkts;
683                 stats->tx_bytes += bytes;
684         }
685
686         /* bad pkts received */
687         stats->rx_errors = drvs->rx_crc_errors +
688                 drvs->rx_alignment_symbol_errors +
689                 drvs->rx_in_range_errors +
690                 drvs->rx_out_range_errors +
691                 drvs->rx_frame_too_long +
692                 drvs->rx_dropped_too_small +
693                 drvs->rx_dropped_too_short +
694                 drvs->rx_dropped_header_too_small +
695                 drvs->rx_dropped_tcp_length +
696                 drvs->rx_dropped_runt;
697
698         /* detailed rx errors */
699         stats->rx_length_errors = drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long;
702
703         stats->rx_crc_errors = drvs->rx_crc_errors;
704
705         /* frame alignment errors */
706         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
707
708         /* receiver fifo overrun */
709         /* drops_no_pbuf is not per i/f, it's per BE card */
710         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
711                                 drvs->rx_input_fifo_overflow_drop +
712                                 drvs->rx_drops_no_pbuf;
713         return stats;
714 }
715
716 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
717 {
718         struct net_device *netdev = adapter->netdev;
719
720         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
721                 netif_carrier_off(netdev);
722                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
723         }
724
725         if (link_status)
726                 netif_carrier_on(netdev);
727         else
728                 netif_carrier_off(netdev);
729
730         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
731 }
732
733 static int be_gso_hdr_len(struct sk_buff *skb)
734 {
735         if (skb->encapsulation)
736                 return skb_inner_transport_offset(skb) +
737                        inner_tcp_hdrlen(skb);
738         return skb_transport_offset(skb) + tcp_hdrlen(skb);
739 }
740
741 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
742 {
743         struct be_tx_stats *stats = tx_stats(txo);
744         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
745         /* Account for headers which get duplicated in TSO pkt */
746         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
747
748         u64_stats_update_begin(&stats->sync);
749         stats->tx_reqs++;
750         stats->tx_bytes += skb->len + dup_hdr_len;
751         stats->tx_pkts += tx_pkts;
752         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
753                 stats->tx_vxlan_offload_pkts += tx_pkts;
754         u64_stats_update_end(&stats->sync);
755 }
756
757 /* Returns number of WRBs needed for the skb */
758 static u32 skb_wrb_cnt(struct sk_buff *skb)
759 {
760         /* +1 for the header wrb */
761         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
762 }
763
764 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
765 {
766         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
767         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
768         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
769         wrb->rsvd0 = 0;
770 }
771
772 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
773  * to avoid the swap and shift/mask operations in wrb_fill().
774  */
775 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
776 {
777         wrb->frag_pa_hi = 0;
778         wrb->frag_pa_lo = 0;
779         wrb->frag_len = 0;
780         wrb->rsvd0 = 0;
781 }
782
783 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
784                                      struct sk_buff *skb)
785 {
786         u8 vlan_prio;
787         u16 vlan_tag;
788
789         vlan_tag = skb_vlan_tag_get(skb);
790         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
791         /* If vlan priority provided by OS is NOT in available bmap */
792         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
793                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
794                                 adapter->recommended_prio_bits;
795
796         return vlan_tag;
797 }
798
799 /* Used only for IP tunnel packets */
800 static u16 skb_inner_ip_proto(struct sk_buff *skb)
801 {
802         return (inner_ip_hdr(skb)->version == 4) ?
803                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
804 }
805
806 static u16 skb_ip_proto(struct sk_buff *skb)
807 {
808         return (ip_hdr(skb)->version == 4) ?
809                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
810 }
811
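/* The TXQ is treated as full once it can no longer hold one more
 * maximally fragmented skb worth of WRBs.
 */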
812 static inline bool be_is_txq_full(struct be_tx_obj *txo)
813 {
814         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
815 }
816
817 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
818 {
819         return atomic_read(&txo->q.used) < txo->q.len / 2;
820 }
821
822 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
823 {
824         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
825 }
826
827 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
828                                        struct sk_buff *skb,
829                                        struct be_wrb_params *wrb_params)
830 {
831         u16 proto;
832
833         if (skb_is_gso(skb)) {
834                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
835                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
836                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
837                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
838         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
839                 if (skb->encapsulation) {
840                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
841                         proto = skb_inner_ip_proto(skb);
842                 } else {
843                         proto = skb_ip_proto(skb);
844                 }
845                 if (proto == IPPROTO_TCP)
846                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
847                 else if (proto == IPPROTO_UDP)
848                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
849         }
850
851         if (skb_vlan_tag_present(skb)) {
852                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
853                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
854         }
855
856         BE_WRB_F_SET(wrb_params->features, CRC, 1);
857 }
858
859 static void wrb_fill_hdr(struct be_adapter *adapter,
860                          struct be_eth_hdr_wrb *hdr,
861                          struct be_wrb_params *wrb_params,
862                          struct sk_buff *skb)
863 {
864         memset(hdr, 0, sizeof(*hdr));
865
866         SET_TX_WRB_HDR_BITS(crc, hdr,
867                             BE_WRB_F_GET(wrb_params->features, CRC));
868         SET_TX_WRB_HDR_BITS(ipcs, hdr,
869                             BE_WRB_F_GET(wrb_params->features, IPCS));
870         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
871                             BE_WRB_F_GET(wrb_params->features, TCPCS));
872         SET_TX_WRB_HDR_BITS(udpcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, UDPCS));
874
875         SET_TX_WRB_HDR_BITS(lso, hdr,
876                             BE_WRB_F_GET(wrb_params->features, LSO));
877         SET_TX_WRB_HDR_BITS(lso6, hdr,
878                             BE_WRB_F_GET(wrb_params->features, LSO6));
879         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
880
881         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
882          * hack is not needed, the evt bit is set while ringing DB.
883          */
884         SET_TX_WRB_HDR_BITS(event, hdr,
885                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
886         SET_TX_WRB_HDR_BITS(vlan, hdr,
887                             BE_WRB_F_GET(wrb_params->features, VLAN));
888         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
889
890         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
891         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
892         SET_TX_WRB_HDR_BITS(mgmt, hdr,
893                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
894 }
895
896 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
897                           bool unmap_single)
898 {
899         dma_addr_t dma;
900         u32 frag_len = le32_to_cpu(wrb->frag_len);
901
902
903         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
904                 (u64)le32_to_cpu(wrb->frag_pa_lo);
905         if (frag_len) {
906                 if (unmap_single)
907                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
908                 else
909                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
910         }
911 }
912
913 /* Grab a WRB header for xmit */
914 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
915 {
916         u32 head = txo->q.head;
917
918         queue_head_inc(&txo->q);
919         return head;
920 }
921
922 /* Set up the WRB header for xmit */
923 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
924                                 struct be_tx_obj *txo,
925                                 struct be_wrb_params *wrb_params,
926                                 struct sk_buff *skb, u16 head)
927 {
928         u32 num_frags = skb_wrb_cnt(skb);
929         struct be_queue_info *txq = &txo->q;
930         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
931
932         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
933         be_dws_cpu_to_le(hdr, sizeof(*hdr));
934
935         BUG_ON(txo->sent_skb_list[head]);
936         txo->sent_skb_list[head] = skb;
937         txo->last_req_hdr = head;
938         atomic_add(num_frags, &txq->used);
939         txo->last_req_wrb_cnt = num_frags;
940         txo->pend_wrb_cnt += num_frags;
941 }
942
943 /* Setup a WRB fragment (buffer descriptor) for xmit */
944 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
945                                  int len)
946 {
947         struct be_eth_wrb *wrb;
948         struct be_queue_info *txq = &txo->q;
949
950         wrb = queue_head_node(txq);
951         wrb_fill(wrb, busaddr, len);
952         queue_head_inc(txq);
953 }
954
955 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
956  * was invoked. The producer index is restored to the previous packet and the
957  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
958  */
959 static void be_xmit_restore(struct be_adapter *adapter,
960                             struct be_tx_obj *txo, u32 head, bool map_single,
961                             u32 copied)
962 {
963         struct device *dev;
964         struct be_eth_wrb *wrb;
965         struct be_queue_info *txq = &txo->q;
966
967         dev = &adapter->pdev->dev;
968         txq->head = head;
969
970         /* skip the first wrb (hdr); it's not mapped */
971         queue_head_inc(txq);
972         while (copied) {
973                 wrb = queue_head_node(txq);
974                 unmap_tx_frag(dev, wrb, map_single);
975                 map_single = false;
976                 copied -= le32_to_cpu(wrb->frag_len);
977                 queue_head_inc(txq);
978         }
979
980         txq->head = head;
981 }
982
983 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
984  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
985  * of WRBs used up by the packet.
986  */
987 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
988                            struct sk_buff *skb,
989                            struct be_wrb_params *wrb_params)
990 {
991         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
992         struct device *dev = &adapter->pdev->dev;
993         struct be_queue_info *txq = &txo->q;
994         bool map_single = false;
995         u32 head = txq->head;
996         dma_addr_t busaddr;
997         int len;
998
999         head = be_tx_get_wrb_hdr(txo);
1000
1001         if (skb->len > skb->data_len) {
1002                 len = skb_headlen(skb);
1003
1004                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1005                 if (dma_mapping_error(dev, busaddr))
1006                         goto dma_err;
1007                 map_single = true;
1008                 be_tx_setup_wrb_frag(txo, busaddr, len);
1009                 copied += len;
1010         }
1011
1012         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1013                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1014                 len = skb_frag_size(frag);
1015
1016                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1017                 if (dma_mapping_error(dev, busaddr))
1018                         goto dma_err;
1019                 be_tx_setup_wrb_frag(txo, busaddr, len);
1020                 copied += len;
1021         }
1022
1023         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1024
1025         be_tx_stats_update(txo, skb);
1026         return wrb_cnt;
1027
1028 dma_err:
1029         adapter->drv_stats.dma_map_errors++;
1030         be_xmit_restore(adapter, txo, head, map_single, copied);
1031         return 0;
1032 }
1033
1034 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1035 {
1036         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1037 }
1038
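/* Insert the VLAN tag (and the outer QnQ VLAN, if configured) into the
 * packet data itself; used when HW VLAN tagging must be skipped.
 */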
1039 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1040                                              struct sk_buff *skb,
1041                                              struct be_wrb_params
1042                                              *wrb_params)
1043 {
1044         u16 vlan_tag = 0;
1045
1046         skb = skb_share_check(skb, GFP_ATOMIC);
1047         if (unlikely(!skb))
1048                 return skb;
1049
1050         if (skb_vlan_tag_present(skb))
1051                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1052
1053         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1054                 if (!vlan_tag)
1055                         vlan_tag = adapter->pvid;
1056                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1057                  * to skip VLAN insertion
1058                  */
1059                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1060         }
1061
1062         if (vlan_tag) {
1063                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1064                                                 vlan_tag);
1065                 if (unlikely(!skb))
1066                         return skb;
1067                 skb->vlan_tci = 0;
1068         }
1069
1070         /* Insert the outer VLAN, if any */
1071         if (adapter->qnq_vid) {
1072                 vlan_tag = adapter->qnq_vid;
1073                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1074                                                 vlan_tag);
1075                 if (unlikely(!skb))
1076                         return skb;
1077                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1078         }
1079
1080         return skb;
1081 }
1082
1083 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1084 {
1085         struct ethhdr *eh = (struct ethhdr *)skb->data;
1086         u16 offset = ETH_HLEN;
1087
1088         if (eh->h_proto == htons(ETH_P_IPV6)) {
1089                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1090
1091                 offset += sizeof(struct ipv6hdr);
1092                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1093                     ip6h->nexthdr != NEXTHDR_UDP) {
1094                         struct ipv6_opt_hdr *ehdr =
1095                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1096
1097                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1098                         if (ehdr->hdrlen == 0xff)
1099                                 return true;
1100                 }
1101         }
1102         return false;
1103 }
1104
1105 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1106 {
1107         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1108 }
1109
1110 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1111 {
1112         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1113 }
1114
1115 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1116                                                   struct sk_buff *skb,
1117                                                   struct be_wrb_params
1118                                                   *wrb_params)
1119 {
1120         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1121         unsigned int eth_hdr_len;
1122         struct iphdr *ip;
1123
1124         /* For padded packets, BE HW modifies tot_len field in IP header
1125          * incorrectly when VLAN tag is inserted by HW.
1126          * For padded packets, Lancer computes incorrect checksum.
1127          */
1128         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1129                                                 VLAN_ETH_HLEN : ETH_HLEN;
1130         if (skb->len <= 60 &&
1131             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1132             is_ipv4_pkt(skb)) {
1133                 ip = (struct iphdr *)ip_hdr(skb);
1134                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1135         }
1136
1137         /* If vlan tag is already inlined in the packet, skip HW VLAN
1138          * tagging in pvid-tagging mode
1139          */
1140         if (be_pvid_tagging_enabled(adapter) &&
1141             veh->h_vlan_proto == htons(ETH_P_8021Q))
1142                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1143
1144         /* HW has a bug wherein it will calculate CSUM for VLAN
1145          * pkts even when CSUM offload is disabled.
1146          * Manually insert the VLAN in the pkt.
1147          */
1148         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1149             skb_vlan_tag_present(skb)) {
1150                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1151                 if (unlikely(!skb))
1152                         goto err;
1153         }
1154
1155         /* HW may lock up when VLAN HW tagging is requested on
1156          * certain ipv6 packets. Drop such pkts if the HW workaround to
1157          * skip HW tagging is not enabled by FW.
1158          */
1159         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1160                      (adapter->pvid || adapter->qnq_vid) &&
1161                      !qnq_async_evt_rcvd(adapter)))
1162                 goto tx_drop;
1163
1164         /* Manual VLAN tag insertion to prevent an
1165          * ASIC lockup when the ASIC inserts a VLAN tag into
1166          * certain ipv6 packets. Insert VLAN tags in the driver,
1167          * and set the event, completion and vlan bits accordingly
1168          * in the Tx WRB.
1169          */
1170         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1171             be_vlan_tag_tx_chk(adapter, skb)) {
1172                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1173                 if (unlikely(!skb))
1174                         goto err;
1175         }
1176
1177         return skb;
1178 tx_drop:
1179         dev_kfree_skb_any(skb);
1180 err:
1181         return NULL;
1182 }
1183
1184 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1185                                            struct sk_buff *skb,
1186                                            struct be_wrb_params *wrb_params)
1187 {
1188         int err;
1189
1190         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1191          * packets that are 32 bytes or less may cause a transmit stall
1192          * on that port. The workaround is to pad such packets
1193          * (len <= 32 bytes) to a minimum length of 36 bytes.
1194          */
1195         if (skb->len <= 32) {
1196                 if (skb_put_padto(skb, 36))
1197                         return NULL;
1198         }
1199
1200         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1201                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1202                 if (!skb)
1203                         return NULL;
1204         }
1205
1206         /* The stack can send us skbs with length greater than
1207          * what the HW can handle. Trim the extra bytes.
1208          */
1209         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1210         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1211         WARN_ON(err);
1212
1213         return skb;
1214 }
1215
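/* Push all pending WRBs to HW: mark the last request eventable, pad with a
 * dummy WRB if needed (non-Lancer chips need an even WRB count) and ring the
 * TX doorbell.
 */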
1216 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1217 {
1218         struct be_queue_info *txq = &txo->q;
1219         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1220
1221         /* Mark the last request eventable if it hasn't been marked already */
1222         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1223                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1224
1225         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1226         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1227                 wrb_fill_dummy(queue_head_node(txq));
1228                 queue_head_inc(txq);
1229                 atomic_inc(&txq->used);
1230                 txo->pend_wrb_cnt++;
1231                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1232                                            TX_HDR_WRB_NUM_SHIFT);
1233                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1234                                           TX_HDR_WRB_NUM_SHIFT);
1235         }
1236         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1237         txo->pend_wrb_cnt = 0;
1238 }
1239
1240 /* OS2BMC related */
1241
1242 #define DHCP_CLIENT_PORT        68
1243 #define DHCP_SERVER_PORT        67
1244 #define NET_BIOS_PORT1          137
1245 #define NET_BIOS_PORT2          138
1246 #define DHCPV6_RAS_PORT         547
1247
1248 #define is_mc_allowed_on_bmc(adapter, eh)       \
1249         (!is_multicast_filt_enabled(adapter) && \
1250          is_multicast_ether_addr(eh->h_dest) && \
1251          !is_broadcast_ether_addr(eh->h_dest))
1252
1253 #define is_bc_allowed_on_bmc(adapter, eh)       \
1254         (!is_broadcast_filt_enabled(adapter) && \
1255          is_broadcast_ether_addr(eh->h_dest))
1256
1257 #define is_arp_allowed_on_bmc(adapter, skb)     \
1258         (is_arp(skb) && is_arp_filt_enabled(adapter))
1259
1260 #define is_broadcast_packet(eh, adapter)        \
1261                 (is_multicast_ether_addr(eh->h_dest) && \
1262                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1263
1264 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1265
1266 #define is_arp_filt_enabled(adapter)    \
1267                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1268
1269 #define is_dhcp_client_filt_enabled(adapter)    \
1270                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1271
1272 #define is_dhcp_srvr_filt_enabled(adapter)      \
1273                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1274
1275 #define is_nbios_filt_enabled(adapter)  \
1276                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1277
1278 #define is_ipv6_na_filt_enabled(adapter)        \
1279                 (adapter->bmc_filt_mask &       \
1280                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1281
1282 #define is_ipv6_ra_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1284
1285 #define is_ipv6_ras_filt_enabled(adapter)       \
1286                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1287
1288 #define is_broadcast_filt_enabled(adapter)      \
1289                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1290
1291 #define is_multicast_filt_enabled(adapter)      \
1292                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1293
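/* Returns true if a copy of this TX packet must also be sent to the BMC,
 * based on the BMC traffic filters (bmc_filt_mask) reported by FW.
 */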
1294 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1295                                struct sk_buff **skb)
1296 {
1297         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1298         bool os2bmc = false;
1299
1300         if (!be_is_os2bmc_enabled(adapter))
1301                 goto done;
1302
1303         if (!is_multicast_ether_addr(eh->h_dest))
1304                 goto done;
1305
1306         if (is_mc_allowed_on_bmc(adapter, eh) ||
1307             is_bc_allowed_on_bmc(adapter, eh) ||
1308             is_arp_allowed_on_bmc(adapter, (*skb))) {
1309                 os2bmc = true;
1310                 goto done;
1311         }
1312
1313         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1314                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1315                 u8 nexthdr = hdr->nexthdr;
1316
1317                 if (nexthdr == IPPROTO_ICMPV6) {
1318                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1319
1320                         switch (icmp6->icmp6_type) {
1321                         case NDISC_ROUTER_ADVERTISEMENT:
1322                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1323                                 goto done;
1324                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1325                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1326                                 goto done;
1327                         default:
1328                                 break;
1329                         }
1330                 }
1331         }
1332
1333         if (is_udp_pkt((*skb))) {
1334                 struct udphdr *udp = udp_hdr((*skb));
1335
1336                 switch (ntohs(udp->dest)) {
1337                 case DHCP_CLIENT_PORT:
1338                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1339                         goto done;
1340                 case DHCP_SERVER_PORT:
1341                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1342                         goto done;
1343                 case NET_BIOS_PORT1:
1344                 case NET_BIOS_PORT2:
1345                         os2bmc = is_nbios_filt_enabled(adapter);
1346                         goto done;
1347                 case DHCPV6_RAS_PORT:
1348                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1349                         goto done;
1350                 default:
1351                         break;
1352                 }
1353         }
1354 done:
1355         /* For VLAN packets destined to the BMC, the ASIC expects
1356          * the vlan tag to be inline in the packet.
1357          */
1358         if (os2bmc)
1359                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1360
1361         return os2bmc;
1362 }
1363
1364 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1365 {
1366         struct be_adapter *adapter = netdev_priv(netdev);
1367         u16 q_idx = skb_get_queue_mapping(skb);
1368         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1369         struct be_wrb_params wrb_params = { 0 };
1370         bool flush = !skb->xmit_more;
1371         u16 wrb_cnt;
1372
1373         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1374         if (unlikely(!skb))
1375                 goto drop;
1376
1377         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1378
1379         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1380         if (unlikely(!wrb_cnt)) {
1381                 dev_kfree_skb_any(skb);
1382                 goto drop;
1383         }
1384
1385         /* If OS2BMC is enabled and the packet is destined to the BMC,
1386          * enqueue the packet a second time with the mgmt bit set.
1387          */
1388         if (be_send_pkt_to_bmc(adapter, &skb)) {
1389                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1390                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1391                 if (unlikely(!wrb_cnt))
1392                         goto drop;
1393                 else
1394                         skb_get(skb);
1395         }
1396
1397         if (be_is_txq_full(txo)) {
1398                 netif_stop_subqueue(netdev, q_idx);
1399                 tx_stats(txo)->tx_stops++;
1400         }
1401
1402         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1403                 be_xmit_flush(adapter, txo);
1404
1405         return NETDEV_TX_OK;
1406 drop:
1407         tx_stats(txo)->tx_drv_drops++;
1408         /* Flush the already enqueued tx requests */
1409         if (flush && txo->pend_wrb_cnt)
1410                 be_xmit_flush(adapter, txo);
1411
1412         return NETDEV_TX_OK;
1413 }
1414
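/* BE_IF_FLAGS_ALL_PROMISCUOUS is a mask of the individual promiscuous flags
 * (unicast, multicast and VLAN); the masked comparison below is true only
 * when every one of them is currently set on the interface.
 */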
1415 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1416 {
1417         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1418                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1419 }
1420
1421 static int be_set_vlan_promisc(struct be_adapter *adapter)
1422 {
1423         struct device *dev = &adapter->pdev->dev;
1424         int status;
1425
1426         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1427                 return 0;
1428
1429         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1430         if (!status) {
1431                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1432                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1433         } else {
1434                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1435         }
1436         return status;
1437 }
1438
1439 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1440 {
1441         struct device *dev = &adapter->pdev->dev;
1442         int status;
1443
1444         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1445         if (!status) {
1446                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1447                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1448         }
1449         return status;
1450 }
1451
1452 /*
1453  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1454  * If the user configures more, place BE in vlan promiscuous mode.
1455  */
1456 static int be_vid_config(struct be_adapter *adapter)
1457 {
1458         struct device *dev = &adapter->pdev->dev;
1459         u16 vids[BE_NUM_VLANS_SUPPORTED];
1460         u16 num = 0, i = 0;
1461         int status = 0;
1462
1463         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1464         if (adapter->netdev->flags & IFF_PROMISC)
1465                 return 0;
1466
1467         if (adapter->vlans_added > be_max_vlans(adapter))
1468                 return be_set_vlan_promisc(adapter);
1469
1470         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1471                 status = be_clear_vlan_promisc(adapter);
1472                 if (status)
1473                         return status;
1474         }
1475         /* Construct VLAN Table to give to HW */
1476         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1477                 vids[num++] = cpu_to_le16(i);
1478
1479         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1480         if (status) {
1481                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1482                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1483                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1484                     addl_status(status) ==
1485                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1486                         return be_set_vlan_promisc(adapter);
1487         }
1488         return status;
1489 }
1490
1491 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1492 {
1493         struct be_adapter *adapter = netdev_priv(netdev);
1494         int status = 0;
1495
1496         mutex_lock(&adapter->rx_filter_lock);
1497
1498         /* Packets with VID 0 are always received by Lancer by default */
1499         if (lancer_chip(adapter) && vid == 0)
1500                 goto done;
1501
1502         if (test_bit(vid, adapter->vids))
1503                 goto done;
1504
1505         set_bit(vid, adapter->vids);
1506         adapter->vlans_added++;
1507
1508         status = be_vid_config(adapter);
1509 done:
1510         mutex_unlock(&adapter->rx_filter_lock);
1511         return status;
1512 }
1513
1514 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1515 {
1516         struct be_adapter *adapter = netdev_priv(netdev);
1517         int status = 0;
1518
1519         mutex_lock(&adapter->rx_filter_lock);
1520
1521         /* Packets with VID 0 are always received by Lancer by default */
1522         if (lancer_chip(adapter) && vid == 0)
1523                 goto done;
1524
1525         if (!test_bit(vid, adapter->vids))
1526                 goto done;
1527
1528         clear_bit(vid, adapter->vids);
1529         adapter->vlans_added--;
1530
1531         status = be_vid_config(adapter);
1532 done:
1533         mutex_unlock(&adapter->rx_filter_lock);
1534         return status;
1535 }
1536
1537 static void be_set_all_promisc(struct be_adapter *adapter)
1538 {
1539         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1540         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1541 }
1542
1543 static void be_set_mc_promisc(struct be_adapter *adapter)
1544 {
1545         int status;
1546
1547         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1548                 return;
1549
1550         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1551         if (!status)
1552                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1553 }
1554
1555 static void be_set_uc_promisc(struct be_adapter *adapter)
1556 {
1557         int status;
1558
1559         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1560                 return;
1561
1562         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1563         if (!status)
1564                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1565 }
1566
1567 static void be_clear_uc_promisc(struct be_adapter *adapter)
1568 {
1569         int status;
1570
1571         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1572                 return;
1573
1574         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1575         if (!status)
1576                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1577 }
1578
1579 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1580  * We use a single callback function for both sync and unsync. We don't really
1581  * add/remove addresses through this callback; we only use it to detect changes
1582  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1583  */
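/* __dev_uc_sync()/__dev_mc_sync() invoke the sync callback for addresses newly
 * added to the netdev's list and the unsync callback for addresses removed from
 * it; since the same function is passed for both, either event simply sets the
 * corresponding update_{uc,mc}_list flag.
 */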
1584 static int be_uc_list_update(struct net_device *netdev,
1585                              const unsigned char *addr)
1586 {
1587         struct be_adapter *adapter = netdev_priv(netdev);
1588
1589         adapter->update_uc_list = true;
1590         return 0;
1591 }
1592
1593 static int be_mc_list_update(struct net_device *netdev,
1594                              const unsigned char *addr)
1595 {
1596         struct be_adapter *adapter = netdev_priv(netdev);
1597
1598         adapter->update_mc_list = true;
1599         return 0;
1600 }
1601
1602 static void be_set_mc_list(struct be_adapter *adapter)
1603 {
1604         struct net_device *netdev = adapter->netdev;
1605         struct netdev_hw_addr *ha;
1606         bool mc_promisc = false;
1607         int status;
1608
1609         netif_addr_lock_bh(netdev);
1610         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1611
1612         if (netdev->flags & IFF_PROMISC) {
1613                 adapter->update_mc_list = false;
1614         } else if (netdev->flags & IFF_ALLMULTI ||
1615                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1616                 /* Enable multicast promisc if num configured exceeds
1617                  * what we support
1618                  */
1619                 mc_promisc = true;
1620                 adapter->update_mc_list = false;
1621         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1622                 /* Update mc-list unconditionally if the iface was previously
1623                  * in mc-promisc mode and now is out of that mode.
1624                  */
1625                 adapter->update_mc_list = true;
1626         }
1627
1628         if (adapter->update_mc_list) {
1629                 int i = 0;
1630
1631                 /* cache the mc-list in adapter */
1632                 netdev_for_each_mc_addr(ha, netdev) {
1633                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1634                         i++;
1635                 }
1636                 adapter->mc_count = netdev_mc_count(netdev);
1637         }
1638         netif_addr_unlock_bh(netdev);
1639
1640         if (mc_promisc) {
1641                 be_set_mc_promisc(adapter);
1642         } else if (adapter->update_mc_list) {
1643                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1644                 if (!status)
1645                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1646                 else
1647                         be_set_mc_promisc(adapter);
1648
1649                 adapter->update_mc_list = false;
1650         }
1651 }
1652
1653 static void be_clear_mc_list(struct be_adapter *adapter)
1654 {
1655         struct net_device *netdev = adapter->netdev;
1656
1657         __dev_mc_unsync(netdev, NULL);
1658         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1659         adapter->mc_count = 0;
1660 }
1661
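/* pmac_id[0] holds the primary MAC's pmac-id; UC-list entry i maps to
 * pmac_id[i + 1]. If the UC address being added is the interface's own MAC,
 * reuse the primary pmac-id instead of programming a duplicate filter.
 */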
1662 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1663 {
1664         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1665                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1666                 return 0;
1667         }
1668
1669         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1670                                adapter->if_handle,
1671                                &adapter->pmac_id[uc_idx + 1], 0);
1672 }
1673
1674 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1675 {
1676         if (pmac_id == adapter->pmac_id[0])
1677                 return;
1678
1679         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1680 }
1681
1682 static void be_set_uc_list(struct be_adapter *adapter)
1683 {
1684         struct net_device *netdev = adapter->netdev;
1685         struct netdev_hw_addr *ha;
1686         bool uc_promisc = false;
1687         int curr_uc_macs = 0, i;
1688
1689         netif_addr_lock_bh(netdev);
1690         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1691
1692         if (netdev->flags & IFF_PROMISC) {
1693                 adapter->update_uc_list = false;
1694         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1695                 uc_promisc = true;
1696                 adapter->update_uc_list = false;
1697         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1698                 /* Update uc-list unconditionally if the iface was previously
1699                  * in uc-promisc mode and now is out of that mode.
1700                  */
1701                 adapter->update_uc_list = true;
1702         }
1703
1704         if (adapter->update_uc_list) {
1705                 /* cache the uc-list in adapter array */
1706                 i = 0;
1707                 netdev_for_each_uc_addr(ha, netdev) {
1708                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1709                         i++;
1710                 }
1711                 curr_uc_macs = netdev_uc_count(netdev);
1712         }
1713         netif_addr_unlock_bh(netdev);
1714
1715         if (uc_promisc) {
1716                 be_set_uc_promisc(adapter);
1717         } else if (adapter->update_uc_list) {
1718                 be_clear_uc_promisc(adapter);
1719
1720                 for (i = 0; i < adapter->uc_macs; i++)
1721                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1722
1723                 for (i = 0; i < curr_uc_macs; i++)
1724                         be_uc_mac_add(adapter, i);
1725                 adapter->uc_macs = curr_uc_macs;
1726                 adapter->update_uc_list = false;
1727         }
1728 }
1729
1730 static void be_clear_uc_list(struct be_adapter *adapter)
1731 {
1732         struct net_device *netdev = adapter->netdev;
1733         int i;
1734
1735         __dev_uc_unsync(netdev, NULL);
1736         for (i = 0; i < adapter->uc_macs; i++)
1737                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1738
1739         adapter->uc_macs = 0;
1740 }
1741
1742 static void __be_set_rx_mode(struct be_adapter *adapter)
1743 {
1744         struct net_device *netdev = adapter->netdev;
1745
1746         mutex_lock(&adapter->rx_filter_lock);
1747
1748         if (netdev->flags & IFF_PROMISC) {
1749                 if (!be_in_all_promisc(adapter))
1750                         be_set_all_promisc(adapter);
1751         } else if (be_in_all_promisc(adapter)) {
1752                 /* We need to re-program the vlan-list or clear
1753                  * vlan-promisc mode (if needed) when the interface
1754                  * comes out of promisc mode.
1755                  */
1756                 be_vid_config(adapter);
1757         }
1758
1759         be_set_uc_list(adapter);
1760         be_set_mc_list(adapter);
1761
1762         mutex_unlock(&adapter->rx_filter_lock);
1763 }
1764
1765 static void be_work_set_rx_mode(struct work_struct *work)
1766 {
1767         struct be_cmd_work *cmd_work =
1768                                 container_of(work, struct be_cmd_work, work);
1769
1770         __be_set_rx_mode(cmd_work->adapter);
1771         kfree(cmd_work);
1772 }
1773
1774 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1775 {
1776         struct be_adapter *adapter = netdev_priv(netdev);
1777         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1778         int status;
1779
1780         if (!sriov_enabled(adapter))
1781                 return -EPERM;
1782
1783         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1784                 return -EINVAL;
1785
1786         /* Proceed further only if user provided MAC is different
1787          * from active MAC
1788          */
1789         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1790                 return 0;
1791
1792         if (BEx_chip(adapter)) {
1793                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1794                                 vf + 1);
1795
1796                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1797                                          &vf_cfg->pmac_id, vf + 1);
1798         } else {
1799                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1800                                         vf + 1);
1801         }
1802
1803         if (status) {
1804                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1805                         mac, vf, status);
1806                 return be_cmd_status(status);
1807         }
1808
1809         ether_addr_copy(vf_cfg->mac_addr, mac);
1810
1811         return 0;
1812 }
1813
1814 static int be_get_vf_config(struct net_device *netdev, int vf,
1815                             struct ifla_vf_info *vi)
1816 {
1817         struct be_adapter *adapter = netdev_priv(netdev);
1818         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1819
1820         if (!sriov_enabled(adapter))
1821                 return -EPERM;
1822
1823         if (vf >= adapter->num_vfs)
1824                 return -EINVAL;
1825
1826         vi->vf = vf;
1827         vi->max_tx_rate = vf_cfg->tx_rate;
1828         vi->min_tx_rate = 0;
1829         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1830         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1831         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1832         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1833         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1834
1835         return 0;
1836 }
1837
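/* Transparent VLAN Tagging (TVT): the PF-configured VLAN is inserted/stripped
 * by the adapter without the VF being aware of it. While TVT is active, any
 * guest-programmed VLAN filters are cleared and the VF's FILTMGMT privilege is
 * revoked so it cannot program new ones (see be_clear_vf_tvt() for the undo).
 */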
1838 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1839 {
1840         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1841         u16 vids[BE_NUM_VLANS_SUPPORTED];
1842         int vf_if_id = vf_cfg->if_handle;
1843         int status;
1844
1845         /* Enable Transparent VLAN Tagging */
1846         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1847         if (status)
1848                 return status;
1849
1850         /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1851         vids[0] = 0;
1852         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1853         if (!status)
1854                 dev_info(&adapter->pdev->dev,
1855                          "Cleared guest VLANs on VF%d", vf);
1856
1857         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1858         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1859                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1860                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1861                 if (!status)
1862                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1863         }
1864         return 0;
1865 }
1866
1867 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1868 {
1869         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1870         struct device *dev = &adapter->pdev->dev;
1871         int status;
1872
1873         /* Reset Transparent VLAN Tagging. */
1874         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1875                                        vf_cfg->if_handle, 0, 0);
1876         if (status)
1877                 return status;
1878
1879         /* Allow VFs to program VLAN filtering */
1880         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1881                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1882                                                   BE_PRIV_FILTMGMT, vf + 1);
1883                 if (!status) {
1884                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1885                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1886                 }
1887         }
1888
1889         dev_info(dev,
1890                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1891         return 0;
1892 }
1893
1894 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1895                           __be16 vlan_proto)
1896 {
1897         struct be_adapter *adapter = netdev_priv(netdev);
1898         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1899         int status;
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1905                 return -EINVAL;
1906
1907         if (vlan_proto != htons(ETH_P_8021Q))
1908                 return -EPROTONOSUPPORT;
1909
1910         if (vlan || qos) {
1911                 vlan |= qos << VLAN_PRIO_SHIFT;
1912                 status = be_set_vf_tvt(adapter, vf, vlan);
1913         } else {
1914                 status = be_clear_vf_tvt(adapter, vf);
1915         }
1916
1917         if (status) {
1918                 dev_err(&adapter->pdev->dev,
1919                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1920                         status);
1921                 return be_cmd_status(status);
1922         }
1923
1924         vf_cfg->vlan_tag = vlan;
1925         return 0;
1926 }
1927
1928 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1929                              int min_tx_rate, int max_tx_rate)
1930 {
1931         struct be_adapter *adapter = netdev_priv(netdev);
1932         struct device *dev = &adapter->pdev->dev;
1933         int percent_rate, status = 0;
1934         u16 link_speed = 0;
1935         u8 link_status;
1936
1937         if (!sriov_enabled(adapter))
1938                 return -EPERM;
1939
1940         if (vf >= adapter->num_vfs)
1941                 return -EINVAL;
1942
1943         if (min_tx_rate)
1944                 return -EINVAL;
1945
1946         if (!max_tx_rate)
1947                 goto config_qos;
1948
1949         status = be_cmd_link_status_query(adapter, &link_speed,
1950                                           &link_status, 0);
1951         if (status)
1952                 goto err;
1953
1954         if (!link_status) {
1955                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1956                 status = -ENETDOWN;
1957                 goto err;
1958         }
1959
1960         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1961                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1962                         link_speed);
1963                 status = -EINVAL;
1964                 goto err;
1965         }
1966
1967         /* On Skyhawk the QOS setting must be done only as a % value */
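        /* For example, on a 10000 Mbps link percent_rate is 100, so the
         * requested max_tx_rate must be a multiple of 100 Mbps.
         */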
1968         percent_rate = link_speed / 100;
1969         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1970                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1971                         percent_rate);
1972                 status = -EINVAL;
1973                 goto err;
1974         }
1975
1976 config_qos:
1977         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1978         if (status)
1979                 goto err;
1980
1981         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1982         return 0;
1983
1984 err:
1985         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1986                 max_tx_rate, vf);
1987         return be_cmd_status(status);
1988 }
1989
1990 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1991                                 int link_state)
1992 {
1993         struct be_adapter *adapter = netdev_priv(netdev);
1994         int status;
1995
1996         if (!sriov_enabled(adapter))
1997                 return -EPERM;
1998
1999         if (vf >= adapter->num_vfs)
2000                 return -EINVAL;
2001
2002         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2003         if (status) {
2004                 dev_err(&adapter->pdev->dev,
2005                         "Link state change on VF %d failed: %#x\n", vf, status);
2006                 return be_cmd_status(status);
2007         }
2008
2009         adapter->vf_cfg[vf].plink_tracking = link_state;
2010
2011         return 0;
2012 }
2013
2014 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2015 {
2016         struct be_adapter *adapter = netdev_priv(netdev);
2017         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2018         u8 spoofchk;
2019         int status;
2020
2021         if (!sriov_enabled(adapter))
2022                 return -EPERM;
2023
2024         if (vf >= adapter->num_vfs)
2025                 return -EINVAL;
2026
2027         if (BEx_chip(adapter))
2028                 return -EOPNOTSUPP;
2029
2030         if (enable == vf_cfg->spoofchk)
2031                 return 0;
2032
2033         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2034
2035         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2036                                        0, spoofchk);
2037         if (status) {
2038                 dev_err(&adapter->pdev->dev,
2039                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2040                 return be_cmd_status(status);
2041         }
2042
2043         vf_cfg->spoofchk = enable;
2044         return 0;
2045 }
2046
2047 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2048                           ulong now)
2049 {
2050         aic->rx_pkts_prev = rx_pkts;
2051         aic->tx_reqs_prev = tx_pkts;
2052         aic->jiffies = now;
2053 }
2054
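/* Adaptive interrupt coalescing: derive a new EQ delay from the combined
 * RX + TX packet rate seen on this EQ since the last sample. The delay is
 * computed as (pkts-per-second / 15000) << 2 and then clamped to the
 * [min_eqd, max_eqd] range; e.g. ~600k pkts/s yields (600000 / 15000) << 2
 * = 160 before clamping, while anything below 8 is zeroed before the clamp.
 */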
2055 static int be_get_new_eqd(struct be_eq_obj *eqo)
2056 {
2057         struct be_adapter *adapter = eqo->adapter;
2058         int eqd, start;
2059         struct be_aic_obj *aic;
2060         struct be_rx_obj *rxo;
2061         struct be_tx_obj *txo;
2062         u64 rx_pkts = 0, tx_pkts = 0;
2063         ulong now;
2064         u32 pps, delta;
2065         int i;
2066
2067         aic = &adapter->aic_obj[eqo->idx];
2068         if (!aic->enable) {
2069                 if (aic->jiffies)
2070                         aic->jiffies = 0;
2071                 eqd = aic->et_eqd;
2072                 return eqd;
2073         }
2074
2075         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2076                 do {
2077                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2078                         rx_pkts += rxo->stats.rx_pkts;
2079                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2080         }
2081
2082         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2083                 do {
2084                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2085                         tx_pkts += txo->stats.tx_reqs;
2086                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2087         }
2088
2089         /* Skip if this is the first calculation or the counters wrapped around */
2090         now = jiffies;
2091         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2092             rx_pkts < aic->rx_pkts_prev ||
2093             tx_pkts < aic->tx_reqs_prev) {
2094                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2095                 return aic->prev_eqd;
2096         }
2097
2098         delta = jiffies_to_msecs(now - aic->jiffies);
2099         if (delta == 0)
2100                 return aic->prev_eqd;
2101
2102         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2103                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2104         eqd = (pps / 15000) << 2;
2105
2106         if (eqd < 8)
2107                 eqd = 0;
2108         eqd = min_t(u32, eqd, aic->max_eqd);
2109         eqd = max_t(u32, eqd, aic->min_eqd);
2110
2111         be_aic_update(aic, rx_pkts, tx_pkts, now);
2112
2113         return eqd;
2114 }
2115
2116 /* For Skyhawk-R only */
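/* Skyhawk-R takes the EQ delay as one of four R2I delay encodings rather than
 * an absolute value; the encoding bucket is picked from the eqd computed by
 * be_get_new_eqd() (or from the previous eqd if sampled less than 1ms ago).
 */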
2117 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2118 {
2119         struct be_adapter *adapter = eqo->adapter;
2120         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2121         ulong now = jiffies;
2122         int eqd;
2123         u32 mult_enc;
2124
2125         if (!aic->enable)
2126                 return 0;
2127
2128         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2129                 eqd = aic->prev_eqd;
2130         else
2131                 eqd = be_get_new_eqd(eqo);
2132
2133         if (eqd > 100)
2134                 mult_enc = R2I_DLY_ENC_1;
2135         else if (eqd > 60)
2136                 mult_enc = R2I_DLY_ENC_2;
2137         else if (eqd > 20)
2138                 mult_enc = R2I_DLY_ENC_3;
2139         else
2140                 mult_enc = R2I_DLY_ENC_0;
2141
2142         aic->prev_eqd = eqd;
2143
2144         return mult_enc;
2145 }
2146
2147 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2148 {
2149         struct be_set_eqd set_eqd[MAX_EVT_QS];
2150         struct be_aic_obj *aic;
2151         struct be_eq_obj *eqo;
2152         int i, num = 0, eqd;
2153
2154         for_all_evt_queues(adapter, eqo, i) {
2155                 aic = &adapter->aic_obj[eqo->idx];
2156                 eqd = be_get_new_eqd(eqo);
2157                 if (force_update || eqd != aic->prev_eqd) {
2158                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2159                         set_eqd[num].eq_id = eqo->q.id;
2160                         aic->prev_eqd = eqd;
2161                         num++;
2162                 }
2163         }
2164
2165         if (num)
2166                 be_cmd_modify_eqd(adapter, set_eqd, num);
2167 }
2168
2169 static void be_rx_stats_update(struct be_rx_obj *rxo,
2170                                struct be_rx_compl_info *rxcp)
2171 {
2172         struct be_rx_stats *stats = rx_stats(rxo);
2173
2174         u64_stats_update_begin(&stats->sync);
2175         stats->rx_compl++;
2176         stats->rx_bytes += rxcp->pkt_size;
2177         stats->rx_pkts++;
2178         if (rxcp->tunneled)
2179                 stats->rx_vxlan_offload_pkts++;
2180         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2181                 stats->rx_mcast_pkts++;
2182         if (rxcp->err)
2183                 stats->rx_compl_err++;
2184         u64_stats_update_end(&stats->sync);
2185 }
2186
2187 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2188 {
2189         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2190          * Also ignore ipcksm for IPv6 packets.
2191          */
2192         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2193                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2194 }
2195
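/* Pop the page_info at the RXQ tail. The backing page is DMA-unmapped only
 * when this fragment is the last one carved from it; otherwise the fragment
 * is just synced for CPU access, since other fragments still reference the
 * same mapping.
 */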
2196 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2197 {
2198         struct be_adapter *adapter = rxo->adapter;
2199         struct be_rx_page_info *rx_page_info;
2200         struct be_queue_info *rxq = &rxo->q;
2201         u32 frag_idx = rxq->tail;
2202
2203         rx_page_info = &rxo->page_info_tbl[frag_idx];
2204         BUG_ON(!rx_page_info->page);
2205
2206         if (rx_page_info->last_frag) {
2207                 dma_unmap_page(&adapter->pdev->dev,
2208                                dma_unmap_addr(rx_page_info, bus),
2209                                adapter->big_page_size, DMA_FROM_DEVICE);
2210                 rx_page_info->last_frag = false;
2211         } else {
2212                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2213                                         dma_unmap_addr(rx_page_info, bus),
2214                                         rx_frag_size, DMA_FROM_DEVICE);
2215         }
2216
2217         queue_tail_inc(rxq);
2218         atomic_dec(&rxq->used);
2219         return rx_page_info;
2220 }
2221
2222 /* Throw away the data in the Rx completion */
2223 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2224                                 struct be_rx_compl_info *rxcp)
2225 {
2226         struct be_rx_page_info *page_info;
2227         u16 i, num_rcvd = rxcp->num_rcvd;
2228
2229         for (i = 0; i < num_rcvd; i++) {
2230                 page_info = get_rx_page_info(rxo);
2231                 put_page(page_info->page);
2232                 memset(page_info, 0, sizeof(*page_info));
2233         }
2234 }
2235
2236 /*
2237  * skb_fill_rx_data forms a complete skb for an ether frame
2238  * indicated by rxcp.
2239  */
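/* The first fragment is copied into the skb's linear area (fully for tiny
 * packets, otherwise just the Ethernet header); the rest of the payload is
 * attached as page fragments, coalescing fragments that share a physical
 * page into a single frag slot.
 */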
2240 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2241                              struct be_rx_compl_info *rxcp)
2242 {
2243         struct be_rx_page_info *page_info;
2244         u16 i, j;
2245         u16 hdr_len, curr_frag_len, remaining;
2246         u8 *start;
2247
2248         page_info = get_rx_page_info(rxo);
2249         start = page_address(page_info->page) + page_info->page_offset;
2250         prefetch(start);
2251
2252         /* Copy data in the first descriptor of this completion */
2253         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2254
2255         skb->len = curr_frag_len;
2256         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2257                 memcpy(skb->data, start, curr_frag_len);
2258                 /* Complete packet has now been moved to data */
2259                 put_page(page_info->page);
2260                 skb->data_len = 0;
2261                 skb->tail += curr_frag_len;
2262         } else {
2263                 hdr_len = ETH_HLEN;
2264                 memcpy(skb->data, start, hdr_len);
2265                 skb_shinfo(skb)->nr_frags = 1;
2266                 skb_frag_set_page(skb, 0, page_info->page);
2267                 skb_shinfo(skb)->frags[0].page_offset =
2268                                         page_info->page_offset + hdr_len;
2269                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2270                                   curr_frag_len - hdr_len);
2271                 skb->data_len = curr_frag_len - hdr_len;
2272                 skb->truesize += rx_frag_size;
2273                 skb->tail += hdr_len;
2274         }
2275         page_info->page = NULL;
2276
2277         if (rxcp->pkt_size <= rx_frag_size) {
2278                 BUG_ON(rxcp->num_rcvd != 1);
2279                 return;
2280         }
2281
2282         /* More frags present for this completion */
2283         remaining = rxcp->pkt_size - curr_frag_len;
2284         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2285                 page_info = get_rx_page_info(rxo);
2286                 curr_frag_len = min(remaining, rx_frag_size);
2287
2288                 /* Coalesce all frags from the same physical page in one slot */
2289                 if (page_info->page_offset == 0) {
2290                         /* Fresh page */
2291                         j++;
2292                         skb_frag_set_page(skb, j, page_info->page);
2293                         skb_shinfo(skb)->frags[j].page_offset =
2294                                                         page_info->page_offset;
2295                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2296                         skb_shinfo(skb)->nr_frags++;
2297                 } else {
2298                         put_page(page_info->page);
2299                 }
2300
2301                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2302                 skb->len += curr_frag_len;
2303                 skb->data_len += curr_frag_len;
2304                 skb->truesize += rx_frag_size;
2305                 remaining -= curr_frag_len;
2306                 page_info->page = NULL;
2307         }
2308         BUG_ON(j > MAX_SKB_FRAGS);
2309 }
2310
2311 /* Process the RX completion indicated by rxcp when GRO is disabled */
2312 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2313                                 struct be_rx_compl_info *rxcp)
2314 {
2315         struct be_adapter *adapter = rxo->adapter;
2316         struct net_device *netdev = adapter->netdev;
2317         struct sk_buff *skb;
2318
2319         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2320         if (unlikely(!skb)) {
2321                 rx_stats(rxo)->rx_drops_no_skbs++;
2322                 be_rx_compl_discard(rxo, rxcp);
2323                 return;
2324         }
2325
2326         skb_fill_rx_data(rxo, skb, rxcp);
2327
2328         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2329                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2330         else
2331                 skb_checksum_none_assert(skb);
2332
2333         skb->protocol = eth_type_trans(skb, netdev);
2334         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2335         if (netdev->features & NETIF_F_RXHASH)
2336                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2337
2338         skb->csum_level = rxcp->tunneled;
2339         skb_mark_napi_id(skb, napi);
2340
2341         if (rxcp->vlanf)
2342                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2343
2344         netif_receive_skb(skb);
2345 }
2346
2347 /* Process the RX completion indicated by rxcp when GRO is enabled */
2348 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2349                                     struct napi_struct *napi,
2350                                     struct be_rx_compl_info *rxcp)
2351 {
2352         struct be_adapter *adapter = rxo->adapter;
2353         struct be_rx_page_info *page_info;
2354         struct sk_buff *skb = NULL;
2355         u16 remaining, curr_frag_len;
2356         u16 i, j;
2357
2358         skb = napi_get_frags(napi);
2359         if (!skb) {
2360                 be_rx_compl_discard(rxo, rxcp);
2361                 return;
2362         }
2363
2364         remaining = rxcp->pkt_size;
2365         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2366                 page_info = get_rx_page_info(rxo);
2367
2368                 curr_frag_len = min(remaining, rx_frag_size);
2369
2370                 /* Coalesce all frags from the same physical page in one slot */
2371                 if (i == 0 || page_info->page_offset == 0) {
2372                         /* First frag or Fresh page */
2373                         j++;
2374                         skb_frag_set_page(skb, j, page_info->page);
2375                         skb_shinfo(skb)->frags[j].page_offset =
2376                                                         page_info->page_offset;
2377                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378                 } else {
2379                         put_page(page_info->page);
2380                 }
2381                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2382                 skb->truesize += rx_frag_size;
2383                 remaining -= curr_frag_len;
2384                 memset(page_info, 0, sizeof(*page_info));
2385         }
2386         BUG_ON(j > MAX_SKB_FRAGS);
2387
2388         skb_shinfo(skb)->nr_frags = j + 1;
2389         skb->len = rxcp->pkt_size;
2390         skb->data_len = rxcp->pkt_size;
2391         skb->ip_summed = CHECKSUM_UNNECESSARY;
2392         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2393         if (adapter->netdev->features & NETIF_F_RXHASH)
2394                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2395
2396         skb->csum_level = rxcp->tunneled;
2397
2398         if (rxcp->vlanf)
2399                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2400
2401         napi_gro_frags(napi);
2402 }
2403
2404 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2405                                  struct be_rx_compl_info *rxcp)
2406 {
2407         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2408         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2409         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2410         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2411         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2412         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2413         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2414         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2415         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2416         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2417         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2418         if (rxcp->vlanf) {
2419                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2420                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2421         }
2422         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2423         rxcp->tunneled =
2424                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2425 }
2426
2427 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2428                                  struct be_rx_compl_info *rxcp)
2429 {
2430         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2431         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2432         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2433         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2434         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2435         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2436         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2437         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2438         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2439         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2440         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2441         if (rxcp->vlanf) {
2442                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2443                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2444         }
2445         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2446         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2447 }
2448
2449 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2450 {
2451         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2452         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2453         struct be_adapter *adapter = rxo->adapter;
2454
2455         /* For checking the valid bit it is OK to use either definition, as the
2456          * valid bit is at the same position in both v0 and v1 Rx compl */
2457         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2458                 return NULL;
2459
2460         rmb();
2461         be_dws_le_to_cpu(compl, sizeof(*compl));
2462
2463         if (adapter->be3_native)
2464                 be_parse_rx_compl_v1(compl, rxcp);
2465         else
2466                 be_parse_rx_compl_v0(compl, rxcp);
2467
2468         if (rxcp->ip_frag)
2469                 rxcp->l4_csum = 0;
2470
2471         if (rxcp->vlanf) {
2472                 /* In QNQ modes, if qnq bit is not set, then the packet was
2473                  * tagged only with the transparent outer vlan-tag and must
2474                  * not be treated as a vlan packet by host
2475                  */
2476                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2477                         rxcp->vlanf = 0;
2478
2479                 if (!lancer_chip(adapter))
2480                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2481
2482                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2483                     !test_bit(rxcp->vlan_tag, adapter->vids))
2484                         rxcp->vlanf = 0;
2485         }
2486
2487         /* As the compl has been parsed, reset it; we won't touch it again */
2488         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2489
2490         queue_tail_inc(&rxo->cq);
2491         return rxcp;
2492 }
2493
2494 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2495 {
2496         u32 order = get_order(size);
2497
2498         if (order > 0)
2499                 gfp |= __GFP_COMP;
2500         return alloc_pages(gfp, order);
2501 }
2502
2503 /*
2504  * Allocate a page, split it to fragments of size rx_frag_size and post as
2505  * receive buffers to BE
2506  */
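/* Each "big page" of adapter->big_page_size is DMA-mapped once and carved into
 * rx_frag_size chunks; every chunk posted takes an extra page reference via
 * get_page(), and the last chunk of a page records the full-page DMA address
 * so the mapping can be torn down when that fragment is consumed.
 */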
2507 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2508 {
2509         struct be_adapter *adapter = rxo->adapter;
2510         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2511         struct be_queue_info *rxq = &rxo->q;
2512         struct page *pagep = NULL;
2513         struct device *dev = &adapter->pdev->dev;
2514         struct be_eth_rx_d *rxd;
2515         u64 page_dmaaddr = 0, frag_dmaaddr;
2516         u32 posted, page_offset = 0, notify = 0;
2517
2518         page_info = &rxo->page_info_tbl[rxq->head];
2519         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2520                 if (!pagep) {
2521                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2522                         if (unlikely(!pagep)) {
2523                                 rx_stats(rxo)->rx_post_fail++;
2524                                 break;
2525                         }
2526                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2527                                                     adapter->big_page_size,
2528                                                     DMA_FROM_DEVICE);
2529                         if (dma_mapping_error(dev, page_dmaaddr)) {
2530                                 put_page(pagep);
2531                                 pagep = NULL;
2532                                 adapter->drv_stats.dma_map_errors++;
2533                                 break;
2534                         }
2535                         page_offset = 0;
2536                 } else {
2537                         get_page(pagep);
2538                         page_offset += rx_frag_size;
2539                 }
2540                 page_info->page_offset = page_offset;
2541                 page_info->page = pagep;
2542
2543                 rxd = queue_head_node(rxq);
2544                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2545                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2546                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2547
2548                 /* Any space left in the current big page for another frag? */
2549                 if ((page_offset + rx_frag_size + rx_frag_size) >
2550                                         adapter->big_page_size) {
2551                         pagep = NULL;
2552                         page_info->last_frag = true;
2553                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2554                 } else {
2555                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2556                 }
2557
2558                 prev_page_info = page_info;
2559                 queue_head_inc(rxq);
2560                 page_info = &rxo->page_info_tbl[rxq->head];
2561         }
2562
2563         /* Mark the last frag of a page when we break out of the above loop
2564          * with no more slots available in the RXQ
2565          */
2566         if (pagep) {
2567                 prev_page_info->last_frag = true;
2568                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2569         }
2570
2571         if (posted) {
2572                 atomic_add(posted, &rxq->used);
2573                 if (rxo->rx_post_starved)
2574                         rxo->rx_post_starved = false;
2575                 do {
2576                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2577                         be_rxq_notify(adapter, rxq->id, notify);
2578                         posted -= notify;
2579                 } while (posted);
2580         } else if (atomic_read(&rxq->used) == 0) {
2581                 /* Let be_worker replenish when memory is available */
2582                 rxo->rx_post_starved = true;
2583         }
2584 }
2585
2586 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2587 {
2588         struct be_queue_info *tx_cq = &txo->cq;
2589         struct be_tx_compl_info *txcp = &txo->txcp;
2590         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2591
2592         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2593                 return NULL;
2594
2595         /* Ensure load ordering of valid bit dword and other dwords below */
2596         rmb();
2597         be_dws_le_to_cpu(compl, sizeof(*compl));
2598
2599         txcp->status = GET_TX_COMPL_BITS(status, compl);
2600         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2601
2602         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2603         queue_tail_inc(tx_cq);
2604         return txcp;
2605 }
2606
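/* A TX request occupies one header WRB followed by one WRB per buffer
 * fragment. sent_skb_list[] records the skb only at the header-WRB index, so
 * the loop below frees the skb found there, skips over the header WRB and
 * unmaps each fragment WRB up to and including last_index.
 */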
2607 static u16 be_tx_compl_process(struct be_adapter *adapter,
2608                                struct be_tx_obj *txo, u16 last_index)
2609 {
2610         struct sk_buff **sent_skbs = txo->sent_skb_list;
2611         struct be_queue_info *txq = &txo->q;
2612         struct sk_buff *skb = NULL;
2613         bool unmap_skb_hdr = false;
2614         struct be_eth_wrb *wrb;
2615         u16 num_wrbs = 0;
2616         u32 frag_index;
2617
2618         do {
2619                 if (sent_skbs[txq->tail]) {
2620                         /* Free skb from prev req */
2621                         if (skb)
2622                                 dev_consume_skb_any(skb);
2623                         skb = sent_skbs[txq->tail];
2624                         sent_skbs[txq->tail] = NULL;
2625                         queue_tail_inc(txq);  /* skip hdr wrb */
2626                         num_wrbs++;
2627                         unmap_skb_hdr = true;
2628                 }
2629                 wrb = queue_tail_node(txq);
2630                 frag_index = txq->tail;
2631                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2632                               (unmap_skb_hdr && skb_headlen(skb)));
2633                 unmap_skb_hdr = false;
2634                 queue_tail_inc(txq);
2635                 num_wrbs++;
2636         } while (frag_index != last_index);
2637         dev_consume_skb_any(skb);
2638
2639         return num_wrbs;
2640 }
2641
2642 /* Return the number of events in the event queue */
2643 static inline int events_get(struct be_eq_obj *eqo)
2644 {
2645         struct be_eq_entry *eqe;
2646         int num = 0;
2647
2648         do {
2649                 eqe = queue_tail_node(&eqo->q);
2650                 if (eqe->evt == 0)
2651                         break;
2652
2653                 rmb();
2654                 eqe->evt = 0;
2655                 num++;
2656                 queue_tail_inc(&eqo->q);
2657         } while (true);
2658
2659         return num;
2660 }
2661
2662 /* Leaves the EQ in a disarmed state */
2663 static void be_eq_clean(struct be_eq_obj *eqo)
2664 {
2665         int num = events_get(eqo);
2666
2667         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2668 }
2669
2670 /* Free posted rx buffers that were not used */
2671 static void be_rxq_clean(struct be_rx_obj *rxo)
2672 {
2673         struct be_queue_info *rxq = &rxo->q;
2674         struct be_rx_page_info *page_info;
2675
2676         while (atomic_read(&rxq->used) > 0) {
2677                 page_info = get_rx_page_info(rxo);
2678                 put_page(page_info->page);
2679                 memset(page_info, 0, sizeof(*page_info));
2680         }
2681         BUG_ON(atomic_read(&rxq->used));
2682         rxq->tail = 0;
2683         rxq->head = 0;
2684 }
2685
2686 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2687 {
2688         struct be_queue_info *rx_cq = &rxo->cq;
2689         struct be_rx_compl_info *rxcp;
2690         struct be_adapter *adapter = rxo->adapter;
2691         int flush_wait = 0;
2692
2693         /* Consume pending rx completions.
2694          * Wait for the flush completion (identified by zero num_rcvd)
2695          * to arrive. Notify CQ even when there are no more CQ entries
2696          * for HW to flush partially coalesced CQ entries.
2697          * In Lancer, there is no need to wait for flush compl.
2698          */
2699         for (;;) {
2700                 rxcp = be_rx_compl_get(rxo);
2701                 if (!rxcp) {
2702                         if (lancer_chip(adapter))
2703                                 break;
2704
2705                         if (flush_wait++ > 50 ||
2706                             be_check_error(adapter,
2707                                            BE_ERROR_HW)) {
2708                                 dev_warn(&adapter->pdev->dev,
2709                                          "did not receive flush compl\n");
2710                                 break;
2711                         }
2712                         be_cq_notify(adapter, rx_cq->id, true, 0);
2713                         mdelay(1);
2714                 } else {
2715                         be_rx_compl_discard(rxo, rxcp);
2716                         be_cq_notify(adapter, rx_cq->id, false, 1);
2717                         if (rxcp->num_rcvd == 0)
2718                                 break;
2719                 }
2720         }
2721
2722         /* After cleanup, leave the CQ in unarmed state */
2723         be_cq_notify(adapter, rx_cq->id, false, 0);
2724 }
2725
2726 static void be_tx_compl_clean(struct be_adapter *adapter)
2727 {
2728         struct device *dev = &adapter->pdev->dev;
2729         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2730         struct be_tx_compl_info *txcp;
2731         struct be_queue_info *txq;
2732         u32 end_idx, notified_idx;
2733         struct be_tx_obj *txo;
2734         int i, pending_txqs;
2735
2736         /* Stop polling for compls when HW has been silent for 10ms */
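        /* timeo is reset whenever a queue makes progress, so the loop exits
         * only after ~10 consecutive 1ms polls with no completions, once all
         * queues have drained, or on a detected HW error.
         */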
2737         do {
2738                 pending_txqs = adapter->num_tx_qs;
2739
2740                 for_all_tx_queues(adapter, txo, i) {
2741                         cmpl = 0;
2742                         num_wrbs = 0;
2743                         txq = &txo->q;
2744                         while ((txcp = be_tx_compl_get(txo))) {
2745                                 num_wrbs +=
2746                                         be_tx_compl_process(adapter, txo,
2747                                                             txcp->end_index);
2748                                 cmpl++;
2749                         }
2750                         if (cmpl) {
2751                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2752                                 atomic_sub(num_wrbs, &txq->used);
2753                                 timeo = 0;
2754                         }
2755                         if (!be_is_tx_compl_pending(txo))
2756                                 pending_txqs--;
2757                 }
2758
2759                 if (pending_txqs == 0 || ++timeo > 10 ||
2760                     be_check_error(adapter, BE_ERROR_HW))
2761                         break;
2762
2763                 mdelay(1);
2764         } while (true);
2765
2766         /* Free enqueued TX that was never notified to HW */
2767         for_all_tx_queues(adapter, txo, i) {
2768                 txq = &txo->q;
2769
2770                 if (atomic_read(&txq->used)) {
2771                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2772                                  i, atomic_read(&txq->used));
2773                         notified_idx = txq->tail;
2774                         end_idx = txq->tail;
2775                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2776                                   txq->len);
2777                         /* Use the tx-compl process logic to handle requests
2778                          * that were not sent to the HW.
2779                          */
2780                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2781                         atomic_sub(num_wrbs, &txq->used);
2782                         BUG_ON(atomic_read(&txq->used));
2783                         txo->pend_wrb_cnt = 0;
2784                         /* Since hw was never notified of these requests,
2785                          * reset TXQ indices
2786                          */
2787                         txq->head = notified_idx;
2788                         txq->tail = notified_idx;
2789                 }
2790         }
2791 }
2792
2793 static void be_evt_queues_destroy(struct be_adapter *adapter)
2794 {
2795         struct be_eq_obj *eqo;
2796         int i;
2797
2798         for_all_evt_queues(adapter, eqo, i) {
2799                 if (eqo->q.created) {
2800                         be_eq_clean(eqo);
2801                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2802                         netif_napi_del(&eqo->napi);
2803                         free_cpumask_var(eqo->affinity_mask);
2804                 }
2805                 be_queue_free(adapter, &eqo->q);
2806         }
2807 }
2808
2809 static int be_evt_queues_create(struct be_adapter *adapter)
2810 {
2811         struct be_queue_info *eq;
2812         struct be_eq_obj *eqo;
2813         struct be_aic_obj *aic;
2814         int i, rc;
2815
2816         /* need enough EQs to service both RX and TX queues */
2817         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2818                                     max(adapter->cfg_num_rx_irqs,
2819                                         adapter->cfg_num_tx_irqs));
2820
2821         for_all_evt_queues(adapter, eqo, i) {
2822                 int numa_node = dev_to_node(&adapter->pdev->dev);
2823
2824                 aic = &adapter->aic_obj[i];
2825                 eqo->adapter = adapter;
2826                 eqo->idx = i;
2827                 aic->max_eqd = BE_MAX_EQD;
2828                 aic->enable = true;
2829
2830                 eq = &eqo->q;
2831                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2832                                     sizeof(struct be_eq_entry));
2833                 if (rc)
2834                         return rc;
2835
2836                 rc = be_cmd_eq_create(adapter, eqo);
2837                 if (rc)
2838                         return rc;
2839
2840                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2841                         return -ENOMEM;
2842                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2843                                 eqo->affinity_mask);
2844                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2845                                BE_NAPI_WEIGHT);
2846         }
2847         return 0;
2848 }
2849
2850 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2851 {
2852         struct be_queue_info *q;
2853
2854         q = &adapter->mcc_obj.q;
2855         if (q->created)
2856                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2857         be_queue_free(adapter, q);
2858
2859         q = &adapter->mcc_obj.cq;
2860         if (q->created)
2861                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2862         be_queue_free(adapter, q);
2863 }
2864
2865 /* Must be called only after TX qs are created as MCC shares TX EQ */
2866 static int be_mcc_queues_create(struct be_adapter *adapter)
2867 {
2868         struct be_queue_info *q, *cq;
2869
2870         cq = &adapter->mcc_obj.cq;
2871         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2872                            sizeof(struct be_mcc_compl)))
2873                 goto err;
2874
2875         /* Use the default EQ for MCC completions */
2876         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2877                 goto mcc_cq_free;
2878
2879         q = &adapter->mcc_obj.q;
2880         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2881                 goto mcc_cq_destroy;
2882
2883         if (be_cmd_mccq_create(adapter, q, cq))
2884                 goto mcc_q_free;
2885
2886         return 0;
2887
2888 mcc_q_free:
2889         be_queue_free(adapter, q);
2890 mcc_cq_destroy:
2891         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2892 mcc_cq_free:
2893         be_queue_free(adapter, cq);
2894 err:
2895         return -1;
2896 }
2897
2898 static void be_tx_queues_destroy(struct be_adapter *adapter)
2899 {
2900         struct be_queue_info *q;
2901         struct be_tx_obj *txo;
2902         u8 i;
2903
2904         for_all_tx_queues(adapter, txo, i) {
2905                 q = &txo->q;
2906                 if (q->created)
2907                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2908                 be_queue_free(adapter, q);
2909
2910                 q = &txo->cq;
2911                 if (q->created)
2912                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2913                 be_queue_free(adapter, q);
2914         }
2915 }
2916
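/* Create a completion queue and a TX queue for each TX ring. If there are
 * fewer EQs than TX rings, the TX CQs are distributed round-robin over the
 * EQs; XPS is configured to steer transmits toward the CPUs in each EQ's
 * affinity mask.
 */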
2917 static int be_tx_qs_create(struct be_adapter *adapter)
2918 {
2919         struct be_queue_info *cq;
2920         struct be_tx_obj *txo;
2921         struct be_eq_obj *eqo;
2922         int status, i;
2923
2924         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2925
2926         for_all_tx_queues(adapter, txo, i) {
2927                 cq = &txo->cq;
2928                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2929                                         sizeof(struct be_eth_tx_compl));
2930                 if (status)
2931                         return status;
2932
2933                 u64_stats_init(&txo->stats.sync);
2934                 u64_stats_init(&txo->stats.sync_compl);
2935
2936                 /* If num_evt_qs is less than num_tx_qs, then more than
2937                  * one txq shares an eq
2938                  */
2939                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2940                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2941                 if (status)
2942                         return status;
2943
2944                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2945                                         sizeof(struct be_eth_wrb));
2946                 if (status)
2947                         return status;
2948
2949                 status = be_cmd_txq_create(adapter, txo);
2950                 if (status)
2951                         return status;
2952
2953                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2954                                     eqo->idx);
2955         }
2956
2957         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2958                  adapter->num_tx_qs);
2959         return 0;
2960 }
2961
2962 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2963 {
2964         struct be_queue_info *q;
2965         struct be_rx_obj *rxo;
2966         int i;
2967
2968         for_all_rx_queues(adapter, rxo, i) {
2969                 q = &rxo->cq;
2970                 if (q->created)
2971                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2972                 be_queue_free(adapter, q);
2973         }
2974 }
2975
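/* Create one RX completion queue per RX ring. RSS rings are used only when
 * at least two are supported; a default (non-RSS) RXQ is added when needed,
 * and at least one RXQ is always created. RX CQs are spread round-robin
 * across the EQs.
 */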
2976 static int be_rx_cqs_create(struct be_adapter *adapter)
2977 {
2978         struct be_queue_info *eq, *cq;
2979         struct be_rx_obj *rxo;
2980         int rc, i;
2981
2982         adapter->num_rss_qs =
2983                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2984
2985         /* We'll use RSS only if at least 2 RSS rings are supported. */
2986         if (adapter->num_rss_qs < 2)
2987                 adapter->num_rss_qs = 0;
2988
2989         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2990
2991         /* When the interface is not capable of RSS rings (and there is no
2992          * need to create a default RXQ) we'll still need one RXQ
2993          */
2994         if (adapter->num_rx_qs == 0)
2995                 adapter->num_rx_qs = 1;
2996
2997         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2998         for_all_rx_queues(adapter, rxo, i) {
2999                 rxo->adapter = adapter;
3000                 cq = &rxo->cq;
3001                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3002                                     sizeof(struct be_eth_rx_compl));
3003                 if (rc)
3004                         return rc;
3005
3006                 u64_stats_init(&rxo->stats.sync);
3007                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3008                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3009                 if (rc)
3010                         return rc;
3011         }
3012
3013         dev_info(&adapter->pdev->dev,
3014                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3015         return 0;
3016 }
3017
3018 static irqreturn_t be_intx(int irq, void *dev)
3019 {
3020         struct be_eq_obj *eqo = dev;
3021         struct be_adapter *adapter = eqo->adapter;
3022         int num_evts = 0;
3023
3024         /* IRQ is not expected when NAPI is scheduled as the EQ
3025          * will not be armed.
3026          * But, this can happen on Lancer INTx where it takes
3027          * a while to de-assert INTx or in BE2 where occasionally
3028          * an interrupt may be raised even when EQ is unarmed.
3029          * If NAPI is already scheduled, then counting & notifying
3030          * events will orphan them.
3031          */
3032         if (napi_schedule_prep(&eqo->napi)) {
3033                 num_evts = events_get(eqo);
3034                 __napi_schedule(&eqo->napi);
3035                 if (num_evts)
3036                         eqo->spurious_intr = 0;
3037         }
3038         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3039
3040         /* Return IRQ_HANDLED only for the first spurious intr
3041          * after a valid intr to stop the kernel from branding
3042          * this irq as a bad one!
3043          */
3044         if (num_evts || eqo->spurious_intr++ == 0)
3045                 return IRQ_HANDLED;
3046         else
3047                 return IRQ_NONE;
3048 }
3049
3050 static irqreturn_t be_msix(int irq, void *dev)
3051 {
3052         struct be_eq_obj *eqo = dev;
3053
3054         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3055         napi_schedule(&eqo->napi);
3056         return IRQ_HANDLED;
3057 }
3058
3059 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3060 {
3061         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3062 }
3063
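/* Process up to 'budget' RX completions on a ring. Flush completions (no
 * data) are skipped; completions with partial DMA (Lancer B0) or packets
 * received due to imperfect filtering in promiscuous mode are discarded.
 * GRO is used for eligible packets unless we are busy-polling, and the RX
 * ring is refilled when it falls below the refill watermark.
 */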
3064 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3065                          int budget, int polling)
3066 {
3067         struct be_adapter *adapter = rxo->adapter;
3068         struct be_queue_info *rx_cq = &rxo->cq;
3069         struct be_rx_compl_info *rxcp;
3070         u32 work_done;
3071         u32 frags_consumed = 0;
3072
3073         for (work_done = 0; work_done < budget; work_done++) {
3074                 rxcp = be_rx_compl_get(rxo);
3075                 if (!rxcp)
3076                         break;
3077
3078                 /* Is it a flush compl that has no data */
3079                 if (unlikely(rxcp->num_rcvd == 0))
3080                         goto loop_continue;
3081
3082                 /* Discard compls with partial DMA (Lancer B0) */
3083                 if (unlikely(!rxcp->pkt_size)) {
3084                         be_rx_compl_discard(rxo, rxcp);
3085                         goto loop_continue;
3086                 }
3087
3088                 /* On BE, drop pkts that arrive due to imperfect filtering in
3089                  * promiscuous mode on some SKUs
3090                  */
3091                 if (unlikely(rxcp->port != adapter->port_num &&
3092                              !lancer_chip(adapter))) {
3093                         be_rx_compl_discard(rxo, rxcp);
3094                         goto loop_continue;
3095                 }
3096
3097                 /* Don't do gro when we're busy_polling */
3098                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3099                         be_rx_compl_process_gro(rxo, napi, rxcp);
3100                 else
3101                         be_rx_compl_process(rxo, napi, rxcp);
3102
3103 loop_continue:
3104                 frags_consumed += rxcp->num_rcvd;
3105                 be_rx_stats_update(rxo, rxcp);
3106         }
3107
3108         if (work_done) {
3109                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3110
3111                 /* When an rx-obj gets into post_starved state, just
3112                  * let be_worker do the posting.
3113                  */
3114                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115                     !rxo->rx_post_starved)
3116                         be_post_rx_frags(rxo, GFP_ATOMIC,
3117                                          max_t(u32, MAX_RX_POST,
3118                                                frags_consumed));
3119         }
3120
3121         return work_done;
3122 }
3123
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126         switch (status) {
3127         case BE_TX_COMP_HDR_PARSE_ERR:
3128                 tx_stats(txo)->tx_hdr_parse_err++;
3129                 break;
3130         case BE_TX_COMP_NDMA_ERR:
3131                 tx_stats(txo)->tx_dma_err++;
3132                 break;
3133         case BE_TX_COMP_ACL_ERR:
3134                 tx_stats(txo)->tx_spoof_check_err++;
3135                 break;
3136         }
3137 }
3138
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case LANCER_TX_COMP_LSO_ERR:
3143                 tx_stats(txo)->tx_tso_err++;
3144                 break;
3145         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147                 tx_stats(txo)->tx_spoof_check_err++;
3148                 break;
3149         case LANCER_TX_COMP_QINQ_ERR:
3150                 tx_stats(txo)->tx_qinq_err++;
3151                 break;
3152         case LANCER_TX_COMP_PARITY_ERR:
3153                 tx_stats(txo)->tx_internal_parity_err++;
3154                 break;
3155         case LANCER_TX_COMP_DMA_ERR:
3156                 tx_stats(txo)->tx_dma_err++;
3157                 break;
3158         }
3159 }
3160
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162                           int idx)
3163 {
3164         int num_wrbs = 0, work_done = 0;
3165         struct be_tx_compl_info *txcp;
3166
3167         while ((txcp = be_tx_compl_get(txo))) {
3168                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169                 work_done++;
3170
3171                 if (txcp->status) {
3172                         if (lancer_chip(adapter))
3173                                 lancer_update_tx_err(txo, txcp->status);
3174                         else
3175                                 be_update_tx_err(txo, txcp->status);
3176                 }
3177         }
3178
3179         if (work_done) {
3180                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3181                 atomic_sub(num_wrbs, &txo->q.used);
3182
3183                 /* As Tx wrbs have been freed up, wake up netdev queue
3184                  * if it was stopped due to lack of tx wrbs.  */
3185                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186                     be_can_txq_wake(txo)) {
3187                         netif_wake_subqueue(adapter->netdev, idx);
3188                 }
3189
3190                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191                 tx_stats(txo)->tx_compl += work_done;
3192                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193         }
3194 }
3195
3196 #ifdef CONFIG_NET_RX_BUSY_POLL
3197 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3198 {
3199         bool status = true;
3200
3201         spin_lock(&eqo->lock); /* BH is already disabled */
3202         if (eqo->state & BE_EQ_LOCKED) {
3203                 WARN_ON(eqo->state & BE_EQ_NAPI);
3204                 eqo->state |= BE_EQ_NAPI_YIELD;
3205                 status = false;
3206         } else {
3207                 eqo->state = BE_EQ_NAPI;
3208         }
3209         spin_unlock(&eqo->lock);
3210         return status;
3211 }
3212
3213 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3214 {
3215         spin_lock(&eqo->lock); /* BH is already disabled */
3216
3217         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3218         eqo->state = BE_EQ_IDLE;
3219
3220         spin_unlock(&eqo->lock);
3221 }
3222
3223 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3224 {
3225         bool status = true;
3226
3227         spin_lock_bh(&eqo->lock);
3228         if (eqo->state & BE_EQ_LOCKED) {
3229                 eqo->state |= BE_EQ_POLL_YIELD;
3230                 status = false;
3231         } else {
3232                 eqo->state |= BE_EQ_POLL;
3233         }
3234         spin_unlock_bh(&eqo->lock);
3235         return status;
3236 }
3237
3238 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3239 {
3240         spin_lock_bh(&eqo->lock);
3241
3242         WARN_ON(eqo->state & (BE_EQ_NAPI));
3243         eqo->state = BE_EQ_IDLE;
3244
3245         spin_unlock_bh(&eqo->lock);
3246 }
3247
3248 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3249 {
3250         spin_lock_init(&eqo->lock);
3251         eqo->state = BE_EQ_IDLE;
3252 }
3253
3254 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3255 {
3256         local_bh_disable();
3257
3258         /* It's enough to just acquire napi lock on the eqo to stop
3259          * be_busy_poll() from processing any queues.
3260          */
3261         while (!be_lock_napi(eqo))
3262                 mdelay(1);
3263
3264         local_bh_enable();
3265 }
3266
3267 #else /* CONFIG_NET_RX_BUSY_POLL */
3268
3269 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3270 {
3271         return true;
3272 }
3273
3274 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3275 {
3276 }
3277
3278 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3279 {
3280         return false;
3281 }
3282
3283 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3284 {
3285 }
3286
3287 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3288 {
3289 }
3290
3291 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3292 {
3293 }
3294 #endif /* CONFIG_NET_RX_BUSY_POLL */
3295
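/* NAPI poll handler shared by all queues on an EQ: drain TX completions,
 * then RX completions (under the NAPI lock), and MCC completions on the
 * MCC EQ. The EQ is re-armed only when less than the full budget of work
 * was done; otherwise events are counted and cleared so polling continues.
 */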
3296 int be_poll(struct napi_struct *napi, int budget)
3297 {
3298         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299         struct be_adapter *adapter = eqo->adapter;
3300         int max_work = 0, work, i, num_evts;
3301         struct be_rx_obj *rxo;
3302         struct be_tx_obj *txo;
3303         u32 mult_enc = 0;
3304
3305         num_evts = events_get(eqo);
3306
3307         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3308                 be_process_tx(adapter, txo, i);
3309
3310         if (be_lock_napi(eqo)) {
3311                 /* This loop will iterate twice for EQ0, in which
3312                  * completions of the last RXQ (the default one) are also
3313                  * processed. For other EQs the loop iterates only once.
3314                  */
3315                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3316                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3317                         max_work = max(work, max_work);
3318                 }
3319                 be_unlock_napi(eqo);
3320         } else {
3321                 max_work = budget;
3322         }
3323
3324         if (is_mcc_eqo(eqo))
3325                 be_process_mcc(adapter);
3326
3327         if (max_work < budget) {
3328                 napi_complete(napi);
3329
3330                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3331                  * delay via a delay multiplier encoding value
3332                  */
3333                 if (skyhawk_chip(adapter))
3334                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3335
3336                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3337                              mult_enc);
3338         } else {
3339                 /* As we'll continue in polling mode, count and clear events */
3340                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3341         }
3342         return max_work;
3343 }
3344
3345 #ifdef CONFIG_NET_RX_BUSY_POLL
3346 static int be_busy_poll(struct napi_struct *napi)
3347 {
3348         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3349         struct be_adapter *adapter = eqo->adapter;
3350         struct be_rx_obj *rxo;
3351         int i, work = 0;
3352
3353         if (!be_lock_busy_poll(eqo))
3354                 return LL_FLUSH_BUSY;
3355
3356         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3357                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3358                 if (work)
3359                         break;
3360         }
3361
3362         be_unlock_busy_poll(eqo);
3363         return work;
3364 }
3365 #endif
3366
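/* Check the adapter for unrecoverable errors. On Lancer the SLIPORT status
 * registers are examined; on other chips the UE status CSRs are read and
 * filtered through their mask registers before any set bits are logged.
 */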
3367 void be_detect_error(struct be_adapter *adapter)
3368 {
3369         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3370         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3371         u32 i;
3372         struct device *dev = &adapter->pdev->dev;
3373
3374         if (be_check_error(adapter, BE_ERROR_HW))
3375                 return;
3376
3377         if (lancer_chip(adapter)) {
3378                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3379                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3380                         be_set_error(adapter, BE_ERROR_UE);
3381                         sliport_err1 = ioread32(adapter->db +
3382                                                 SLIPORT_ERROR1_OFFSET);
3383                         sliport_err2 = ioread32(adapter->db +
3384                                                 SLIPORT_ERROR2_OFFSET);
3385                         /* Do not log error messages if it's a FW reset */
3386                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3387                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3388                                 dev_info(dev, "Firmware update in progress\n");
3389                         } else {
3390                                 dev_err(dev, "Error detected in the card\n");
3391                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3392                                         sliport_status);
3393                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3394                                         sliport_err1);
3395                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3396                                         sliport_err2);
3397                         }
3398                 }
3399         } else {
3400                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3401                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3402                 ue_lo_mask = ioread32(adapter->pcicfg +
3403                                       PCICFG_UE_STATUS_LOW_MASK);
3404                 ue_hi_mask = ioread32(adapter->pcicfg +
3405                                       PCICFG_UE_STATUS_HI_MASK);
3406
3407                 ue_lo = (ue_lo & ~ue_lo_mask);
3408                 ue_hi = (ue_hi & ~ue_hi_mask);
3409
3410                 /* On certain platforms BE hardware can indicate spurious UEs.
3411                  * A real UE will cause the HW to stop working on its own, so
3412                  * the hw_error flag is not set here on UE detection.
3413                  */
3414
3415                 if (ue_lo || ue_hi) {
3416                         dev_err(dev, "Error detected in the adapter\n");
3417                         if (skyhawk_chip(adapter))
3418                                 be_set_error(adapter, BE_ERROR_UE);
3419
3420                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3421                                 if (ue_lo & 1)
3422                                         dev_err(dev, "UE: %s bit set\n",
3423                                                 ue_status_low_desc[i]);
3424                         }
3425                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3426                                 if (ue_hi & 1)
3427                                         dev_err(dev, "UE: %s bit set\n",
3428                                                 ue_status_hi_desc[i]);
3429                         }
3430                 }
3431         }
3432 }
3433
3434 static void be_msix_disable(struct be_adapter *adapter)
3435 {
3436         if (msix_enabled(adapter)) {
3437                 pci_disable_msix(adapter->pdev);
3438                 adapter->num_msix_vec = 0;
3439                 adapter->num_msix_roce_vec = 0;
3440         }
3441 }
3442
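/* Request between MIN_MSIX_VECTORS and the computed number of MSI-X vectors.
 * When RoCE is supported and more than the minimum was granted, half of the
 * vectors are set aside for RoCE. Failure is fatal only for VFs, which
 * cannot fall back to INTx.
 */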
3443 static int be_msix_enable(struct be_adapter *adapter)
3444 {
3445         unsigned int i, max_roce_eqs;
3446         struct device *dev = &adapter->pdev->dev;
3447         int num_vec;
3448
3449         /* If RoCE is supported, program the max number of vectors that
3450          * could be used for NIC and RoCE; otherwise, just program the number
3451          * we'll use initially.
3452          */
3453         if (be_roce_supported(adapter)) {
3454                 max_roce_eqs =
3455                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3456                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3457                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3458         } else {
3459                 num_vec = max(adapter->cfg_num_rx_irqs,
3460                               adapter->cfg_num_tx_irqs);
3461         }
3462
3463         for (i = 0; i < num_vec; i++)
3464                 adapter->msix_entries[i].entry = i;
3465
3466         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3467                                         MIN_MSIX_VECTORS, num_vec);
3468         if (num_vec < 0)
3469                 goto fail;
3470
3471         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3472                 adapter->num_msix_roce_vec = num_vec / 2;
3473                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3474                          adapter->num_msix_roce_vec);
3475         }
3476
3477         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3478
3479         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3480                  adapter->num_msix_vec);
3481         return 0;
3482
3483 fail:
3484         dev_warn(dev, "MSIx enable failed\n");
3485
3486         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3487         if (be_virtfn(adapter))
3488                 return num_vec;
3489         return 0;
3490 }
3491
3492 static inline int be_msix_vec_get(struct be_adapter *adapter,
3493                                   struct be_eq_obj *eqo)
3494 {
3495         return adapter->msix_entries[eqo->msix_idx].vector;
3496 }
3497
3498 static int be_msix_register(struct be_adapter *adapter)
3499 {
3500         struct net_device *netdev = adapter->netdev;
3501         struct be_eq_obj *eqo;
3502         int status, i, vec;
3503
3504         for_all_evt_queues(adapter, eqo, i) {
3505                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3506                 vec = be_msix_vec_get(adapter, eqo);
3507                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3508                 if (status)
3509                         goto err_msix;
3510
3511                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3512         }
3513
3514         return 0;
3515 err_msix:
3516         for (i--; i >= 0; i--) {
3517                 eqo = &adapter->eq_obj[i];
3518                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3519         }
3520         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3521                  status);
3522         be_msix_disable(adapter);
3523         return status;
3524 }
3525
3526 static int be_irq_register(struct be_adapter *adapter)
3527 {
3528         struct net_device *netdev = adapter->netdev;
3529         int status;
3530
3531         if (msix_enabled(adapter)) {
3532                 status = be_msix_register(adapter);
3533                 if (status == 0)
3534                         goto done;
3535                 /* INTx is not supported for VF */
3536                 if (be_virtfn(adapter))
3537                         return status;
3538         }
3539
3540         /* INTx: only the first EQ is used */
3541         netdev->irq = adapter->pdev->irq;
3542         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3543                              &adapter->eq_obj[0]);
3544         if (status) {
3545                 dev_err(&adapter->pdev->dev,
3546                         "INTx request IRQ failed - err %d\n", status);
3547                 return status;
3548         }
3549 done:
3550         adapter->isr_registered = true;
3551         return 0;
3552 }
3553
3554 static void be_irq_unregister(struct be_adapter *adapter)
3555 {
3556         struct net_device *netdev = adapter->netdev;
3557         struct be_eq_obj *eqo;
3558         int i, vec;
3559
3560         if (!adapter->isr_registered)
3561                 return;
3562
3563         /* INTx */
3564         if (!msix_enabled(adapter)) {
3565                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3566                 goto done;
3567         }
3568
3569         /* MSIx */
3570         for_all_evt_queues(adapter, eqo, i) {
3571                 vec = be_msix_vec_get(adapter, eqo);
3572                 irq_set_affinity_hint(vec, NULL);
3573                 free_irq(vec, eqo);
3574         }
3575
3576 done:
3577         adapter->isr_registered = false;
3578 }
3579
3580 static void be_rx_qs_destroy(struct be_adapter *adapter)
3581 {
3582         struct rss_info *rss = &adapter->rss_info;
3583         struct be_queue_info *q;
3584         struct be_rx_obj *rxo;
3585         int i;
3586
3587         for_all_rx_queues(adapter, rxo, i) {
3588                 q = &rxo->q;
3589                 if (q->created) {
3590                         /* If RXQs are destroyed while in an "out of buffer"
3591                          * state, there is a possibility of an HW stall on
3592                          * Lancer. So, post 64 buffers to each queue to relieve
3593                          * the "out of buffer" condition.
3594                          * Make sure there's space in the RXQ before posting.
3595                          */
3596                         if (lancer_chip(adapter)) {
3597                                 be_rx_cq_clean(rxo);
3598                                 if (atomic_read(&q->used) == 0)
3599                                         be_post_rx_frags(rxo, GFP_KERNEL,
3600                                                          MAX_RX_POST);
3601                         }
3602
3603                         be_cmd_rxq_destroy(adapter, q);
3604                         be_rx_cq_clean(rxo);
3605                         be_rxq_clean(rxo);
3606                 }
3607                 be_queue_free(adapter, q);
3608         }
3609
3610         if (rss->rss_flags) {
3611                 rss->rss_flags = RSS_ENABLE_NONE;
3612                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3613                                   128, rss->rss_hkey);
3614         }
3615 }
3616
3617 static void be_disable_if_filters(struct be_adapter *adapter)
3618 {
3619         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3620         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3621             check_privilege(adapter, BE_PRIV_FILTMGMT))
3622                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3623
3624         be_clear_uc_list(adapter);
3625         be_clear_mc_list(adapter);
3626
3627         /* The IFACE flags are enabled in the open path and cleared
3628          * in the close path. When a VF gets detached from the host and
3629          * assigned to a VM the following happens:
3630          *      - VF's IFACE flags get cleared in the detach path
3631          *      - IFACE create is issued by the VF in the attach path
3632          * Due to a bug in the BE3/Skyhawk-R FW
3633          * (Lancer FW doesn't have the bug), the IFACE capability flags
3634          * specified along with the IFACE create cmd issued by a VF are not
3635          * honoured by FW.  As a consequence, if a *new* driver
3636          * (that enables/disables IFACE flags in open/close)
3637          * is loaded in the host and an *old* driver is used by a VM/VF,
3638          * the IFACE gets created *without* the needed flags.
3639          * To avoid this, disable RX-filter flags only for Lancer.
3640          */
3641         if (lancer_chip(adapter)) {
3642                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3643                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3644         }
3645 }
3646
3647 static int be_close(struct net_device *netdev)
3648 {
3649         struct be_adapter *adapter = netdev_priv(netdev);
3650         struct be_eq_obj *eqo;
3651         int i;
3652
3653         /* This protection is needed as be_close() may be called even when the
3654          * adapter is in a cleared state (after an EEH permanent failure)
3655          */
3656         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3657                 return 0;
3658
3659         /* Before attempting cleanup ensure all the pending cmds in the
3660          * config_wq have finished execution
3661          */
3662         flush_workqueue(be_wq);
3663
3664         be_disable_if_filters(adapter);
3665
3666         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3667                 for_all_evt_queues(adapter, eqo, i) {
3668                         napi_disable(&eqo->napi);
3669                         be_disable_busy_poll(eqo);
3670                 }
3671                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3672         }
3673
3674         be_async_mcc_disable(adapter);
3675
3676         /* Wait for all pending tx completions to arrive so that
3677          * all tx skbs are freed.
3678          */
3679         netif_tx_disable(netdev);
3680         be_tx_compl_clean(adapter);
3681
3682         be_rx_qs_destroy(adapter);
3683
3684         for_all_evt_queues(adapter, eqo, i) {
3685                 if (msix_enabled(adapter))
3686                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3687                 else
3688                         synchronize_irq(netdev->irq);
3689                 be_eq_clean(eqo);
3690         }
3691
3692         be_irq_unregister(adapter);
3693
3694         return 0;
3695 }
3696
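/* Create the RX queues: a default RXQ when needed plus the RSS rings. With
 * multiple rings, the RSS indirection table and hash key are programmed;
 * e.g. with 4 RSS rings the 128-entry table is filled 0,1,2,3,0,1,2,3,...
 * Each ring is then posted RX_Q_LEN - 1 buffers (one less than the ring
 * size, since head == tail means empty).
 */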
3697 static int be_rx_qs_create(struct be_adapter *adapter)
3698 {
3699         struct rss_info *rss = &adapter->rss_info;
3700         u8 rss_key[RSS_HASH_KEY_LEN];
3701         struct be_rx_obj *rxo;
3702         int rc, i, j;
3703
3704         for_all_rx_queues(adapter, rxo, i) {
3705                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3706                                     sizeof(struct be_eth_rx_d));
3707                 if (rc)
3708                         return rc;
3709         }
3710
3711         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3712                 rxo = default_rxo(adapter);
3713                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714                                        rx_frag_size, adapter->if_handle,
3715                                        false, &rxo->rss_id);
3716                 if (rc)
3717                         return rc;
3718         }
3719
3720         for_all_rss_queues(adapter, rxo, i) {
3721                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3722                                        rx_frag_size, adapter->if_handle,
3723                                        true, &rxo->rss_id);
3724                 if (rc)
3725                         return rc;
3726         }
3727
3728         if (be_multi_rxq(adapter)) {
3729                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3730                         for_all_rss_queues(adapter, rxo, i) {
3731                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3732                                         break;
3733                                 rss->rsstable[j + i] = rxo->rss_id;
3734                                 rss->rss_queue[j + i] = i;
3735                         }
3736                 }
3737                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3738                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3739
3740                 if (!BEx_chip(adapter))
3741                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3742                                 RSS_ENABLE_UDP_IPV6;
3743
3744                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3745                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3746                                        RSS_INDIR_TABLE_LEN, rss_key);
3747                 if (rc) {
3748                         rss->rss_flags = RSS_ENABLE_NONE;
3749                         return rc;
3750                 }
3751
3752                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3753         } else {
3754                 /* Disable RSS if only the default RX Q is created */
3755                 rss->rss_flags = RSS_ENABLE_NONE;
3756         }
3757
3758
3759         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3760          * which is a queue empty condition
3761          */
3762         for_all_rx_queues(adapter, rxo, i)
3763                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3764
3765         return 0;
3766 }
3767
3768 static int be_enable_if_filters(struct be_adapter *adapter)
3769 {
3770         int status;
3771
3772         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3773         if (status)
3774                 return status;
3775
3776         /* Don't add MAC on BE3 VFs without FILTMGMT privilege */
3777         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3778             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3779                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780                 if (status)
3781                         return status;
3782                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783         }
3784
3785         if (adapter->vlans_added)
3786                 be_vid_config(adapter);
3787
3788         __be_set_rx_mode(adapter);
3789
3790         return 0;
3791 }
3792
3793 static int be_open(struct net_device *netdev)
3794 {
3795         struct be_adapter *adapter = netdev_priv(netdev);
3796         struct be_eq_obj *eqo;
3797         struct be_rx_obj *rxo;
3798         struct be_tx_obj *txo;
3799         u8 link_status;
3800         int status, i;
3801
3802         status = be_rx_qs_create(adapter);
3803         if (status)
3804                 goto err;
3805
3806         status = be_enable_if_filters(adapter);
3807         if (status)
3808                 goto err;
3809
3810         status = be_irq_register(adapter);
3811         if (status)
3812                 goto err;
3813
3814         for_all_rx_queues(adapter, rxo, i)
3815                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817         for_all_tx_queues(adapter, txo, i)
3818                 be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820         be_async_mcc_enable(adapter);
3821
3822         for_all_evt_queues(adapter, eqo, i) {
3823                 napi_enable(&eqo->napi);
3824                 be_enable_busy_poll(eqo);
3825                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3826         }
3827         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3828
3829         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3830         if (!status)
3831                 be_link_status_update(adapter, link_status);
3832
3833         netif_tx_start_all_queues(netdev);
3834         if (skyhawk_chip(adapter))
3835                 udp_tunnel_get_rx_info(netdev);
3836
3837         return 0;
3838 err:
3839         be_close(adapter->netdev);
3840         return -EIO;
3841 }
3842
3843 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3844 {
3845         u32 addr;
3846
3847         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3848
3849         mac[5] = (u8)(addr & 0xFF);
3850         mac[4] = (u8)((addr >> 8) & 0xFF);
3851         mac[3] = (u8)((addr >> 16) & 0xFF);
3852         /* Use the OUI from the current MAC address */
3853         memcpy(mac, adapter->netdev->dev_addr, 3);
3854 }
3855
3856 /*
3857  * Generate a seed MAC address from the PF MAC Address using jhash.
3858  * MAC addresses for VFs are assigned incrementally starting from the seed.
3859  * These addresses are programmed in the ASIC by the PF and the VF driver
3860  * queries for the MAC address during its probe.
3861  */
3862 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3863 {
3864         u32 vf;
3865         int status = 0;
3866         u8 mac[ETH_ALEN];
3867         struct be_vf_cfg *vf_cfg;
3868
3869         be_vf_eth_addr_generate(adapter, mac);
3870
3871         for_all_vfs(adapter, vf_cfg, vf) {
3872                 if (BEx_chip(adapter))
3873                         status = be_cmd_pmac_add(adapter, mac,
3874                                                  vf_cfg->if_handle,
3875                                                  &vf_cfg->pmac_id, vf + 1);
3876                 else
3877                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3878                                                 vf + 1);
3879
3880                 if (status)
3881                         dev_err(&adapter->pdev->dev,
3882                                 "Mac address assignment failed for VF %d\n",
3883                                 vf);
3884                 else
3885                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3886
3887                 mac[5] += 1;
3888         }
3889         return status;
3890 }
3891
3892 static int be_vfs_mac_query(struct be_adapter *adapter)
3893 {
3894         int status, vf;
3895         u8 mac[ETH_ALEN];
3896         struct be_vf_cfg *vf_cfg;
3897
3898         for_all_vfs(adapter, vf_cfg, vf) {
3899                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3900                                                mac, vf_cfg->if_handle,
3901                                                false, vf+1);
3902                 if (status)
3903                         return status;
3904                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905         }
3906         return 0;
3907 }
3908
3909 static void be_vf_clear(struct be_adapter *adapter)
3910 {
3911         struct be_vf_cfg *vf_cfg;
3912         u32 vf;
3913
3914         if (pci_vfs_assigned(adapter->pdev)) {
3915                 dev_warn(&adapter->pdev->dev,
3916                          "VFs are assigned to VMs: not disabling VFs\n");
3917                 goto done;
3918         }
3919
3920         pci_disable_sriov(adapter->pdev);
3921
3922         for_all_vfs(adapter, vf_cfg, vf) {
3923                 if (BEx_chip(adapter))
3924                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3925                                         vf_cfg->pmac_id, vf + 1);
3926                 else
3927                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3928                                        vf + 1);
3929
3930                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3931         }
3932
3933         if (BE3_chip(adapter))
3934                 be_cmd_set_hsw_config(adapter, 0, 0,
3935                                       adapter->if_handle,
3936                                       PORT_FWD_TYPE_PASSTHRU, 0);
3937 done:
3938         kfree(adapter->vf_cfg);
3939         adapter->num_vfs = 0;
3940         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3941 }
3942
3943 static void be_clear_queues(struct be_adapter *adapter)
3944 {
3945         be_mcc_queues_destroy(adapter);
3946         be_rx_cqs_destroy(adapter);
3947         be_tx_queues_destroy(adapter);
3948         be_evt_queues_destroy(adapter);
3949 }
3950
3951 static void be_cancel_worker(struct be_adapter *adapter)
3952 {
3953         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3954                 cancel_delayed_work_sync(&adapter->work);
3955                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3956         }
3957 }
3958
3959 static void be_cancel_err_detection(struct be_adapter *adapter)
3960 {
3961         struct be_error_recovery *err_rec = &adapter->error_recovery;
3962
3963         if (!be_err_recovery_workq)
3964                 return;
3965
3966         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3967                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3968                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3969         }
3970 }
3971
3972 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3973 {
3974         struct net_device *netdev = adapter->netdev;
3975
3976         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3977                 be_cmd_manage_iface(adapter, adapter->if_handle,
3978                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3979
3980         if (adapter->vxlan_port)
3981                 be_cmd_set_vxlan_port(adapter, 0);
3982
3983         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3984         adapter->vxlan_port = 0;
3985
3986         netdev->hw_enc_features = 0;
3987         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3988         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3989 }
3990
3991 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3992                                 struct be_resources *vft_res)
3993 {
3994         struct be_resources res = adapter->pool_res;
3995         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3996         struct be_resources res_mod = {0};
3997         u16 num_vf_qs = 1;
3998
3999         /* Distribute the queue resources among the PF and its VFs */
4000         if (num_vfs) {
4001                 /* Divide the rx queues evenly among the VFs and the PF, capped
4002                  * at VF-EQ-count. Any remainder queues belong to the PF.
4003                  */
4004                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4005                                 res.max_rss_qs / (num_vfs + 1));
4006
4007                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4008                  * RSS Tables per port. Provide RSS on VFs only if the number of
4009                  * VFs requested is less than its PF Pool's RSS Tables limit.
4010                  */
4011                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4012                         num_vf_qs = 1;
4013         }
4014
4015         /* GET_PROFILE_CONFIG returns a resource struct in which the fields
4016          * that are modifiable via the SET_PROFILE_CONFIG cmd are set to all '1's.
4017          */
4018         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4019                                   RESOURCE_MODIFIABLE, 0);
4020
4021         /* If RSS IFACE capability flags are modifiable for a VF, set the
4022          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4023          * more than 1 RSSQ is available for a VF.
4024          * Otherwise, provision only 1 queue pair for VF.
4025          */
4026         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4027                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4028                 if (num_vf_qs > 1) {
4029                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4030                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4031                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4032                 } else {
4033                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4034                                              BE_IF_FLAGS_DEFQ_RSS);
4035                 }
4036         } else {
4037                 num_vf_qs = 1;
4038         }
4039
4040         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4041                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4042                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4043         }
4044
4045         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4046         vft_res->max_rx_qs = num_vf_qs;
4047         vft_res->max_rss_qs = num_vf_qs;
4048         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4049         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4050
4051         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4052          * among the PF and its VFs, if the fields are changeable
4053          */
4054         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4055                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4056
4057         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4058                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4059
4060         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4061                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4062
4063         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4064                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4065 }
4066
4067 static void be_if_destroy(struct be_adapter *adapter)
4068 {
4069         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4070
4071         kfree(adapter->pmac_id);
4072         adapter->pmac_id = NULL;
4073
4074         kfree(adapter->mc_list);
4075         adapter->mc_list = NULL;
4076
4077         kfree(adapter->uc_list);
4078         adapter->uc_list = NULL;
4079 }
4080
4081 static int be_clear(struct be_adapter *adapter)
4082 {
4083         struct pci_dev *pdev = adapter->pdev;
4084         struct  be_resources vft_res = {0};
4085
4086         be_cancel_worker(adapter);
4087
4088         flush_workqueue(be_wq);
4089
4090         if (sriov_enabled(adapter))
4091                 be_vf_clear(adapter);
4092
4093         /* Re-configure FW to distribute resources evenly across max-supported
4094          * number of VFs, only when VFs are not already enabled.
4095          */
4096         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4097             !pci_vfs_assigned(pdev)) {
4098                 be_calculate_vf_res(adapter,
4099                                     pci_sriov_get_totalvfs(pdev),
4100                                     &vft_res);
4101                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4102                                         pci_sriov_get_totalvfs(pdev),
4103                                         &vft_res);
4104         }
4105
4106         be_disable_vxlan_offloads(adapter);
4107
4108         be_if_destroy(adapter);
4109
4110         be_clear_queues(adapter);
4111
4112         be_msix_disable(adapter);
4113         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4114         return 0;
4115 }
4116
4117 static int be_vfs_if_create(struct be_adapter *adapter)
4118 {
4119         struct be_resources res = {0};
4120         u32 cap_flags, en_flags, vf;
4121         struct be_vf_cfg *vf_cfg;
4122         int status;
4123
4124         /* If a FW profile exists, then cap_flags are updated */
4125         cap_flags = BE_VF_IF_EN_FLAGS;
4126
4127         for_all_vfs(adapter, vf_cfg, vf) {
4128                 if (!BE3_chip(adapter)) {
4129                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4130                                                            ACTIVE_PROFILE_TYPE,
4131                                                            RESOURCE_LIMITS,
4132                                                            vf + 1);
4133                         if (!status) {
4134                                 cap_flags = res.if_cap_flags;
4135                                 /* Prevent VFs from enabling VLAN promiscuous
4136                                  * mode
4137                                  */
4138                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4139                         }
4140                 }
4141
4142                 /* PF should enable IF flags during proxy if_create call */
4143                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4144                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4145                                           &vf_cfg->if_handle, vf + 1);
4146                 if (status)
4147                         return status;
4148         }
4149
4150         return 0;
4151 }
4152
4153 static int be_vf_setup_init(struct be_adapter *adapter)
4154 {
4155         struct be_vf_cfg *vf_cfg;
4156         int vf;
4157
4158         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4159                                   GFP_KERNEL);
4160         if (!adapter->vf_cfg)
4161                 return -ENOMEM;
4162
4163         for_all_vfs(adapter, vf_cfg, vf) {
4164                 vf_cfg->if_handle = -1;
4165                 vf_cfg->pmac_id = -1;
4166         }
4167         return 0;
4168 }
4169
4170 static int be_vf_setup(struct be_adapter *adapter)
4171 {
4172         struct device *dev = &adapter->pdev->dev;
4173         struct be_vf_cfg *vf_cfg;
4174         int status, old_vfs, vf;
4175         bool spoofchk;
4176
4177         old_vfs = pci_num_vf(adapter->pdev);
4178
4179         status = be_vf_setup_init(adapter);
4180         if (status)
4181                 goto err;
4182
4183         if (old_vfs) {
4184                 for_all_vfs(adapter, vf_cfg, vf) {
4185                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4186                         if (status)
4187                                 goto err;
4188                 }
4189
4190                 status = be_vfs_mac_query(adapter);
4191                 if (status)
4192                         goto err;
4193         } else {
4194                 status = be_vfs_if_create(adapter);
4195                 if (status)
4196                         goto err;
4197
4198                 status = be_vf_eth_addr_config(adapter);
4199                 if (status)
4200                         goto err;
4201         }
4202
4203         for_all_vfs(adapter, vf_cfg, vf) {
4204                 /* Allow VFs to program MAC/VLAN filters */
4205                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4206                                                   vf + 1);
4207                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4208                         status = be_cmd_set_fn_privileges(adapter,
4209                                                           vf_cfg->privileges |
4210                                                           BE_PRIV_FILTMGMT,
4211                                                           vf + 1);
4212                         if (!status) {
4213                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4214                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4215                                          vf);
4216                         }
4217                 }
4218
4219                 /* Allow full available bandwidth */
4220                 if (!old_vfs)
4221                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4222
4223                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4224                                                vf_cfg->if_handle, NULL,
4225                                                &spoofchk);
4226                 if (!status)
4227                         vf_cfg->spoofchk = spoofchk;
4228
4229                 if (!old_vfs) {
4230                         be_cmd_enable_vf(adapter, vf + 1);
4231                         be_cmd_set_logical_link_config(adapter,
4232                                                        IFLA_VF_LINK_STATE_AUTO,
4233                                                        vf+1);
4234                 }
4235         }
4236
4237         if (!old_vfs) {
4238                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4239                 if (status) {
4240                         dev_err(dev, "SRIOV enable failed\n");
4241                         adapter->num_vfs = 0;
4242                         goto err;
4243                 }
4244         }
4245
4246         if (BE3_chip(adapter)) {
4247                 /* On BE3, enable VEB only when SRIOV is enabled */
4248                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4249                                                adapter->if_handle,
4250                                                PORT_FWD_TYPE_VEB, 0);
4251                 if (status)
4252                         goto err;
4253         }
4254
4255         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4256         return 0;
4257 err:
4258         dev_err(dev, "VF setup failed\n");
4259         be_vf_clear(adapter);
4260         return status;
4261 }
4262
4263 /* Converting function_mode bits on BE3 to SH mc_type enums */
4264
4265 static u8 be_convert_mc_type(u32 function_mode)
4266 {
4267         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4268                 return vNIC1;
4269         else if (function_mode & QNQ_MODE)
4270                 return FLEX10;
4271         else if (function_mode & VNIC_MODE)
4272                 return vNIC2;
4273         else if (function_mode & UMC_ENABLED)
4274                 return UMC;
4275         else
4276                 return MC_NONE;
4277 }
4278
4279 /* On BE2/BE3 FW does not suggest the supported limits */
4280 static void BEx_get_resources(struct be_adapter *adapter,
4281                               struct be_resources *res)
4282 {
4283         bool use_sriov = adapter->num_vfs ? 1 : 0;
4284
4285         if (be_physfn(adapter))
4286                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4287         else
4288                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4289
4290         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4291
4292         if (be_is_mc(adapter)) {
4293                 /* Assuming that there are 4 channels per port
4294                  * when multi-channel is enabled
4295                  */
4296                 if (be_is_qnq_mode(adapter))
4297                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4298                 else
4299                         /* In a non-qnq multichannel mode, the pvid
4300                          * takes up one vlan entry
4301                          */
4302                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4303         } else {
4304                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4305         }
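             /* Worked example, assuming BE_NUM_VLANS_SUPPORTED is 64: a QnQ
              * multi-channel function gets 64 / 8 = 8 VLAN filter entries, a
              * non-QnQ multi-channel function gets (64 / 4) - 1 = 15 (one entry
              * is reserved for the pvid), and all others get the full 64.
              */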
4306
4307         res->max_mcast_mac = BE_MAX_MC;
4308
4309         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4310          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4311          *    *only* if it is RSS-capable.
4312          */
4313         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4314             be_virtfn(adapter) ||
4315             (be_is_mc(adapter) &&
4316              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4317                 res->max_tx_qs = 1;
4318         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4319                 struct be_resources super_nic_res = {0};
4320
4321                 /* On a SuperNIC profile, the driver needs to use the
4322                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4323                  */
4324                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4325                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4326                                           0);
4327                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4328                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4329         } else {
4330                 res->max_tx_qs = BE3_MAX_TX_QS;
4331         }
4332
4333         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4334             !use_sriov && be_physfn(adapter))
4335                 res->max_rss_qs = (adapter->be3_native) ?
4336                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4337         res->max_rx_qs = res->max_rss_qs + 1;
4338
4339         if (be_physfn(adapter))
4340                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4341                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4342         else
4343                 res->max_evt_qs = 1;
4344
4345         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4346         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4347         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4348                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4349 }
4350
4351 static void be_setup_init(struct be_adapter *adapter)
4352 {
4353         adapter->vlan_prio_bmap = 0xff;
4354         adapter->phy.link_speed = -1;
4355         adapter->if_handle = -1;
4356         adapter->be3_native = false;
4357         adapter->if_flags = 0;
4358         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4359         if (be_physfn(adapter))
4360                 adapter->cmd_privileges = MAX_PRIVILEGES;
4361         else
4362                 adapter->cmd_privileges = MIN_PRIVILEGES;
4363 }
4364
4365 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4366  * However, this HW limitation is not exposed to the host via any SLI cmd.
4367  * As a result, in the case of SRIOV, and in particular in multi-partition
4368  * configs, the driver needs to calculate a proportional share of RSS Tables
4369  * per PF-pool for distribution between the VFs. This self-imposed limit
4370  * determines the number of VFs for which RSS can be enabled.
4371  */
4372 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4373 {
4374         struct be_port_resources port_res = {0};
4375         u8 rss_tables_on_port;
4376         u16 max_vfs = be_max_vfs(adapter);
4377
4378         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4379                                   RESOURCE_LIMITS, 0);
4380
4381         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4382
4383         /* Each PF Pool's RSS Tables limit =
4384          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4385          */
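             /* Hypothetical example: if MAX_PORT_RSS_TABLES were 15 and one NIC
              * PF is carved on the port, 14 tables remain; a PF whose pool
              * allows 32 of the port's 64 total VFs is then granted
              * 32 * 14 / 64 = 7 RSS tables for its VFs.
              */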
4386         adapter->pool_res.max_rss_tables =
4387                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4388 }
4389
4390 static int be_get_sriov_config(struct be_adapter *adapter)
4391 {
4392         struct be_resources res = {0};
4393         int max_vfs, old_vfs;
4394
4395         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4396                                   RESOURCE_LIMITS, 0);
4397
4398         /* Some old versions of BE3 FW don't report max_vfs value */
4399         if (BE3_chip(adapter) && !res.max_vfs) {
4400                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4401                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4402         }
4403
4404         adapter->pool_res = res;
4405
4406         /* If the VFs were not disabled during a previous unload of the
4407          * driver, we cannot rely on the PF POOL limits for the TotalVFs value.
4408          * Instead, use the TotalVFs value stored in the pci-dev struct.
4409          */
4410         old_vfs = pci_num_vf(adapter->pdev);
4411         if (old_vfs) {
4412                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4413                          old_vfs);
4414
4415                 adapter->pool_res.max_vfs =
4416                         pci_sriov_get_totalvfs(adapter->pdev);
4417                 adapter->num_vfs = old_vfs;
4418         }
4419
4420         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4421                 be_calculate_pf_pool_rss_tables(adapter);
4422                 dev_info(&adapter->pdev->dev,
4423                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4424                          be_max_pf_pool_rss_tables(adapter));
4425         }
4426         return 0;
4427 }
4428
4429 static void be_alloc_sriov_res(struct be_adapter *adapter)
4430 {
4431         int old_vfs = pci_num_vf(adapter->pdev);
4432         struct be_resources vft_res = {0};
4433         int status;
4434
4435         be_get_sriov_config(adapter);
4436
4437         if (!old_vfs)
4438                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4439
4440         /* When the HW is in SRIOV capable configuration, the PF-pool
4441          * resources are given to PF during driver load, if there are no
4442          * old VFs. This facility is not available in BE3 FW.
4443          * Also, this is done by FW in Lancer chip.
4444          */
4445         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4446                 be_calculate_vf_res(adapter, 0, &vft_res);
4447                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4448                                                  &vft_res);
4449                 if (status)
4450                         dev_err(&adapter->pdev->dev,
4451                                 "Failed to optimize SRIOV resources\n");
4452         }
4453 }
4454
4455 static int be_get_resources(struct be_adapter *adapter)
4456 {
4457         struct device *dev = &adapter->pdev->dev;
4458         struct be_resources res = {0};
4459         int status;
4460
4461         /* For Lancer, SH etc. read per-function resource limits from FW.
4462          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4463          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4464          */
4465         if (BEx_chip(adapter)) {
4466                 BEx_get_resources(adapter, &res);
4467         } else {
4468                 status = be_cmd_get_func_config(adapter, &res);
4469                 if (status)
4470                         return status;
4471
4472                 /* If a default RXQ must be created, we'll use up one RSSQ */
4473                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4474                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4475                         res.max_rss_qs -= 1;
4476         }
4477
4478         /* If RoCE is supported, stash away half the EQs for RoCE */
4479         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4480                                 res.max_evt_qs / 2 : res.max_evt_qs;
4481         adapter->res = res;
4482
4483         /* If FW supports RSS default queue, then skip creating non-RSS
4484          * queue for non-IP traffic.
4485          */
4486         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4487                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4488
4489         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4490                  be_max_txqs(adapter), be_max_rxqs(adapter),
4491                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4492                  be_max_vfs(adapter));
4493         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4494                  be_max_uc(adapter), be_max_mc(adapter),
4495                  be_max_vlans(adapter));
4496
4497         /* Ensure RX and TX queues are created in pairs at init time */
4498         adapter->cfg_num_rx_irqs =
4499                                 min_t(u16, netif_get_num_default_rss_queues(),
4500                                       be_max_qp_irqs(adapter));
4501         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4502         return 0;
4503 }
4504
4505 static int be_get_config(struct be_adapter *adapter)
4506 {
4507         int status, level;
4508         u16 profile_id;
4509
4510         status = be_cmd_get_cntl_attributes(adapter);
4511         if (status)
4512                 return status;
4513
4514         status = be_cmd_query_fw_cfg(adapter);
4515         if (status)
4516                 return status;
4517
4518         if (!lancer_chip(adapter) && be_physfn(adapter))
4519                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4520
4521         if (BEx_chip(adapter)) {
4522                 level = be_cmd_get_fw_log_level(adapter);
4523                 adapter->msg_enable =
4524                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4525         }
4526
4527         be_cmd_get_acpi_wol_cap(adapter);
4528         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4529         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4530
4531         be_cmd_query_port_name(adapter);
4532
4533         if (be_physfn(adapter)) {
4534                 status = be_cmd_get_active_profile(adapter, &profile_id);
4535                 if (!status)
4536                         dev_info(&adapter->pdev->dev,
4537                                  "Using profile 0x%x\n", profile_id);
4538         }
4539
4540         return 0;
4541 }
4542
4543 static int be_mac_setup(struct be_adapter *adapter)
4544 {
4545         u8 mac[ETH_ALEN];
4546         int status;
4547
4548         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4549                 status = be_cmd_get_perm_mac(adapter, mac);
4550                 if (status)
4551                         return status;
4552
4553                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4554                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4555         }
4556
4557         return 0;
4558 }
4559
4560 static void be_schedule_worker(struct be_adapter *adapter)
4561 {
4562         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4563         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4564 }
4565
4566 static void be_destroy_err_recovery_workq(void)
4567 {
4568         if (!be_err_recovery_workq)
4569                 return;
4570
4571         flush_workqueue(be_err_recovery_workq);
4572         destroy_workqueue(be_err_recovery_workq);
4573         be_err_recovery_workq = NULL;
4574 }
4575
4576 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4577 {
4578         struct be_error_recovery *err_rec = &adapter->error_recovery;
4579
4580         if (!be_err_recovery_workq)
4581                 return;
4582
4583         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4584                            msecs_to_jiffies(delay));
4585         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4586 }
4587
4588 static int be_setup_queues(struct be_adapter *adapter)
4589 {
4590         struct net_device *netdev = adapter->netdev;
4591         int status;
4592
4593         status = be_evt_queues_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = be_tx_qs_create(adapter);
4598         if (status)
4599                 goto err;
4600
4601         status = be_rx_cqs_create(adapter);
4602         if (status)
4603                 goto err;
4604
4605         status = be_mcc_queues_create(adapter);
4606         if (status)
4607                 goto err;
4608
4609         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4610         if (status)
4611                 goto err;
4612
4613         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4614         if (status)
4615                 goto err;
4616
4617         return 0;
4618 err:
4619         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4620         return status;
4621 }
4622
4623 static int be_if_create(struct be_adapter *adapter)
4624 {
4625         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4626         u32 cap_flags = be_if_cap_flags(adapter);
4627         int status;
4628
4629         /* alloc required memory for other filtering fields */
4630         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4631                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4632         if (!adapter->pmac_id)
4633                 return -ENOMEM;
4634
4635         adapter->mc_list = kcalloc(be_max_mc(adapter),
4636                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4637         if (!adapter->mc_list)
4638                 return -ENOMEM;
4639
4640         adapter->uc_list = kcalloc(be_max_uc(adapter),
4641                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4642         if (!adapter->uc_list)
4643                 return -ENOMEM;
4644
4645         if (adapter->cfg_num_rx_irqs == 1)
4646                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4647
4648         en_flags &= cap_flags;
4649         /* will enable all the needed filter flags in be_open() */
4650         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4651                                   &adapter->if_handle, 0);
4652
4653         if (status)
4654                 return status;
4655
4656         return 0;
4657 }
4658
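     /* Tear down and recreate the interface and all queues, typically so that a
      * changed queue/IRQ configuration can take effect. If the netdev was
      * running it is closed first and reopened at the end; the MSI-X table is
      * re-programmed only when no vectors are shared with RoCE.
      */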
4659 int be_update_queues(struct be_adapter *adapter)
4660 {
4661         struct net_device *netdev = adapter->netdev;
4662         int status;
4663
4664         if (netif_running(netdev))
4665                 be_close(netdev);
4666
4667         be_cancel_worker(adapter);
4668
4669         /* If any vectors have been shared with RoCE we cannot re-program
4670          * the MSIx table.
4671          */
4672         if (!adapter->num_msix_roce_vec)
4673                 be_msix_disable(adapter);
4674
4675         be_clear_queues(adapter);
4676         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4677         if (status)
4678                 return status;
4679
4680         if (!msix_enabled(adapter)) {
4681                 status = be_msix_enable(adapter);
4682                 if (status)
4683                         return status;
4684         }
4685
4686         status = be_if_create(adapter);
4687         if (status)
4688                 return status;
4689
4690         status = be_setup_queues(adapter);
4691         if (status)
4692                 return status;
4693
4694         be_schedule_worker(adapter);
4695
4696         if (netif_running(netdev))
4697                 status = be_open(netdev);
4698
4699         return status;
4700 }
4701
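     /* Extract the major number from a FW version string; e.g. a version string
      * of the form "4.2.xxx.x" yields 4. Returns 0 if the string cannot be
      * parsed.
      */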
4702 static inline int fw_major_num(const char *fw_ver)
4703 {
4704         int fw_major = 0, i;
4705
4706         i = sscanf(fw_ver, "%d.", &fw_major);
4707         if (i != 1)
4708                 return 0;
4709
4710         return fw_major;
4711 }
4712
4713 /* If it is error recovery, FLR the PF.
4714  * Else, if any VFs are already enabled, don't FLR the PF.
4715  */
4716 static bool be_reset_required(struct be_adapter *adapter)
4717 {
4718         if (be_error_recovering(adapter))
4719                 return true;
4720         else
4721                 return pci_num_vf(adapter->pdev) == 0;
4722 }
4723
4724 /* Wait for the FW to be ready and perform the required initialization */
4725 static int be_func_init(struct be_adapter *adapter)
4726 {
4727         int status;
4728
4729         status = be_fw_wait_ready(adapter);
4730         if (status)
4731                 return status;
4732
4733         /* FW is now ready; clear errors to allow cmds/doorbell */
4734         be_clear_error(adapter, BE_CLEAR_ALL);
4735
4736         if (be_reset_required(adapter)) {
4737                 status = be_cmd_reset_function(adapter);
4738                 if (status)
4739                         return status;
4740
4741                 /* Wait for interrupts to quiesce after an FLR */
4742                 msleep(100);
4743         }
4744
4745         /* Tell FW we're ready to fire cmds */
4746         status = be_cmd_fw_init(adapter);
4747         if (status)
4748                 return status;
4749
4750         /* Allow interrupts for other ULPs running on NIC function */
4751         be_intr_set(adapter, true);
4752
4753         return 0;
4754 }
4755
4756 static int be_setup(struct be_adapter *adapter)
4757 {
4758         struct device *dev = &adapter->pdev->dev;
4759         int status;
4760
4761         status = be_func_init(adapter);
4762         if (status)
4763                 return status;
4764
4765         be_setup_init(adapter);
4766
4767         if (!lancer_chip(adapter))
4768                 be_cmd_req_native_mode(adapter);
4769
4770         /* invoke this cmd first to get pf_num and vf_num which are needed
4771          * for issuing profile related cmds
4772          */
4773         if (!BEx_chip(adapter)) {
4774                 status = be_cmd_get_func_config(adapter, NULL);
4775                 if (status)
4776                         return status;
4777         }
4778
4779         status = be_get_config(adapter);
4780         if (status)
4781                 goto err;
4782
4783         if (!BE2_chip(adapter) && be_physfn(adapter))
4784                 be_alloc_sriov_res(adapter);
4785
4786         status = be_get_resources(adapter);
4787         if (status)
4788                 goto err;
4789
4790         status = be_msix_enable(adapter);
4791         if (status)
4792                 goto err;
4793
4794         /* will enable all the needed filter flags in be_open() */
4795         status = be_if_create(adapter);
4796         if (status)
4797                 goto err;
4798
4799         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4800         rtnl_lock();
4801         status = be_setup_queues(adapter);
4802         rtnl_unlock();
4803         if (status)
4804                 goto err;
4805
4806         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4807
4808         status = be_mac_setup(adapter);
4809         if (status)
4810                 goto err;
4811
4812         be_cmd_get_fw_ver(adapter);
4813         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4814
4815         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4816                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4817                         adapter->fw_ver);
4818                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4819         }
4820
4821         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4822                                          adapter->rx_fc);
4823         if (status)
4824                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4825                                         &adapter->rx_fc);
4826
4827         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4828                  adapter->tx_fc, adapter->rx_fc);
4829
4830         if (be_physfn(adapter))
4831                 be_cmd_set_logical_link_config(adapter,
4832                                                IFLA_VF_LINK_STATE_AUTO, 0);
4833
4834         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4835          * confusing a Linux bridge or OVS that it might be connected to.
4836          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4837          * when SRIOV is not enabled.
4838          */
4839         if (BE3_chip(adapter))
4840                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4841                                       PORT_FWD_TYPE_PASSTHRU, 0);
4842
4843         if (adapter->num_vfs)
4844                 be_vf_setup(adapter);
4845
4846         status = be_cmd_get_phy_info(adapter);
4847         if (!status && be_pause_supported(adapter))
4848                 adapter->phy.fc_autoneg = 1;
4849
4850         if (be_physfn(adapter) && !lancer_chip(adapter))
4851                 be_cmd_set_features(adapter);
4852
4853         be_schedule_worker(adapter);
4854         adapter->flags |= BE_FLAGS_SETUP_DONE;
4855         return 0;
4856 err:
4857         be_clear(adapter);
4858         return status;
4859 }
4860
4861 #ifdef CONFIG_NET_POLL_CONTROLLER
4862 static void be_netpoll(struct net_device *netdev)
4863 {
4864         struct be_adapter *adapter = netdev_priv(netdev);
4865         struct be_eq_obj *eqo;
4866         int i;
4867
4868         for_all_evt_queues(adapter, eqo, i) {
4869                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4870                 napi_schedule(&eqo->napi);
4871         }
4872 }
4873 #endif
4874
4875 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4876 {
4877         const struct firmware *fw;
4878         int status;
4879
4880         if (!netif_running(adapter->netdev)) {
4881                 dev_err(&adapter->pdev->dev,
4882                         "Firmware load not allowed (interface is down)\n");
4883                 return -ENETDOWN;
4884         }
4885
4886         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4887         if (status)
4888                 goto fw_exit;
4889
4890         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4891
4892         if (lancer_chip(adapter))
4893                 status = lancer_fw_download(adapter, fw);
4894         else
4895                 status = be_fw_download(adapter, fw);
4896
4897         if (!status)
4898                 be_cmd_get_fw_ver(adapter);
4899
4900 fw_exit:
4901         release_firmware(fw);
4902         return status;
4903 }
4904
4905 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4906                                  u16 flags)
4907 {
4908         struct be_adapter *adapter = netdev_priv(dev);
4909         struct nlattr *attr, *br_spec;
4910         int rem;
4911         int status = 0;
4912         u16 mode = 0;
4913
4914         if (!sriov_enabled(adapter))
4915                 return -EOPNOTSUPP;
4916
4917         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4918         if (!br_spec)
4919                 return -EINVAL;
4920
4921         nla_for_each_nested(attr, br_spec, rem) {
4922                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4923                         continue;
4924
4925                 if (nla_len(attr) < sizeof(mode))
4926                         return -EINVAL;
4927
4928                 mode = nla_get_u16(attr);
4929                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4930                         return -EOPNOTSUPP;
4931
4932                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4933                         return -EINVAL;
4934
4935                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4936                                                adapter->if_handle,
4937                                                mode == BRIDGE_MODE_VEPA ?
4938                                                PORT_FWD_TYPE_VEPA :
4939                                                PORT_FWD_TYPE_VEB, 0);
4940                 if (status)
4941                         goto err;
4942
4943                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4944                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4945
4946                 return status;
4947         }
4948 err:
4949         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4950                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4951
4952         return status;
4953 }
4954
4955 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4956                                  struct net_device *dev, u32 filter_mask,
4957                                  int nlflags)
4958 {
4959         struct be_adapter *adapter = netdev_priv(dev);
4960         int status = 0;
4961         u8 hsw_mode;
4962
4963         /* BE and Lancer chips support VEB mode only */
4964         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4965                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4966                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4967                         return 0;
4968                 hsw_mode = PORT_FWD_TYPE_VEB;
4969         } else {
4970                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4971                                                adapter->if_handle, &hsw_mode,
4972                                                NULL);
4973                 if (status)
4974                         return 0;
4975
4976                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4977                         return 0;
4978         }
4979
4980         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4981                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4982                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4983                                        0, 0, nlflags, filter_mask, NULL);
4984 }
4985
4986 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4987                                          void (*func)(struct work_struct *))
4988 {
4989         struct be_cmd_work *work;
4990
4991         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4992         if (!work) {
4993                 dev_err(&adapter->pdev->dev,
4994                         "be_work memory allocation failed\n");
4995                 return NULL;
4996         }
4997
4998         INIT_WORK(&work->work, func);
4999         work->adapter = adapter;
5000         return work;
5001 }
5002
5003 /* VxLAN offload Notes:
5004  *
5005  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5006  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5007  * is expected to work across all types of IP tunnels once exported. Skyhawk
5008  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5009  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5010  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5011  * those other tunnels are unexported on the fly through ndo_features_check().
5012  *
5013  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5014  * adds more than one port, disable offloads and don't re-enable them again
5015  * until after all the tunnels are removed.
5016  */
5017 static void be_work_add_vxlan_port(struct work_struct *work)
5018 {
5019         struct be_cmd_work *cmd_work =
5020                                 container_of(work, struct be_cmd_work, work);
5021         struct be_adapter *adapter = cmd_work->adapter;
5022         struct net_device *netdev = adapter->netdev;
5023         struct device *dev = &adapter->pdev->dev;
5024         __be16 port = cmd_work->info.vxlan_port;
5025         int status;
5026
5027         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5028                 adapter->vxlan_port_aliases++;
5029                 goto done;
5030         }
5031
5032         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5033                 dev_info(dev,
5034                          "Only one UDP port supported for VxLAN offloads\n");
5035                 dev_info(dev, "Disabling VxLAN offloads\n");
5036                 adapter->vxlan_port_count++;
5037                 goto err;
5038         }
5039
5040         if (adapter->vxlan_port_count++ >= 1)
5041                 goto done;
5042
5043         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5044                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5045         if (status) {
5046                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5047                 goto err;
5048         }
5049
5050         status = be_cmd_set_vxlan_port(adapter, port);
5051         if (status) {
5052                 dev_warn(dev, "Failed to add VxLAN port\n");
5053                 goto err;
5054         }
5055         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5056         adapter->vxlan_port = port;
5057
5058         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5059                                    NETIF_F_TSO | NETIF_F_TSO6 |
5060                                    NETIF_F_GSO_UDP_TUNNEL;
5061         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5062         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5063
5064         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5065                  be16_to_cpu(port));
5066         goto done;
5067 err:
5068         be_disable_vxlan_offloads(adapter);
5069 done:
5070         kfree(cmd_work);
5071 }
5072
5073 static void be_work_del_vxlan_port(struct work_struct *work)
5074 {
5075         struct be_cmd_work *cmd_work =
5076                                 container_of(work, struct be_cmd_work, work);
5077         struct be_adapter *adapter = cmd_work->adapter;
5078         __be16 port = cmd_work->info.vxlan_port;
5079
5080         if (adapter->vxlan_port != port)
5081                 goto done;
5082
5083         if (adapter->vxlan_port_aliases) {
5084                 adapter->vxlan_port_aliases--;
5085                 goto out;
5086         }
5087
5088         be_disable_vxlan_offloads(adapter);
5089
5090         dev_info(&adapter->pdev->dev,
5091                  "Disabled VxLAN offloads for UDP port %d\n",
5092                  be16_to_cpu(port));
5093 done:
5094         adapter->vxlan_port_count--;
5095 out:
5096         kfree(cmd_work);
5097 }
5098
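     /* Common handler for the udp_tunnel add/del callbacks. VxLAN offloads are
      * not programmed on Lancer, BEx or multi-channel functions (i.e. only
      * Skyhawk-R is set up), and the actual FW cmds are deferred to the be_wq
      * workqueue.
      */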
5099 static void be_cfg_vxlan_port(struct net_device *netdev,
5100                               struct udp_tunnel_info *ti,
5101                               void (*func)(struct work_struct *))
5102 {
5103         struct be_adapter *adapter = netdev_priv(netdev);
5104         struct be_cmd_work *cmd_work;
5105
5106         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5107                 return;
5108
5109         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5110                 return;
5111
5112         cmd_work = be_alloc_work(adapter, func);
5113         if (cmd_work) {
5114                 cmd_work->info.vxlan_port = ti->port;
5115                 queue_work(be_wq, &cmd_work->work);
5116         }
5117 }
5118
5119 static void be_del_vxlan_port(struct net_device *netdev,
5120                               struct udp_tunnel_info *ti)
5121 {
5122         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5123 }
5124
5125 static void be_add_vxlan_port(struct net_device *netdev,
5126                               struct udp_tunnel_info *ti)
5127 {
5128         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5129 }
5130
5131 static netdev_features_t be_features_check(struct sk_buff *skb,
5132                                            struct net_device *dev,
5133                                            netdev_features_t features)
5134 {
5135         struct be_adapter *adapter = netdev_priv(dev);
5136         u8 l4_hdr = 0;
5137
5138         /* The code below restricts offload features for some tunneled packets.
5139          * Offload features for normal (non tunnel) packets are unchanged.
5140          */
5141         if (!skb->encapsulation ||
5142             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5143                 return features;
5144
5145         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5146          * should disable tunnel offload features if it's not a VxLAN packet,
5147          * as tunnel offloads have been enabled only for VxLAN. This is done to
5148          * allow other tunneled traffic like GRE to work fine while VxLAN
5149          * offloads are configured in Skyhawk-R.
5150          */
5151         switch (vlan_get_protocol(skb)) {
5152         case htons(ETH_P_IP):
5153                 l4_hdr = ip_hdr(skb)->protocol;
5154                 break;
5155         case htons(ETH_P_IPV6):
5156                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5157                 break;
5158         default:
5159                 return features;
5160         }
5161
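             /* Keep the offloads only for a well-formed VxLAN frame: UDP
              * transport, an inner Ethernet (TEB) payload placed right after the
              * UDP + VxLAN headers, and a destination port that matches the
              * configured VxLAN port.
              */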
5162         if (l4_hdr != IPPROTO_UDP ||
5163             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5164             skb->inner_protocol != htons(ETH_P_TEB) ||
5165             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5166                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5167             !adapter->vxlan_port ||
5168             udp_hdr(skb)->dest != adapter->vxlan_port)
5169                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5170
5171         return features;
5172 }
5173
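     /* Compose the physical port id from the adapter identity: byte 0 carries
      * the 1-based HBA port number and the remaining bytes carry the controller
      * serial number words copied in reverse order.
      */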
5174 static int be_get_phys_port_id(struct net_device *dev,
5175                                struct netdev_phys_item_id *ppid)
5176 {
5177         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5178         struct be_adapter *adapter = netdev_priv(dev);
5179         u8 *id;
5180
5181         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5182                 return -ENOSPC;
5183
5184         ppid->id[0] = adapter->hba_port_num + 1;
5185         id = &ppid->id[1];
5186         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5187              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5188                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5189
5190         ppid->id_len = id_len;
5191
5192         return 0;
5193 }
5194
5195 static void be_set_rx_mode(struct net_device *dev)
5196 {
5197         struct be_adapter *adapter = netdev_priv(dev);
5198         struct be_cmd_work *work;
5199
5200         work = be_alloc_work(adapter, be_work_set_rx_mode);
5201         if (work)
5202                 queue_work(be_wq, &work->work);
5203 }
5204
5205 static const struct net_device_ops be_netdev_ops = {
5206         .ndo_open               = be_open,
5207         .ndo_stop               = be_close,
5208         .ndo_start_xmit         = be_xmit,
5209         .ndo_set_rx_mode        = be_set_rx_mode,
5210         .ndo_set_mac_address    = be_mac_addr_set,
5211         .ndo_get_stats64        = be_get_stats64,
5212         .ndo_validate_addr      = eth_validate_addr,
5213         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5214         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5215         .ndo_set_vf_mac         = be_set_vf_mac,
5216         .ndo_set_vf_vlan        = be_set_vf_vlan,
5217         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5218         .ndo_get_vf_config      = be_get_vf_config,
5219         .ndo_set_vf_link_state  = be_set_vf_link_state,
5220         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5221 #ifdef CONFIG_NET_POLL_CONTROLLER
5222         .ndo_poll_controller    = be_netpoll,
5223 #endif
5224         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5225         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5226 #ifdef CONFIG_NET_RX_BUSY_POLL
5227         .ndo_busy_poll          = be_busy_poll,
5228 #endif
5229         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5230         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5231         .ndo_features_check     = be_features_check,
5232         .ndo_get_phys_port_id   = be_get_phys_port_id,
5233 };
5234
5235 static void be_netdev_init(struct net_device *netdev)
5236 {
5237         struct be_adapter *adapter = netdev_priv(netdev);
5238
5239         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5240                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5241                 NETIF_F_HW_VLAN_CTAG_TX;
5242         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5243                 netdev->hw_features |= NETIF_F_RXHASH;
5244
5245         netdev->features |= netdev->hw_features |
5246                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5247
5248         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5249                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5250
5251         netdev->priv_flags |= IFF_UNICAST_FLT;
5252
5253         netdev->flags |= IFF_MULTICAST;
5254
5255         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5256
5257         netdev->netdev_ops = &be_netdev_ops;
5258
5259         netdev->ethtool_ops = &be_ethtool_ops;
5260
5261         /* MTU range: 256 - 9000 */
5262         netdev->min_mtu = BE_MIN_MTU;
5263         netdev->max_mtu = BE_MAX_MTU;
5264 }
5265
5266 static void be_cleanup(struct be_adapter *adapter)
5267 {
5268         struct net_device *netdev = adapter->netdev;
5269
5270         rtnl_lock();
5271         netif_device_detach(netdev);
5272         if (netif_running(netdev))
5273                 be_close(netdev);
5274         rtnl_unlock();
5275
5276         be_clear(adapter);
5277 }
5278
5279 static int be_resume(struct be_adapter *adapter)
5280 {
5281         struct net_device *netdev = adapter->netdev;
5282         int status;
5283
5284         status = be_setup(adapter);
5285         if (status)
5286                 return status;
5287
5288         rtnl_lock();
5289         if (netif_running(netdev))
5290                 status = be_open(netdev);
5291         rtnl_unlock();
5292
5293         if (status)
5294                 return status;
5295
5296         netif_device_attach(netdev);
5297
5298         return 0;
5299 }
5300
5301 static void be_soft_reset(struct be_adapter *adapter)
5302 {
5303         u32 val;
5304
5305         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5306         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5307         val |= SLIPORT_SOFTRESET_SR_MASK;
5308         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5309 }
5310
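     /* A UE is treated as recoverable only if the POST stage reports a
      * recoverable error with a non-zero error code, enough time has passed
      * since driver load and since the last recovery attempt, and the error
      * code differs from the previous one (i.e. it is not a repeated TPE error).
      */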
5311 static bool be_err_is_recoverable(struct be_adapter *adapter)
5312 {
5313         struct be_error_recovery *err_rec = &adapter->error_recovery;
5314         unsigned long initial_idle_time =
5315                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5316         unsigned long recovery_interval =
5317                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5318         u16 ue_err_code;
5319         u32 val;
5320
5321         val = be_POST_stage_get(adapter);
5322         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5323                 return false;
5324         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5325         if (ue_err_code == 0)
5326                 return false;
5327
5328         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5329                 ue_err_code);
5330
5331         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5332                 dev_err(&adapter->pdev->dev,
5333                         "Cannot recover within %lu sec from driver load\n",
5334                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5335                 return false;
5336         }
5337
5338         if (err_rec->last_recovery_time &&
5339             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5340                 dev_err(&adapter->pdev->dev,
5341                         "Cannot recover within %lu sec from last recovery\n",
5342                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5343                 return false;
5344         }
5345
5346         if (ue_err_code == err_rec->last_err_code) {
5347                 dev_err(&adapter->pdev->dev,
5348                         "Cannot recover from a consecutive TPE error\n");
5349                 return false;
5350         }
5351
5352         err_rec->last_recovery_time = jiffies;
5353         err_rec->last_err_code = ue_err_code;
5354         return true;
5355 }
5356
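     /* TPE recovery state machine for BEx/Skyhawk. States advance as
      * NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT; functions other
      * than PF0 skip the RESET step. Each state sets resched_delay so that the
      * error detection task re-enters this function after the required wait.
      */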
5357 static int be_tpe_recover(struct be_adapter *adapter)
5358 {
5359         struct be_error_recovery *err_rec = &adapter->error_recovery;
5360         int status = -EAGAIN;
5361         u32 val;
5362
5363         switch (err_rec->recovery_state) {
5364         case ERR_RECOVERY_ST_NONE:
5365                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5366                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5367                 break;
5368
5369         case ERR_RECOVERY_ST_DETECT:
5370                 val = be_POST_stage_get(adapter);
5371                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5372                     POST_STAGE_RECOVERABLE_ERR) {
5373                         dev_err(&adapter->pdev->dev,
5374                                 "Unrecoverable HW error detected: 0x%x\n", val);
5375                         status = -EINVAL;
5376                         err_rec->resched_delay = 0;
5377                         break;
5378                 }
5379
5380                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5381
5382                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5383                  * milliseconds before it checks for final error status in
5384                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5385                  * If they are, PF0 then initiates a Soft Reset.
5386                  */
5387                 if (adapter->pf_num == 0) {
5388                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5389                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5390                                         ERR_RECOVERY_UE_DETECT_DURATION;
5391                         break;
5392                 }
5393
5394                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5395                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5396                                         ERR_RECOVERY_UE_DETECT_DURATION;
5397                 break;
5398
5399         case ERR_RECOVERY_ST_RESET:
5400                 if (!be_err_is_recoverable(adapter)) {
5401                         dev_err(&adapter->pdev->dev,
5402                                 "Failed to meet recovery criteria\n");
5403                         status = -EIO;
5404                         err_rec->resched_delay = 0;
5405                         break;
5406                 }
5407                 be_soft_reset(adapter);
5408                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5409                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5410                                         err_rec->ue_to_reset_time;
5411                 break;
5412
5413         case ERR_RECOVERY_ST_PRE_POLL:
5414                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5415                 err_rec->resched_delay = 0;
5416                 status = 0;                     /* done */
5417                 break;
5418
5419         default:
5420                 status = -EINVAL;
5421                 err_rec->resched_delay = 0;
5422                 break;
5423         }
5424
5425         return status;
5426 }
5427
5428 static int be_err_recover(struct be_adapter *adapter)
5429 {
5430         int status;
5431
5432         if (!lancer_chip(adapter)) {
5433                 if (!adapter->error_recovery.recovery_supported ||
5434                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5435                         return -EIO;
5436                 status = be_tpe_recover(adapter);
5437                 if (status)
5438                         goto err;
5439         }
5440
5441         /* Wait for adapter to reach quiescent state before
5442          * destroying queues
5443          */
5444         status = be_fw_wait_ready(adapter);
5445         if (status)
5446                 goto err;
5447
5448         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5449
5450         be_cleanup(adapter);
5451
5452         status = be_resume(adapter);
5453         if (status)
5454                 goto err;
5455
5456         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5457
5458 err:
5459         return status;
5460 }
5461
5462 static void be_err_detection_task(struct work_struct *work)
5463 {
5464         struct be_error_recovery *err_rec =
5465                         container_of(work, struct be_error_recovery,
5466                                      err_detection_work.work);
5467         struct be_adapter *adapter =
5468                         container_of(err_rec, struct be_adapter,
5469                                      error_recovery);
5470         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5471         struct device *dev = &adapter->pdev->dev;
5472         int recovery_status;
5473
5474         be_detect_error(adapter);
5475         if (!be_check_error(adapter, BE_ERROR_HW))
5476                 goto reschedule_task;
5477
5478         recovery_status = be_err_recover(adapter);
5479         if (!recovery_status) {
5480                 err_rec->recovery_retries = 0;
5481                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5482                 dev_info(dev, "Adapter recovery successful\n");
5483                 goto reschedule_task;
5484         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5485                 /* BEx/SH recovery state machine */
5486                 if (adapter->pf_num == 0 &&
5487                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5488                         dev_err(&adapter->pdev->dev,
5489                                 "Adapter recovery in progress\n");
5490                 resched_delay = err_rec->resched_delay;
5491                 goto reschedule_task;
5492         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5493                 /* For VFs, check if the PF has allocated resources
5494                  * every second.
5495                  */
5496                 dev_err(dev, "Re-trying adapter recovery\n");
5497                 goto reschedule_task;
5498         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5499                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5500                 /* In case of another error during recovery, it takes 30 sec
5501                  * for the adapter to come out of the error state. Retry error
5502                  * recovery after this time interval.
5503                  */
5504                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5505                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5506                 goto reschedule_task;
5507         } else {
5508                 dev_err(dev, "Adapter recovery failed\n");
5509                 dev_err(dev, "Please reboot server to recover\n");
5510         }
5511
5512         return;
5513
5514 reschedule_task:
5515         be_schedule_err_detection(adapter, resched_delay);
5516 }
5517
5518 static void be_log_sfp_info(struct be_adapter *adapter)
5519 {
5520         int status;
5521
5522         status = be_cmd_query_sfp_info(adapter);
5523         if (!status) {
5524                 dev_err(&adapter->pdev->dev,
5525                         "Port %c: %s Vendor: %s part no: %s",
5526                         adapter->port_name,
5527                         be_misconfig_evt_port_state[adapter->phy_state],
5528                         adapter->phy.vendor_name,
5529                         adapter->phy.vendor_pn);
5530         }
5531         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5532 }
5533
5534 static void be_worker(struct work_struct *work)
5535 {
5536         struct be_adapter *adapter =
5537                 container_of(work, struct be_adapter, work.work);
5538         struct be_rx_obj *rxo;
5539         int i;
5540
5541         if (be_physfn(adapter) &&
5542             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5543                 be_cmd_get_die_temperature(adapter);
5544
5545         /* when interrupts are not yet enabled, just reap any pending
5546          * mcc completions
5547          */
5548         if (!netif_running(adapter->netdev)) {
5549                 local_bh_disable();
5550                 be_process_mcc(adapter);
5551                 local_bh_enable();
5552                 goto reschedule;
5553         }
5554
5555         if (!adapter->stats_cmd_sent) {
5556                 if (lancer_chip(adapter))
5557                         lancer_cmd_get_pport_stats(adapter,
5558                                                    &adapter->stats_cmd);
5559                 else
5560                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5561         }
5562
5563         for_all_rx_queues(adapter, rxo, i) {
5564                 /* Replenish RX-queues starved due to memory
5565                  * allocation failures.
5566                  */
5567                 if (rxo->rx_post_starved)
5568                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5569         }
5570
5571         /* EQ-delay update for Skyhawk is done while notifying EQ */
5572         if (!skyhawk_chip(adapter))
5573                 be_eqd_update(adapter, false);
5574
5575         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5576                 be_log_sfp_info(adapter);
5577
5578 reschedule:
5579         adapter->work_counter++;
5580         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5581 }
5582
5583 static void be_unmap_pci_bars(struct be_adapter *adapter)
5584 {
5585         if (adapter->csr)
5586                 pci_iounmap(adapter->pdev, adapter->csr);
5587         if (adapter->db)
5588                 pci_iounmap(adapter->pdev, adapter->db);
5589         if (adapter->pcicfg && adapter->pcicfg_mapped)
5590                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5591 }
5592
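     /* Doorbell BAR: BAR 0 on Lancer and on VFs, BAR 4 on BEx/Skyhawk PFs */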
5593 static int db_bar(struct be_adapter *adapter)
5594 {
5595         if (lancer_chip(adapter) || be_virtfn(adapter))
5596                 return 0;
5597         else
5598                 return 4;
5599 }
5600
5601 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5602 {
5603         if (skyhawk_chip(adapter)) {
5604                 adapter->roce_db.size = 4096;
5605                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5606                                                               db_bar(adapter));
5607                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5608                                                                db_bar(adapter));
5609         }
5610         return 0;
5611 }
5612
5613 static int be_map_pci_bars(struct be_adapter *adapter)
5614 {
5615         struct pci_dev *pdev = adapter->pdev;
5616         u8 __iomem *addr;
5617         u32 sli_intf;
5618
5619         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5620         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5621                                 SLI_INTF_FAMILY_SHIFT;
5622         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5623
5624         if (BEx_chip(adapter) && be_physfn(adapter)) {
5625                 adapter->csr = pci_iomap(pdev, 2, 0);
5626                 if (!adapter->csr)
5627                         return -ENOMEM;
5628         }
5629
5630         addr = pci_iomap(pdev, db_bar(adapter), 0);
5631         if (!addr)
5632                 goto pci_map_err;
5633         adapter->db = addr;
5634
5635         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5636                 if (be_physfn(adapter)) {
5637                         /* PCICFG is the 2nd BAR in BE2 */
5638                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5639                         if (!addr)
5640                                 goto pci_map_err;
5641                         adapter->pcicfg = addr;
5642                         adapter->pcicfg_mapped = true;
5643                 } else {
5644                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5645                         adapter->pcicfg_mapped = false;
5646                 }
5647         }
5648
5649         be_roce_map_pci_bars(adapter);
5650         return 0;
5651
5652 pci_map_err:
5653         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5654         be_unmap_pci_bars(adapter);
5655         return -ENOMEM;
5656 }
5657
5658 static void be_drv_cleanup(struct be_adapter *adapter)
5659 {
5660         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5661         struct device *dev = &adapter->pdev->dev;
5662
5663         if (mem->va)
5664                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5665
5666         mem = &adapter->rx_filter;
5667         if (mem->va)
5668                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5669
5670         mem = &adapter->stats_cmd;
5671         if (mem->va)
5672                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5673 }
5674
5675 /* Allocate and initialize various fields in be_adapter struct */
5676 static int be_drv_init(struct be_adapter *adapter)
5677 {
5678         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5679         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5680         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5681         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5682         struct device *dev = &adapter->pdev->dev;
5683         int status = 0;
5684
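             /* Over-allocate the mailbox by 16 bytes so that 16-byte aligned
              * virtual and DMA addresses can be derived from it below.
              */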
5685         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5686         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5687                                                  &mbox_mem_alloc->dma,
5688                                                  GFP_KERNEL);
5689         if (!mbox_mem_alloc->va)
5690                 return -ENOMEM;
5691
5692         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5693         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5694         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5695
5696         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5697         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5698                                             &rx_filter->dma, GFP_KERNEL);
5699         if (!rx_filter->va) {
5700                 status = -ENOMEM;
5701                 goto free_mbox;
5702         }
5703
5704         if (lancer_chip(adapter))
5705                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5706         else if (BE2_chip(adapter))
5707                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5708         else if (BE3_chip(adapter))
5709                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5710         else
5711                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5712         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5713                                             &stats_cmd->dma, GFP_KERNEL);
5714         if (!stats_cmd->va) {
5715                 status = -ENOMEM;
5716                 goto free_rx_filter;
5717         }
5718
5719         mutex_init(&adapter->mbox_lock);
5720         mutex_init(&adapter->mcc_lock);
5721         mutex_init(&adapter->rx_filter_lock);
5722         spin_lock_init(&adapter->mcc_cq_lock);
5723         init_completion(&adapter->et_cmd_compl);
5724
5725         pci_save_state(adapter->pdev);
5726
5727         INIT_DELAYED_WORK(&adapter->work, be_worker);
5728
5729         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5730         adapter->error_recovery.resched_delay = 0;
5731         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5732                           be_err_detection_task);
5733
5734         adapter->rx_fc = true;
5735         adapter->tx_fc = true;
5736
5737         /* Must be a power of 2 or else MODULO will BUG_ON */
5738         adapter->be_get_temp_freq = 64;
5739
5740         return 0;
5741
5742 free_rx_filter:
5743         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5744 free_mbox:
5745         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5746                           mbox_mem_alloc->dma);
5747         return status;
5748 }
5749
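/* PCI remove callback: detach RoCE, disable interrupts, unregister the
 * netdev and release all adapter and PCI resources.
 */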
5750 static void be_remove(struct pci_dev *pdev)
5751 {
5752         struct be_adapter *adapter = pci_get_drvdata(pdev);
5753
5754         if (!adapter)
5755                 return;
5756
5757         be_roce_dev_remove(adapter);
5758         be_intr_set(adapter, false);
5759
5760         be_cancel_err_detection(adapter);
5761
5762         unregister_netdev(adapter->netdev);
5763
5764         be_clear(adapter);
5765
5766         if (!pci_vfs_assigned(adapter->pdev))
5767                 be_cmd_reset_function(adapter);
5768
5769         /* Tell the FW we are done issuing cmds */
5770         be_cmd_fw_clean(adapter);
5771
5772         be_unmap_pci_bars(adapter);
5773         be_drv_cleanup(adapter);
5774
5775         pci_disable_pcie_error_reporting(pdev);
5776
5777         pci_release_regions(pdev);
5778         pci_disable_device(pdev);
5779
5780         free_netdev(adapter->netdev);
5781 }
5782
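/* hwmon "temp1_input" show handler: reports the adapter's on-die
 * temperature in millidegrees Celsius, or -EIO while no valid reading
 * is available.
 */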
5783 static ssize_t be_hwmon_show_temp(struct device *dev,
5784                                   struct device_attribute *dev_attr,
5785                                   char *buf)
5786 {
5787         struct be_adapter *adapter = dev_get_drvdata(dev);
5788
5789         /* Unit: millidegree Celsius */
5790         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5791                 return -EIO;
5792         else
5793                 return sprintf(buf, "%u\n",
5794                                adapter->hwmon_info.be_on_die_temp * 1000);
5795 }
5796
5797 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5798                           be_hwmon_show_temp, NULL, 1);
5799
5800 static struct attribute *be_hwmon_attrs[] = {
5801         &sensor_dev_attr_temp1_input.dev_attr.attr,
5802         NULL
5803 };
5804
5805 ATTRIBUTE_GROUPS(be_hwmon);
5806
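/* Return a printable name for the adapter's multi-channel mode. */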
5807 static char *mc_name(struct be_adapter *adapter)
5808 {
5809         char *str = ""; /* default */
5810
5811         switch (adapter->mc_type) {
5812         case UMC:
5813                 str = "UMC";
5814                 break;
5815         case FLEX10:
5816                 str = "FLEX10";
5817                 break;
5818         case vNIC1:
5819                 str = "vNIC-1";
5820                 break;
5821         case nPAR:
5822                 str = "nPAR";
5823                 break;
5824         case UFP:
5825                 str = "UFP";
5826                 break;
5827         case vNIC2:
5828                 str = "vNIC-2";
5829                 break;
5830         default:
5831                 str = "";
5832         }
5833
5834         return str;
5835 }
5836
5837 static inline char *func_name(struct be_adapter *adapter)
5838 {
5839         return be_physfn(adapter) ? "PF" : "VF";
5840 }
5841
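/* Map the PCI device ID to the product name used in log messages. */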
5842 static inline char *nic_name(struct pci_dev *pdev)
5843 {
5844         switch (pdev->device) {
5845         case OC_DEVICE_ID1:
5846                 return OC_NAME;
5847         case OC_DEVICE_ID2:
5848                 return OC_NAME_BE;
5849         case OC_DEVICE_ID3:
5850         case OC_DEVICE_ID4:
5851                 return OC_NAME_LANCER;
5852         case BE_DEVICE_ID2:
5853                 return BE3_NAME;
5854         case OC_DEVICE_ID5:
5855         case OC_DEVICE_ID6:
5856                 return OC_NAME_SH;
5857         default:
5858                 return BE_NAME;
5859         }
5860 }
5861
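/* PCI probe callback: enable the device, map the BARs, allocate the
 * netdev and driver state, bring up the adapter and register the netdev.
 */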
5862 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5863 {
5864         struct be_adapter *adapter;
5865         struct net_device *netdev;
5866         int status = 0;
5867
5868         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5869
5870         status = pci_enable_device(pdev);
5871         if (status)
5872                 goto do_none;
5873
5874         status = pci_request_regions(pdev, DRV_NAME);
5875         if (status)
5876                 goto disable_dev;
5877         pci_set_master(pdev);
5878
5879         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5880         if (!netdev) {
5881                 status = -ENOMEM;
5882                 goto rel_reg;
5883         }
5884         adapter = netdev_priv(netdev);
5885         adapter->pdev = pdev;
5886         pci_set_drvdata(pdev, adapter);
5887         adapter->netdev = netdev;
5888         SET_NETDEV_DEV(netdev, &pdev->dev);
5889
5890         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5891         if (!status) {
5892                 netdev->features |= NETIF_F_HIGHDMA;
5893         } else {
5894                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5895                 if (status) {
5896                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5897                         goto free_netdev;
5898                 }
5899         }
5900
5901         status = pci_enable_pcie_error_reporting(pdev);
5902         if (!status)
5903                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5904
5905         status = be_map_pci_bars(adapter);
5906         if (status)
5907                 goto free_netdev;
5908
5909         status = be_drv_init(adapter);
5910         if (status)
5911                 goto unmap_bars;
5912
5913         status = be_setup(adapter);
5914         if (status)
5915                 goto drv_cleanup;
5916
5917         be_netdev_init(netdev);
5918         status = register_netdev(netdev);
5919         if (status != 0)
5920                 goto unsetup;
5921
5922         be_roce_dev_add(adapter);
5923
5924         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5925         adapter->error_recovery.probe_time = jiffies;
5926
5927         /* On-die temperature is not supported on VFs. */
5928         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5929                 adapter->hwmon_info.hwmon_dev =
5930                         devm_hwmon_device_register_with_groups(&pdev->dev,
5931                                                                DRV_NAME,
5932                                                                adapter,
5933                                                                be_hwmon_groups);
5934                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5935         }
5936
5937         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5938                  func_name(adapter), mc_name(adapter), adapter->port_name);
5939
5940         return 0;
5941
5942 unsetup:
5943         be_clear(adapter);
5944 drv_cleanup:
5945         be_drv_cleanup(adapter);
5946 unmap_bars:
5947         be_unmap_pci_bars(adapter);
5948 free_netdev:
5949         free_netdev(netdev);
5950 rel_reg:
5951         pci_release_regions(pdev);
5952 disable_dev:
5953         pci_disable_device(pdev);
5954 do_none:
5955         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5956         return status;
5957 }
5958
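/* Legacy PCI PM suspend callback: quiesce the adapter and place the
 * device in the requested low-power state.
 */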
5959 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5960 {
5961         struct be_adapter *adapter = pci_get_drvdata(pdev);
5962
5963         be_intr_set(adapter, false);
5964         be_cancel_err_detection(adapter);
5965
5966         be_cleanup(adapter);
5967
5968         pci_save_state(pdev);
5969         pci_disable_device(pdev);
5970         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5971         return 0;
5972 }
5973
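/* Legacy PCI PM resume callback: re-enable the device, restore its PCI
 * state and bring the adapter back up.
 */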
5974 static int be_pci_resume(struct pci_dev *pdev)
5975 {
5976         struct be_adapter *adapter = pci_get_drvdata(pdev);
5977         int status = 0;
5978
5979         status = pci_enable_device(pdev);
5980         if (status)
5981                 return status;
5982
5983         pci_restore_state(pdev);
5984
5985         status = be_resume(adapter);
5986         if (status)
5987                 return status;
5988
5989         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5990
5991         return 0;
5992 }
5993
5994 /*
5995  * An FLR (Function Level Reset) will stop BE from DMAing any data.
5996  */
5997 static void be_shutdown(struct pci_dev *pdev)
5998 {
5999         struct be_adapter *adapter = pci_get_drvdata(pdev);
6000
6001         if (!adapter)
6002                 return;
6003
6004         be_roce_dev_shutdown(adapter);
6005         cancel_delayed_work_sync(&adapter->work);
6006         be_cancel_err_detection(adapter);
6007
6008         netif_device_detach(adapter->netdev);
6009
6010         be_cmd_reset_function(adapter);
6011
6012         pci_disable_device(pdev);
6013 }
6014
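/* EEH/AER error_detected callback: record the EEH error, quiesce the
 * adapter and tell the PCI core whether a slot reset should be attempted.
 */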
6015 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6016                                             pci_channel_state_t state)
6017 {
6018         struct be_adapter *adapter = pci_get_drvdata(pdev);
6019
6020         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6021
6022         be_roce_dev_remove(adapter);
6023
6024         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6025                 be_set_error(adapter, BE_ERROR_EEH);
6026
6027                 be_cancel_err_detection(adapter);
6028
6029                 be_cleanup(adapter);
6030         }
6031
6032         if (state == pci_channel_io_perm_failure)
6033                 return PCI_ERS_RESULT_DISCONNECT;
6034
6035         pci_disable_device(pdev);
6036
6037         /* The error could cause the FW to trigger a flash debug dump.
6038          * Resetting the card while flash dump is in progress
6039          * can cause it not to recover; wait for it to finish.
6040          * Wait only for first function as it is needed only once per
6041          * adapter.
6042          */
6043         if (pdev->devfn == 0)
6044                 ssleep(30);
6045
6046         return PCI_ERS_RESULT_NEED_RESET;
6047 }
6048
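/* EEH slot_reset callback: re-enable the device after the slot reset and
 * wait for the FW to become ready before reporting recovery.
 */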
6049 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6050 {
6051         struct be_adapter *adapter = pci_get_drvdata(pdev);
6052         int status;
6053
6054         dev_info(&adapter->pdev->dev, "EEH reset\n");
6055
6056         status = pci_enable_device(pdev);
6057         if (status)
6058                 return PCI_ERS_RESULT_DISCONNECT;
6059
6060         pci_set_master(pdev);
6061         pci_restore_state(pdev);
6062
6063         /* Check if card is ok and fw is ready */
6064         dev_info(&adapter->pdev->dev,
6065                  "Waiting for FW to be ready after EEH reset\n");
6066         status = be_fw_wait_ready(adapter);
6067         if (status)
6068                 return PCI_ERS_RESULT_DISCONNECT;
6069
6070         pci_cleanup_aer_uncorrect_error_status(pdev);
6071         be_clear_error(adapter, BE_CLEAR_ALL);
6072         return PCI_ERS_RESULT_RECOVERED;
6073 }
6074
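/* EEH resume callback: bring the adapter back up once the slot reset has
 * completed successfully.
 */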
6075 static void be_eeh_resume(struct pci_dev *pdev)
6076 {
6077         int status = 0;
6078         struct be_adapter *adapter = pci_get_drvdata(pdev);
6079
6080         dev_info(&adapter->pdev->dev, "EEH resume\n");
6081
6082         pci_save_state(pdev);
6083
6084         status = be_resume(adapter);
6085         if (status)
6086                 goto err;
6087
6088         be_roce_dev_add(adapter);
6089
6090         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6091         return;
6092 err:
6093         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6094 }
6095
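/* sriov_configure callback, invoked when "sriov_numvfs" is written in
 * sysfs: enables or disables VFs and, on Skyhawk, redistributes the
 * PF-pool resources across the requested number of VFs.
 */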
6096 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6097 {
6098         struct be_adapter *adapter = pci_get_drvdata(pdev);
6099         struct be_resources vft_res = {0};
6100         int status;
6101
6102         if (!num_vfs)
6103                 be_vf_clear(adapter);
6104
6105         adapter->num_vfs = num_vfs;
6106
6107         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6108                 dev_warn(&pdev->dev,
6109                          "Cannot disable VFs while they are assigned\n");
6110                 return -EBUSY;
6111         }
6112
6113         /* When the HW is in an SRIOV capable configuration, the PF-pool
6114          * resources are equally distributed across the max number of VFs.
6115          * The user may request that only a subset of the max VFs be enabled.
6116          * Based on num_vfs, redistribute the resources across num_vfs so
6117          * that each VF gets a larger share of resources.
6118          * BE3 FW does not provide this facility, and on Lancer the FW does
6119          * this redistribution itself.
6120          */
6121         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6122                 be_calculate_vf_res(adapter, adapter->num_vfs,
6123                                     &vft_res);
6124                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6125                                                  adapter->num_vfs, &vft_res);
6126                 if (status)
6127                         dev_err(&pdev->dev,
6128                                 "Failed to optimize SR-IOV resources\n");
6129         }
6130
6131         status = be_get_resources(adapter);
6132         if (status)
6133                 return be_cmd_status(status);
6134
6135         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6136         rtnl_lock();
6137         status = be_update_queues(adapter);
6138         rtnl_unlock();
6139         if (status)
6140                 return be_cmd_status(status);
6141
6142         if (adapter->num_vfs)
6143                 status = be_vf_setup(adapter);
6144
6145         if (!status)
6146                 return adapter->num_vfs;
6147
6148         return 0;
6149 }
6150
6151 static const struct pci_error_handlers be_eeh_handlers = {
6152         .error_detected = be_eeh_err_detected,
6153         .slot_reset = be_eeh_reset,
6154         .resume = be_eeh_resume,
6155 };
6156
6157 static struct pci_driver be_driver = {
6158         .name = DRV_NAME,
6159         .id_table = be_dev_ids,
6160         .probe = be_probe,
6161         .remove = be_remove,
6162         .suspend = be_suspend,
6163         .resume = be_pci_resume,
6164         .shutdown = be_shutdown,
6165         .sriov_configure = be_pci_sriov_configure,
6166         .err_handler = &be_eeh_handlers
6167 };
6168
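/* Module init: validate module parameters, create the shared workqueues
 * and register the PCI driver.
 */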
6169 static int __init be_init_module(void)
6170 {
6171         int status;
6172
6173         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6174             rx_frag_size != 2048) {
6175                 pr_warn(DRV_NAME
6176                         " : Module param rx_frag_size must be 2048/4096/8192."
6177                         " Using 2048\n");
6178                 rx_frag_size = 2048;
6179         }
6180
6181         if (num_vfs > 0) {
6182                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6183                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6184         }
6185
6186         be_wq = create_singlethread_workqueue("be_wq");
6187         if (!be_wq) {
6188                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6189                 return -ENOMEM;
6190         }
6191
6192         be_err_recovery_workq =
6193                 create_singlethread_workqueue("be_err_recover");
6194         if (!be_err_recovery_workq)
6195                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6196
6197         status = pci_register_driver(&be_driver);
6198         if (status) {
6199                 destroy_workqueue(be_wq);
6200                 be_destroy_err_recovery_workq();
6201         }
6202         return status;
6203 }
6204 module_init(be_init_module);
6205
6206 static void __exit be_exit_module(void)
6207 {
6208         pci_unregister_driver(&be_driver);
6209
6210         be_destroy_err_recovery_workq();
6211
6212         if (be_wq)
6213                 destroy_workqueue(be_wq);
6214 }
6215 module_exit(be_exit_module);