]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/intel/igbvf/netdev.c
4eab83faec6208b052b74024bd67f53a10756de9
[linux.git] / drivers / net / ethernet / intel / igbvf / netdev.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2009 - 2018 Intel Corporation. */
3
4 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6 #include <linux/module.h>
7 #include <linux/types.h>
8 #include <linux/init.h>
9 #include <linux/pci.h>
10 #include <linux/vmalloc.h>
11 #include <linux/pagemap.h>
12 #include <linux/delay.h>
13 #include <linux/netdevice.h>
14 #include <linux/tcp.h>
15 #include <linux/ipv6.h>
16 #include <linux/slab.h>
17 #include <net/checksum.h>
18 #include <net/ip6_checksum.h>
19 #include <linux/mii.h>
20 #include <linux/ethtool.h>
21 #include <linux/if_vlan.h>
22 #include <linux/prefetch.h>
23 #include <linux/sctp.h>
24
25 #include "igbvf.h"
26
#define DRV_VERSION "2.4.0-k"
/* Driver name and version strings; not static, so they have external linkage */
char igbvf_driver_name[] = "igbvf";
const char igbvf_driver_version[] = DRV_VERSION;
/* Human-readable description and copyright banner, private to this file */
static const char igbvf_driver_string[] =
                  "Intel(R) Gigabit Virtual Function Network Driver";
static const char igbvf_copyright[] =
                  "Copyright (c) 2009 - 2012 Intel Corporation.";

/* Message classes OR-ed together to form the default message-enable mask */
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
/* "debug" module parameter; starts at -1 and is registered with perm 0 */
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

/* Forward declarations for functions referenced before their definitions */
static int igbvf_poll(struct napi_struct *napi, int budget);
static void igbvf_reset(struct igbvf_adapter *);
static void igbvf_set_interrupt_capability(struct igbvf_adapter *);
static void igbvf_reset_interrupt_capability(struct igbvf_adapter *);

/* Board description used for the board_vf entry of igbvf_info_tbl */
static struct igbvf_info igbvf_vf_info = {
        .mac            = e1000_vfadapt,
        .flags          = 0,
        .pba            = 10,
        .init_ops       = e1000_init_function_pointers_vf,
};

/* Board description used for the board_i350_vf entry of igbvf_info_tbl;
 * it differs from igbvf_vf_info only in the .mac value.
 */
static struct igbvf_info igbvf_i350_vf_info = {
        .mac            = e1000_vfadapt_i350,
        .flags          = 0,
        .pba            = 10,
        .init_ops       = e1000_init_function_pointers_vf,
};

/* Lookup table from board index (board_vf, board_i350_vf) to its info */
static const struct igbvf_info *igbvf_info_tbl[] = {
        [board_vf]      = &igbvf_vf_info,
        [board_i350_vf] = &igbvf_i350_vf_info,
};
63
64 /**
65  * igbvf_desc_unused - calculate if we have unused descriptors
66  * @rx_ring: address of receive ring structure
67  **/
68 static int igbvf_desc_unused(struct igbvf_ring *ring)
69 {
70         if (ring->next_to_clean > ring->next_to_use)
71                 return ring->next_to_clean - ring->next_to_use - 1;
72
73         return ring->count + ring->next_to_clean - ring->next_to_use - 1;
74 }
75
76 /**
77  * igbvf_receive_skb - helper function to handle Rx indications
78  * @adapter: board private structure
79  * @status: descriptor status field as written by hardware
80  * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
81  * @skb: pointer to sk_buff to be indicated to stack
82  **/
83 static void igbvf_receive_skb(struct igbvf_adapter *adapter,
84                               struct net_device *netdev,
85                               struct sk_buff *skb,
86                               u32 status, u16 vlan)
87 {
88         u16 vid;
89
90         if (status & E1000_RXD_STAT_VP) {
91                 if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) &&
92                     (status & E1000_RXDEXT_STATERR_LB))
93                         vid = be16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
94                 else
95                         vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
96                 if (test_bit(vid, adapter->active_vlans))
97                         __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
98         }
99
100         napi_gro_receive(&adapter->rx_ring->napi, skb);
101 }
102
103 static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter,
104                                          u32 status_err, struct sk_buff *skb)
105 {
106         skb_checksum_none_assert(skb);
107
108         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
109         if ((status_err & E1000_RXD_STAT_IXSM) ||
110             (adapter->flags & IGBVF_FLAG_RX_CSUM_DISABLED))
111                 return;
112
113         /* TCP/UDP checksum error bit is set */
114         if (status_err &
115             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
116                 /* let the stack verify checksum errors */
117                 adapter->hw_csum_err++;
118                 return;
119         }
120
121         /* It must be a TCP or UDP packet with a valid checksum */
122         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
123                 skb->ip_summed = CHECKSUM_UNNECESSARY;
124
125         adapter->hw_csum_good++;
126 }
127
128 /**
129  * igbvf_alloc_rx_buffers - Replace used receive buffers; packet split
130  * @rx_ring: address of ring structure to repopulate
131  * @cleaned_count: number of buffers to repopulate
132  **/
133 static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring,
134                                    int cleaned_count)
135 {
136         struct igbvf_adapter *adapter = rx_ring->adapter;
137         struct net_device *netdev = adapter->netdev;
138         struct pci_dev *pdev = adapter->pdev;
139         union e1000_adv_rx_desc *rx_desc;
140         struct igbvf_buffer *buffer_info;
141         struct sk_buff *skb;
142         unsigned int i;
143         int bufsz;
144
145         i = rx_ring->next_to_use;
146         buffer_info = &rx_ring->buffer_info[i];
147
148         if (adapter->rx_ps_hdr_size)
149                 bufsz = adapter->rx_ps_hdr_size;
150         else
151                 bufsz = adapter->rx_buffer_len;
152
153         while (cleaned_count--) {
154                 rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
155
156                 if (adapter->rx_ps_hdr_size && !buffer_info->page_dma) {
157                         if (!buffer_info->page) {
158                                 buffer_info->page = alloc_page(GFP_ATOMIC);
159                                 if (!buffer_info->page) {
160                                         adapter->alloc_rx_buff_failed++;
161                                         goto no_buffers;
162                                 }
163                                 buffer_info->page_offset = 0;
164                         } else {
165                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
166                         }
167                         buffer_info->page_dma =
168                                 dma_map_page(&pdev->dev, buffer_info->page,
169                                              buffer_info->page_offset,
170                                              PAGE_SIZE / 2,
171                                              DMA_FROM_DEVICE);
172                         if (dma_mapping_error(&pdev->dev,
173                                               buffer_info->page_dma)) {
174                                 __free_page(buffer_info->page);
175                                 buffer_info->page = NULL;
176                                 dev_err(&pdev->dev, "RX DMA map failed\n");
177                                 break;
178                         }
179                 }
180
181                 if (!buffer_info->skb) {
182                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
183                         if (!skb) {
184                                 adapter->alloc_rx_buff_failed++;
185                                 goto no_buffers;
186                         }
187
188                         buffer_info->skb = skb;
189                         buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
190                                                           bufsz,
191                                                           DMA_FROM_DEVICE);
192                         if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
193                                 dev_kfree_skb(buffer_info->skb);
194                                 buffer_info->skb = NULL;
195                                 dev_err(&pdev->dev, "RX DMA map failed\n");
196                                 goto no_buffers;
197                         }
198                 }
199                 /* Refresh the desc even if buffer_addrs didn't change because
200                  * each write-back erases this info.
201                  */
202                 if (adapter->rx_ps_hdr_size) {
203                         rx_desc->read.pkt_addr =
204                              cpu_to_le64(buffer_info->page_dma);
205                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
206                 } else {
207                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
208                         rx_desc->read.hdr_addr = 0;
209                 }
210
211                 i++;
212                 if (i == rx_ring->count)
213                         i = 0;
214                 buffer_info = &rx_ring->buffer_info[i];
215         }
216
217 no_buffers:
218         if (rx_ring->next_to_use != i) {
219                 rx_ring->next_to_use = i;
220                 if (i == 0)
221                         i = (rx_ring->count - 1);
222                 else
223                         i--;
224
225                 /* Force memory writes to complete before letting h/w
226                  * know there are new descriptors to fetch.  (Only
227                  * applicable for weak-ordered memory model archs,
228                  * such as IA-64).
229                 */
230                 wmb();
231                 writel(i, adapter->hw.hw_addr + rx_ring->tail);
232         }
233 }
234
/**
 * igbvf_clean_rx_irq - Send received data up the network stack; legacy
 * @adapter: board private structure
 * @work_done: running count of processed descriptors; incremented here
 * @work_to_do: upper bound for *work_done; processing stops once reached
 *
 * Walks the Rx ring from next_to_clean while descriptors have the DD status
 * bit set, builds skbs from the header/page buffers, and passes complete,
 * error-free packets to igbvf_receive_skb().  Buffers are handed back to
 * hardware in batches via igbvf_alloc_rx_buffers().
 *
 * the return value indicates whether actual cleaning was done, there
 * is no guarantee that everything was cleaned
 **/
static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter,
                               int *work_done, int work_to_do)
{
        struct igbvf_ring *rx_ring = adapter->rx_ring;
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        union e1000_adv_rx_desc *rx_desc, *next_rxd;
        struct igbvf_buffer *buffer_info, *next_buffer;
        struct sk_buff *skb;
        bool cleaned = false;
        int cleaned_count = 0;
        unsigned int total_bytes = 0, total_packets = 0;
        unsigned int i;
        u32 length, hlen, staterr;

        i = rx_ring->next_to_clean;
        rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

        while (staterr & E1000_RXD_STAT_DD) {
                /* respect the caller's work limit */
                if (*work_done >= work_to_do)
                        break;
                (*work_done)++;
                rmb(); /* read descriptor and rx_buffer_info after status DD */

                buffer_info = &rx_ring->buffer_info[i];

                /* HW will not DMA in data larger than the given buffer, even
                 * if it parses the (NFS, of course) header to be larger.  In
                 * that case, it fills the header buffer and spills the rest
                 * into the page.
                 */
                hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info)
                       & E1000_RXDADV_HDRBUFLEN_MASK) >>
                       E1000_RXDADV_HDRBUFLEN_SHIFT;
                if (hlen > adapter->rx_ps_hdr_size)
                        hlen = adapter->rx_ps_hdr_size;

                length = le16_to_cpu(rx_desc->wb.upper.length);
                cleaned = true;
                cleaned_count++;

                skb = buffer_info->skb;
                prefetch(skb->data - NET_IP_ALIGN);
                buffer_info->skb = NULL;
                /* Single-buffer mode: the whole packet is in the skb */
                if (!adapter->rx_ps_hdr_size) {
                        dma_unmap_single(&pdev->dev, buffer_info->dma,
                                         adapter->rx_buffer_len,
                                         DMA_FROM_DEVICE);
                        buffer_info->dma = 0;
                        skb_put(skb, length);
                        goto send_up;
                }

                /* Packet-split mode: an skb without fragments has not had
                 * its header buffer unmapped and accounted for yet.
                 */
                if (!skb_shinfo(skb)->nr_frags) {
                        dma_unmap_single(&pdev->dev, buffer_info->dma,
                                         adapter->rx_ps_hdr_size,
                                         DMA_FROM_DEVICE);
                        buffer_info->dma = 0;
                        skb_put(skb, hlen);
                }

                /* Attach the half-page data buffer as a new fragment */
                if (length) {
                        dma_unmap_page(&pdev->dev, buffer_info->page_dma,
                                       PAGE_SIZE / 2,
                                       DMA_FROM_DEVICE);
                        buffer_info->page_dma = 0;

                        skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
                                           buffer_info->page,
                                           buffer_info->page_offset,
                                           length);

                        /* Keep the page for reuse only when buffers fit in
                         * half a page and nobody else holds a reference;
                         * in that case take an extra reference for the ring.
                         */
                        if ((adapter->rx_buffer_len > (PAGE_SIZE / 2)) ||
                            (page_count(buffer_info->page) != 1))
                                buffer_info->page = NULL;
                        else
                                get_page(buffer_info->page);

                        skb->len += length;
                        skb->data_len += length;
                        skb->truesize += PAGE_SIZE / 2;
                }
send_up:
                i++;
                if (i == rx_ring->count)
                        i = 0;
                next_rxd = IGBVF_RX_DESC_ADV(*rx_ring, i);
                prefetch(next_rxd);
                next_buffer = &rx_ring->buffer_info[i];

                /* Packet continues in the next descriptor: swap skbs so the
                 * partially built skb travels with the next buffer and the
                 * next buffer's fresh skb stays in the current slot.
                 */
                if (!(staterr & E1000_RXD_STAT_EOP)) {
                        buffer_info->skb = next_buffer->skb;
                        buffer_info->dma = next_buffer->dma;
                        next_buffer->skb = skb;
                        next_buffer->dma = 0;
                        goto next_desc;
                }

                /* Drop frames the hardware flagged with a frame error */
                if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
                        dev_kfree_skb_irq(skb);
                        goto next_desc;
                }

                total_bytes += skb->len;
                total_packets++;

                igbvf_rx_checksum_adv(adapter, staterr, skb);

                skb->protocol = eth_type_trans(skb, netdev);

                igbvf_receive_skb(adapter, netdev, skb, staterr,
                                  rx_desc->wb.upper.vlan);

next_desc:
                /* clear the status so this descriptor no longer reads as done */
                rx_desc->wb.upper.status_error = 0;

                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IGBVF_RX_BUFFER_WRITE) {
                        igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
                        cleaned_count = 0;
                }

                /* use prefetched values */
                rx_desc = next_rxd;
                buffer_info = next_buffer;

                staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
        }

        rx_ring->next_to_clean = i;
        /* refill every descriptor that is currently unused */
        cleaned_count = igbvf_desc_unused(rx_ring);

        if (cleaned_count)
                igbvf_alloc_rx_buffers(rx_ring, cleaned_count);

        /* fold this pass into the adapter and netdev statistics */
        adapter->total_rx_packets += total_packets;
        adapter->total_rx_bytes += total_bytes;
        netdev->stats.rx_bytes += total_bytes;
        netdev->stats.rx_packets += total_packets;
        return cleaned;
}
384
385 static void igbvf_put_txbuf(struct igbvf_adapter *adapter,
386                             struct igbvf_buffer *buffer_info)
387 {
388         if (buffer_info->dma) {
389                 if (buffer_info->mapped_as_page)
390                         dma_unmap_page(&adapter->pdev->dev,
391                                        buffer_info->dma,
392                                        buffer_info->length,
393                                        DMA_TO_DEVICE);
394                 else
395                         dma_unmap_single(&adapter->pdev->dev,
396                                          buffer_info->dma,
397                                          buffer_info->length,
398                                          DMA_TO_DEVICE);
399                 buffer_info->dma = 0;
400         }
401         if (buffer_info->skb) {
402                 dev_kfree_skb_any(buffer_info->skb);
403                 buffer_info->skb = NULL;
404         }
405         buffer_info->time_stamp = 0;
406 }
407
408 /**
409  * igbvf_setup_tx_resources - allocate Tx resources (Descriptors)
410  * @adapter: board private structure
411  *
412  * Return 0 on success, negative on failure
413  **/
414 int igbvf_setup_tx_resources(struct igbvf_adapter *adapter,
415                              struct igbvf_ring *tx_ring)
416 {
417         struct pci_dev *pdev = adapter->pdev;
418         int size;
419
420         size = sizeof(struct igbvf_buffer) * tx_ring->count;
421         tx_ring->buffer_info = vzalloc(size);
422         if (!tx_ring->buffer_info)
423                 goto err;
424
425         /* round up to nearest 4K */
426         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
427         tx_ring->size = ALIGN(tx_ring->size, 4096);
428
429         tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
430                                            &tx_ring->dma, GFP_KERNEL);
431         if (!tx_ring->desc)
432                 goto err;
433
434         tx_ring->adapter = adapter;
435         tx_ring->next_to_use = 0;
436         tx_ring->next_to_clean = 0;
437
438         return 0;
439 err:
440         vfree(tx_ring->buffer_info);
441         dev_err(&adapter->pdev->dev,
442                 "Unable to allocate memory for the transmit descriptor ring\n");
443         return -ENOMEM;
444 }
445
446 /**
447  * igbvf_setup_rx_resources - allocate Rx resources (Descriptors)
448  * @adapter: board private structure
449  *
450  * Returns 0 on success, negative on failure
451  **/
452 int igbvf_setup_rx_resources(struct igbvf_adapter *adapter,
453                              struct igbvf_ring *rx_ring)
454 {
455         struct pci_dev *pdev = adapter->pdev;
456         int size, desc_len;
457
458         size = sizeof(struct igbvf_buffer) * rx_ring->count;
459         rx_ring->buffer_info = vzalloc(size);
460         if (!rx_ring->buffer_info)
461                 goto err;
462
463         desc_len = sizeof(union e1000_adv_rx_desc);
464
465         /* Round up to nearest 4K */
466         rx_ring->size = rx_ring->count * desc_len;
467         rx_ring->size = ALIGN(rx_ring->size, 4096);
468
469         rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
470                                            &rx_ring->dma, GFP_KERNEL);
471         if (!rx_ring->desc)
472                 goto err;
473
474         rx_ring->next_to_clean = 0;
475         rx_ring->next_to_use = 0;
476
477         rx_ring->adapter = adapter;
478
479         return 0;
480
481 err:
482         vfree(rx_ring->buffer_info);
483         rx_ring->buffer_info = NULL;
484         dev_err(&adapter->pdev->dev,
485                 "Unable to allocate memory for the receive descriptor ring\n");
486         return -ENOMEM;
487 }
488
489 /**
490  * igbvf_clean_tx_ring - Free Tx Buffers
491  * @tx_ring: ring to be cleaned
492  **/
493 static void igbvf_clean_tx_ring(struct igbvf_ring *tx_ring)
494 {
495         struct igbvf_adapter *adapter = tx_ring->adapter;
496         struct igbvf_buffer *buffer_info;
497         unsigned long size;
498         unsigned int i;
499
500         if (!tx_ring->buffer_info)
501                 return;
502
503         /* Free all the Tx ring sk_buffs */
504         for (i = 0; i < tx_ring->count; i++) {
505                 buffer_info = &tx_ring->buffer_info[i];
506                 igbvf_put_txbuf(adapter, buffer_info);
507         }
508
509         size = sizeof(struct igbvf_buffer) * tx_ring->count;
510         memset(tx_ring->buffer_info, 0, size);
511
512         /* Zero out the descriptor ring */
513         memset(tx_ring->desc, 0, tx_ring->size);
514
515         tx_ring->next_to_use = 0;
516         tx_ring->next_to_clean = 0;
517
518         writel(0, adapter->hw.hw_addr + tx_ring->head);
519         writel(0, adapter->hw.hw_addr + tx_ring->tail);
520 }
521
522 /**
523  * igbvf_free_tx_resources - Free Tx Resources per Queue
524  * @tx_ring: ring to free resources from
525  *
526  * Free all transmit software resources
527  **/
528 void igbvf_free_tx_resources(struct igbvf_ring *tx_ring)
529 {
530         struct pci_dev *pdev = tx_ring->adapter->pdev;
531
532         igbvf_clean_tx_ring(tx_ring);
533
534         vfree(tx_ring->buffer_info);
535         tx_ring->buffer_info = NULL;
536
537         dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
538                           tx_ring->dma);
539
540         tx_ring->desc = NULL;
541 }
542
543 /**
544  * igbvf_clean_rx_ring - Free Rx Buffers per Queue
545  * @adapter: board private structure
546  **/
547 static void igbvf_clean_rx_ring(struct igbvf_ring *rx_ring)
548 {
549         struct igbvf_adapter *adapter = rx_ring->adapter;
550         struct igbvf_buffer *buffer_info;
551         struct pci_dev *pdev = adapter->pdev;
552         unsigned long size;
553         unsigned int i;
554
555         if (!rx_ring->buffer_info)
556                 return;
557
558         /* Free all the Rx ring sk_buffs */
559         for (i = 0; i < rx_ring->count; i++) {
560                 buffer_info = &rx_ring->buffer_info[i];
561                 if (buffer_info->dma) {
562                         if (adapter->rx_ps_hdr_size) {
563                                 dma_unmap_single(&pdev->dev, buffer_info->dma,
564                                                  adapter->rx_ps_hdr_size,
565                                                  DMA_FROM_DEVICE);
566                         } else {
567                                 dma_unmap_single(&pdev->dev, buffer_info->dma,
568                                                  adapter->rx_buffer_len,
569                                                  DMA_FROM_DEVICE);
570                         }
571                         buffer_info->dma = 0;
572                 }
573
574                 if (buffer_info->skb) {
575                         dev_kfree_skb(buffer_info->skb);
576                         buffer_info->skb = NULL;
577                 }
578
579                 if (buffer_info->page) {
580                         if (buffer_info->page_dma)
581                                 dma_unmap_page(&pdev->dev,
582                                                buffer_info->page_dma,
583                                                PAGE_SIZE / 2,
584                                                DMA_FROM_DEVICE);
585                         put_page(buffer_info->page);
586                         buffer_info->page = NULL;
587                         buffer_info->page_dma = 0;
588                         buffer_info->page_offset = 0;
589                 }
590         }
591
592         size = sizeof(struct igbvf_buffer) * rx_ring->count;
593         memset(rx_ring->buffer_info, 0, size);
594
595         /* Zero out the descriptor ring */
596         memset(rx_ring->desc, 0, rx_ring->size);
597
598         rx_ring->next_to_clean = 0;
599         rx_ring->next_to_use = 0;
600
601         writel(0, adapter->hw.hw_addr + rx_ring->head);
602         writel(0, adapter->hw.hw_addr + rx_ring->tail);
603 }
604
605 /**
606  * igbvf_free_rx_resources - Free Rx Resources
607  * @rx_ring: ring to clean the resources from
608  *
609  * Free all receive software resources
610  **/
611
612 void igbvf_free_rx_resources(struct igbvf_ring *rx_ring)
613 {
614         struct pci_dev *pdev = rx_ring->adapter->pdev;
615
616         igbvf_clean_rx_ring(rx_ring);
617
618         vfree(rx_ring->buffer_info);
619         rx_ring->buffer_info = NULL;
620
621         dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
622                           rx_ring->dma);
623         rx_ring->desc = NULL;
624 }
625
626 /**
627  * igbvf_update_itr - update the dynamic ITR value based on statistics
628  * @adapter: pointer to adapter
629  * @itr_setting: current adapter->itr
630  * @packets: the number of packets during this measurement interval
631  * @bytes: the number of bytes during this measurement interval
632  *
633  * Stores a new ITR value based on packets and byte counts during the last
634  * interrupt.  The advantage of per interrupt computation is faster updates
635  * and more accurate ITR for the current traffic pattern.  Constants in this
636  * function were computed based on theoretical maximum wire speed and thresholds
637  * were set based on testing data as well as attempting to minimize response
638  * time while increasing bulk throughput.
639  **/
640 static enum latency_range igbvf_update_itr(struct igbvf_adapter *adapter,
641                                            enum latency_range itr_setting,
642                                            int packets, int bytes)
643 {
644         enum latency_range retval = itr_setting;
645
646         if (packets == 0)
647                 goto update_itr_done;
648
649         switch (itr_setting) {
650         case lowest_latency:
651                 /* handle TSO and jumbo frames */
652                 if (bytes/packets > 8000)
653                         retval = bulk_latency;
654                 else if ((packets < 5) && (bytes > 512))
655                         retval = low_latency;
656                 break;
657         case low_latency:  /* 50 usec aka 20000 ints/s */
658                 if (bytes > 10000) {
659                         /* this if handles the TSO accounting */
660                         if (bytes/packets > 8000)
661                                 retval = bulk_latency;
662                         else if ((packets < 10) || ((bytes/packets) > 1200))
663                                 retval = bulk_latency;
664                         else if ((packets > 35))
665                                 retval = lowest_latency;
666                 } else if (bytes/packets > 2000) {
667                         retval = bulk_latency;
668                 } else if (packets <= 2 && bytes < 512) {
669                         retval = lowest_latency;
670                 }
671                 break;
672         case bulk_latency: /* 250 usec aka 4000 ints/s */
673                 if (bytes > 25000) {
674                         if (packets > 35)
675                                 retval = low_latency;
676                 } else if (bytes < 6000) {
677                         retval = low_latency;
678                 }
679                 break;
680         default:
681                 break;
682         }
683
684 update_itr_done:
685         return retval;
686 }
687
688 static int igbvf_range_to_itr(enum latency_range current_range)
689 {
690         int new_itr;
691
692         switch (current_range) {
693         /* counts and packets in update_itr are dependent on these numbers */
694         case lowest_latency:
695                 new_itr = IGBVF_70K_ITR;
696                 break;
697         case low_latency:
698                 new_itr = IGBVF_20K_ITR;
699                 break;
700         case bulk_latency:
701                 new_itr = IGBVF_4K_ITR;
702                 break;
703         default:
704                 new_itr = IGBVF_START_ITR;
705                 break;
706         }
707         return new_itr;
708 }
709
710 static void igbvf_set_itr(struct igbvf_adapter *adapter)
711 {
712         u32 new_itr;
713
714         adapter->tx_ring->itr_range =
715                         igbvf_update_itr(adapter,
716                                          adapter->tx_ring->itr_val,
717                                          adapter->total_tx_packets,
718                                          adapter->total_tx_bytes);
719
720         /* conservative mode (itr 3) eliminates the lowest_latency setting */
721         if (adapter->requested_itr == 3 &&
722             adapter->tx_ring->itr_range == lowest_latency)
723                 adapter->tx_ring->itr_range = low_latency;
724
725         new_itr = igbvf_range_to_itr(adapter->tx_ring->itr_range);
726
727         if (new_itr != adapter->tx_ring->itr_val) {
728                 u32 current_itr = adapter->tx_ring->itr_val;
729                 /* this attempts to bias the interrupt rate towards Bulk
730                  * by adding intermediate steps when interrupt rate is
731                  * increasing
732                  */
733                 new_itr = new_itr > current_itr ?
734                           min(current_itr + (new_itr >> 2), new_itr) :
735                           new_itr;
736                 adapter->tx_ring->itr_val = new_itr;
737
738                 adapter->tx_ring->set_itr = 1;
739         }
740
741         adapter->rx_ring->itr_range =
742                         igbvf_update_itr(adapter, adapter->rx_ring->itr_val,
743                                          adapter->total_rx_packets,
744                                          adapter->total_rx_bytes);
745         if (adapter->requested_itr == 3 &&
746             adapter->rx_ring->itr_range == lowest_latency)
747                 adapter->rx_ring->itr_range = low_latency;
748
749         new_itr = igbvf_range_to_itr(adapter->rx_ring->itr_range);
750
751         if (new_itr != adapter->rx_ring->itr_val) {
752                 u32 current_itr = adapter->rx_ring->itr_val;
753
754                 new_itr = new_itr > current_itr ?
755                           min(current_itr + (new_itr >> 2), new_itr) :
756                           new_itr;
757                 adapter->rx_ring->itr_val = new_itr;
758
759                 adapter->rx_ring->set_itr = 1;
760         }
761 }
762
/**
 * igbvf_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring: Tx ring whose completed descriptors should be reclaimed
 *
 * Walks the ring from next_to_clean, releasing every buffer that belongs to
 * a packet whose end-of-packet descriptor has the DD status bit set, and
 * adds the reclaimed packet/byte totals to netdev->stats.  The queue is
 * woken if it was stopped and enough descriptors are free again.
 *
 * Return: true if fewer than tx_ring->count descriptors were processed in
 * this call (the caller treats this as "ring completely cleaned"), false
 * otherwise.
 **/
static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
{
        struct igbvf_adapter *adapter = tx_ring->adapter;
        struct net_device *netdev = adapter->netdev;
        struct igbvf_buffer *buffer_info;
        struct sk_buff *skb;
        union e1000_adv_tx_desc *tx_desc, *eop_desc;
        unsigned int total_bytes = 0, total_packets = 0;
        unsigned int i, count = 0;
        bool cleaned = false;

        i = tx_ring->next_to_clean;
        buffer_info = &tx_ring->buffer_info[i];
        eop_desc = buffer_info->next_to_watch;

        /* outer loop: one iteration per completed packet */
        do {
                /* if next_to_watch is not set then there is no work pending */
                if (!eop_desc)
                        break;

                /* prevent any other reads prior to eop_desc */
                smp_rmb();

                /* if DD is not set pending work has not been completed */
                if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
                        break;

                /* clear next_to_watch to prevent false hangs */
                buffer_info->next_to_watch = NULL;

                /* inner loop: release every descriptor up to and including
                 * the end-of-packet descriptor
                 */
                for (cleaned = false; !cleaned; count++) {
                        tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
                        cleaned = (tx_desc == eop_desc);
                        skb = buffer_info->skb;

                        if (skb) {
                                unsigned int segs, bytecount;

                                /* gso_segs is currently only valid for tcp */
                                segs = skb_shinfo(skb)->gso_segs ?: 1;
                                /* multiply data chunks by size of headers */
                                bytecount = ((segs - 1) * skb_headlen(skb)) +
                                            skb->len;
                                total_packets += segs;
                                total_bytes += bytecount;
                        }

                        igbvf_put_txbuf(adapter, buffer_info);
                        tx_desc->wb.status = 0;

                        /* advance, wrapping at the end of the ring */
                        i++;
                        if (i == tx_ring->count)
                                i = 0;

                        buffer_info = &tx_ring->buffer_info[i];
                }

                eop_desc = buffer_info->next_to_watch;
        } while (count < tx_ring->count);

        tx_ring->next_to_clean = i;

        if (unlikely(count && netif_carrier_ok(netdev) &&
            igbvf_desc_unused(tx_ring) >= IGBVF_TX_QUEUE_WAKE)) {
                /* Make sure that anybody stopping the queue after this
                 * sees the new next_to_clean.
                 */
                smp_mb();
                if (netif_queue_stopped(netdev) &&
                    !(test_bit(__IGBVF_DOWN, &adapter->state))) {
                        netif_wake_queue(netdev);
                        ++adapter->restart_queue;
                }
        }

        netdev->stats.tx_bytes += total_bytes;
        netdev->stats.tx_packets += total_packets;
        /* false means a full ring's worth of descriptors was processed */
        return count < tx_ring->count;
}
848
849 static irqreturn_t igbvf_msix_other(int irq, void *data)
850 {
851         struct net_device *netdev = data;
852         struct igbvf_adapter *adapter = netdev_priv(netdev);
853         struct e1000_hw *hw = &adapter->hw;
854
855         adapter->int_counter1++;
856
857         hw->mac.get_link_status = 1;
858         if (!test_bit(__IGBVF_DOWN, &adapter->state))
859                 mod_timer(&adapter->watchdog_timer, jiffies + 1);
860
861         ew32(EIMS, adapter->eims_other);
862
863         return IRQ_HANDLED;
864 }
865
866 static irqreturn_t igbvf_intr_msix_tx(int irq, void *data)
867 {
868         struct net_device *netdev = data;
869         struct igbvf_adapter *adapter = netdev_priv(netdev);
870         struct e1000_hw *hw = &adapter->hw;
871         struct igbvf_ring *tx_ring = adapter->tx_ring;
872
873         if (tx_ring->set_itr) {
874                 writel(tx_ring->itr_val,
875                        adapter->hw.hw_addr + tx_ring->itr_register);
876                 adapter->tx_ring->set_itr = 0;
877         }
878
879         adapter->total_tx_bytes = 0;
880         adapter->total_tx_packets = 0;
881
882         /* auto mask will automatically re-enable the interrupt when we write
883          * EICS
884          */
885         if (!igbvf_clean_tx_irq(tx_ring))
886                 /* Ring was not completely cleaned, so fire another interrupt */
887                 ew32(EICS, tx_ring->eims_value);
888         else
889                 ew32(EIMS, tx_ring->eims_value);
890
891         return IRQ_HANDLED;
892 }
893
894 static irqreturn_t igbvf_intr_msix_rx(int irq, void *data)
895 {
896         struct net_device *netdev = data;
897         struct igbvf_adapter *adapter = netdev_priv(netdev);
898
899         adapter->int_counter0++;
900
901         /* Write the ITR value calculated at the end of the
902          * previous interrupt.
903          */
904         if (adapter->rx_ring->set_itr) {
905                 writel(adapter->rx_ring->itr_val,
906                        adapter->hw.hw_addr + adapter->rx_ring->itr_register);
907                 adapter->rx_ring->set_itr = 0;
908         }
909
910         if (napi_schedule_prep(&adapter->rx_ring->napi)) {
911                 adapter->total_rx_bytes = 0;
912                 adapter->total_rx_packets = 0;
913                 __napi_schedule(&adapter->rx_ring->napi);
914         }
915
916         return IRQ_HANDLED;
917 }
918
/* Queue index passed to igbvf_assign_vector() when no queue of that kind
 * should be mapped; parenthesized so it is safe in any expression context.
 */
#define IGBVF_NO_QUEUE (-1)
920
921 static void igbvf_assign_vector(struct igbvf_adapter *adapter, int rx_queue,
922                                 int tx_queue, int msix_vector)
923 {
924         struct e1000_hw *hw = &adapter->hw;
925         u32 ivar, index;
926
927         /* 82576 uses a table-based method for assigning vectors.
928          * Each queue has a single entry in the table to which we write
929          * a vector number along with a "valid" bit.  Sadly, the layout
930          * of the table is somewhat counterintuitive.
931          */
932         if (rx_queue > IGBVF_NO_QUEUE) {
933                 index = (rx_queue >> 1);
934                 ivar = array_er32(IVAR0, index);
935                 if (rx_queue & 0x1) {
936                         /* vector goes into third byte of register */
937                         ivar = ivar & 0xFF00FFFF;
938                         ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
939                 } else {
940                         /* vector goes into low byte of register */
941                         ivar = ivar & 0xFFFFFF00;
942                         ivar |= msix_vector | E1000_IVAR_VALID;
943                 }
944                 adapter->rx_ring[rx_queue].eims_value = BIT(msix_vector);
945                 array_ew32(IVAR0, index, ivar);
946         }
947         if (tx_queue > IGBVF_NO_QUEUE) {
948                 index = (tx_queue >> 1);
949                 ivar = array_er32(IVAR0, index);
950                 if (tx_queue & 0x1) {
951                         /* vector goes into high byte of register */
952                         ivar = ivar & 0x00FFFFFF;
953                         ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
954                 } else {
955                         /* vector goes into second byte of register */
956                         ivar = ivar & 0xFFFF00FF;
957                         ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
958                 }
959                 adapter->tx_ring[tx_queue].eims_value = BIT(msix_vector);
960                 array_ew32(IVAR0, index, ivar);
961         }
962 }
963
/**
 * igbvf_configure_msix - Configure MSI-X hardware
 * @adapter: board private structure
 *
 * igbvf_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts: vector 0 is assigned to Tx queue 0, vector 1
 * to Rx queue 0 and vector 2 to the "other causes" entry (IVAR_MISC).  The
 * current ITR value of each ring is written to its ITR register.
 **/
static void igbvf_configure_msix(struct igbvf_adapter *adapter)
{
        u32 tmp;
        struct e1000_hw *hw = &adapter->hw;
        struct igbvf_ring *tx_ring = adapter->tx_ring;
        struct igbvf_ring *rx_ring = adapter->rx_ring;
        int vector = 0;

        adapter->eims_enable_mask = 0;

        /* Tx queue 0 -> first vector */
        igbvf_assign_vector(adapter, IGBVF_NO_QUEUE, 0, vector++);
        adapter->eims_enable_mask |= tx_ring->eims_value;
        writel(tx_ring->itr_val, hw->hw_addr + tx_ring->itr_register);
        /* Rx queue 0 -> second vector */
        igbvf_assign_vector(adapter, 0, IGBVF_NO_QUEUE, vector++);
        adapter->eims_enable_mask |= rx_ring->eims_value;
        writel(rx_ring->itr_val, hw->hw_addr + rx_ring->itr_register);

        /* set vector for other causes, i.e. link changes */

        tmp = (vector++ | E1000_IVAR_VALID);

        ew32(IVAR_MISC, tmp);

        /* NOTE: this assignment replaces the mask accumulated above with
         * one bit per vector handed out so far; eims_other is the bit of
         * the last (other causes) vector.
         */
        adapter->eims_enable_mask = GENMASK(vector - 1, 0);
        adapter->eims_other = BIT(vector - 1);
        e1e_flush();
}
998
999 static void igbvf_reset_interrupt_capability(struct igbvf_adapter *adapter)
1000 {
1001         if (adapter->msix_entries) {
1002                 pci_disable_msix(adapter->pdev);
1003                 kfree(adapter->msix_entries);
1004                 adapter->msix_entries = NULL;
1005         }
1006 }
1007
1008 /**
1009  * igbvf_set_interrupt_capability - set MSI or MSI-X if supported
1010  * @adapter: board private structure
1011  *
1012  * Attempt to configure interrupts using the best available
1013  * capabilities of the hardware and kernel.
1014  **/
1015 static void igbvf_set_interrupt_capability(struct igbvf_adapter *adapter)
1016 {
1017         int err = -ENOMEM;
1018         int i;
1019
1020         /* we allocate 3 vectors, 1 for Tx, 1 for Rx, one for PF messages */
1021         adapter->msix_entries = kcalloc(3, sizeof(struct msix_entry),
1022                                         GFP_KERNEL);
1023         if (adapter->msix_entries) {
1024                 for (i = 0; i < 3; i++)
1025                         adapter->msix_entries[i].entry = i;
1026
1027                 err = pci_enable_msix_range(adapter->pdev,
1028                                             adapter->msix_entries, 3, 3);
1029         }
1030
1031         if (err < 0) {
1032                 /* MSI-X failed */
1033                 dev_err(&adapter->pdev->dev,
1034                         "Failed to initialize MSI-X interrupts.\n");
1035                 igbvf_reset_interrupt_capability(adapter);
1036         }
1037 }
1038
1039 /**
1040  * igbvf_request_msix - Initialize MSI-X interrupts
1041  * @adapter: board private structure
1042  *
1043  * igbvf_request_msix allocates MSI-X vectors and requests interrupts from the
1044  * kernel.
1045  **/
1046 static int igbvf_request_msix(struct igbvf_adapter *adapter)
1047 {
1048         struct net_device *netdev = adapter->netdev;
1049         int err = 0, vector = 0;
1050
1051         if (strlen(netdev->name) < (IFNAMSIZ - 5)) {
1052                 sprintf(adapter->tx_ring->name, "%s-tx-0", netdev->name);
1053                 sprintf(adapter->rx_ring->name, "%s-rx-0", netdev->name);
1054         } else {
1055                 memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
1056                 memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
1057         }
1058
1059         err = request_irq(adapter->msix_entries[vector].vector,
1060                           igbvf_intr_msix_tx, 0, adapter->tx_ring->name,
1061                           netdev);
1062         if (err)
1063                 goto out;
1064
1065         adapter->tx_ring->itr_register = E1000_EITR(vector);
1066         adapter->tx_ring->itr_val = adapter->current_itr;
1067         vector++;
1068
1069         err = request_irq(adapter->msix_entries[vector].vector,
1070                           igbvf_intr_msix_rx, 0, adapter->rx_ring->name,
1071                           netdev);
1072         if (err)
1073                 goto out;
1074
1075         adapter->rx_ring->itr_register = E1000_EITR(vector);
1076         adapter->rx_ring->itr_val = adapter->current_itr;
1077         vector++;
1078
1079         err = request_irq(adapter->msix_entries[vector].vector,
1080                           igbvf_msix_other, 0, netdev->name, netdev);
1081         if (err)
1082                 goto out;
1083
1084         igbvf_configure_msix(adapter);
1085         return 0;
1086 out:
1087         return err;
1088 }
1089
1090 /**
1091  * igbvf_alloc_queues - Allocate memory for all rings
1092  * @adapter: board private structure to initialize
1093  **/
1094 static int igbvf_alloc_queues(struct igbvf_adapter *adapter)
1095 {
1096         struct net_device *netdev = adapter->netdev;
1097
1098         adapter->tx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
1099         if (!adapter->tx_ring)
1100                 return -ENOMEM;
1101
1102         adapter->rx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
1103         if (!adapter->rx_ring) {
1104                 kfree(adapter->tx_ring);
1105                 return -ENOMEM;
1106         }
1107
1108         netif_napi_add(netdev, &adapter->rx_ring->napi, igbvf_poll, 64);
1109
1110         return 0;
1111 }
1112
1113 /**
1114  * igbvf_request_irq - initialize interrupts
1115  * @adapter: board private structure
1116  *
1117  * Attempts to configure interrupts using the best available
1118  * capabilities of the hardware and kernel.
1119  **/
1120 static int igbvf_request_irq(struct igbvf_adapter *adapter)
1121 {
1122         int err = -1;
1123
1124         /* igbvf supports msi-x only */
1125         if (adapter->msix_entries)
1126                 err = igbvf_request_msix(adapter);
1127
1128         if (!err)
1129                 return err;
1130
1131         dev_err(&adapter->pdev->dev,
1132                 "Unable to allocate interrupt, Error: %d\n", err);
1133
1134         return err;
1135 }
1136
1137 static void igbvf_free_irq(struct igbvf_adapter *adapter)
1138 {
1139         struct net_device *netdev = adapter->netdev;
1140         int vector;
1141
1142         if (adapter->msix_entries) {
1143                 for (vector = 0; vector < 3; vector++)
1144                         free_irq(adapter->msix_entries[vector].vector, netdev);
1145         }
1146 }
1147
/**
 * igbvf_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 *
 * Writes all ones to EIMC and, when an MSI-X entry table is present,
 * writes zero to EIAC.
 **/
static void igbvf_irq_disable(struct igbvf_adapter *adapter)
{
        /* 'hw' is referenced implicitly by the ew32() register macro */
        struct e1000_hw *hw = &adapter->hw;

        ew32(EIMC, ~0);

        if (adapter->msix_entries)
                ew32(EIAC, 0);
}
1161
/**
 * igbvf_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 *
 * Writes adapter->eims_enable_mask to the EIAC, EIAM and EIMS registers,
 * in that order.
 **/
static void igbvf_irq_enable(struct igbvf_adapter *adapter)
{
        /* 'hw' is referenced implicitly by the ew32() register macro */
        struct e1000_hw *hw = &adapter->hw;

        ew32(EIAC, adapter->eims_enable_mask);
        ew32(EIAM, adapter->eims_enable_mask);
        ew32(EIMS, adapter->eims_enable_mask);
}
1174
1175 /**
1176  * igbvf_poll - NAPI Rx polling callback
1177  * @napi: struct associated with this polling callback
1178  * @budget: amount of packets driver is allowed to process this poll
1179  **/
1180 static int igbvf_poll(struct napi_struct *napi, int budget)
1181 {
1182         struct igbvf_ring *rx_ring = container_of(napi, struct igbvf_ring, napi);
1183         struct igbvf_adapter *adapter = rx_ring->adapter;
1184         struct e1000_hw *hw = &adapter->hw;
1185         int work_done = 0;
1186
1187         igbvf_clean_rx_irq(adapter, &work_done, budget);
1188
1189         if (work_done == budget)
1190                 return budget;
1191
1192         /* Exit the polling mode, but don't re-enable interrupts if stack might
1193          * poll us due to busy-polling
1194          */
1195         if (likely(napi_complete_done(napi, work_done))) {
1196                 if (adapter->requested_itr & 3)
1197                         igbvf_set_itr(adapter);
1198
1199                 if (!test_bit(__IGBVF_DOWN, &adapter->state))
1200                         ew32(EIMS, adapter->rx_ring->eims_value);
1201         }
1202
1203         return work_done;
1204 }
1205
1206 /**
1207  * igbvf_set_rlpml - set receive large packet maximum length
1208  * @adapter: board private structure
1209  *
1210  * Configure the maximum size of packets that will be received
1211  */
1212 static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
1213 {
1214         int max_frame_size;
1215         struct e1000_hw *hw = &adapter->hw;
1216
1217         max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
1218
1219         spin_lock_bh(&hw->mbx_lock);
1220
1221         e1000_rlpml_set_vf(hw, max_frame_size);
1222
1223         spin_unlock_bh(&hw->mbx_lock);
1224 }
1225
1226 static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
1227                                  __be16 proto, u16 vid)
1228 {
1229         struct igbvf_adapter *adapter = netdev_priv(netdev);
1230         struct e1000_hw *hw = &adapter->hw;
1231
1232         spin_lock_bh(&hw->mbx_lock);
1233
1234         if (hw->mac.ops.set_vfta(hw, vid, true)) {
1235                 dev_err(&adapter->pdev->dev, "Failed to add vlan id %d\n", vid);
1236                 spin_unlock_bh(&hw->mbx_lock);
1237                 return -EINVAL;
1238         }
1239
1240         spin_unlock_bh(&hw->mbx_lock);
1241
1242         set_bit(vid, adapter->active_vlans);
1243         return 0;
1244 }
1245
1246 static int igbvf_vlan_rx_kill_vid(struct net_device *netdev,
1247                                   __be16 proto, u16 vid)
1248 {
1249         struct igbvf_adapter *adapter = netdev_priv(netdev);
1250         struct e1000_hw *hw = &adapter->hw;
1251
1252         spin_lock_bh(&hw->mbx_lock);
1253
1254         if (hw->mac.ops.set_vfta(hw, vid, false)) {
1255                 dev_err(&adapter->pdev->dev,
1256                         "Failed to remove vlan id %d\n", vid);
1257                 spin_unlock_bh(&hw->mbx_lock);
1258                 return -EINVAL;
1259         }
1260
1261         spin_unlock_bh(&hw->mbx_lock);
1262
1263         clear_bit(vid, adapter->active_vlans);
1264         return 0;
1265 }
1266
1267 static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
1268 {
1269         u16 vid;
1270
1271         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1272                 igbvf_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
1273 }
1274
/**
 * igbvf_configure_tx - Configure Transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset: the queue is disabled,
 * the descriptor ring base/length and head/tail registers are programmed,
 * relaxed ordering on write-backs is turned off, and the queue is enabled
 * again.  Also initialises adapter->txd_cmd.  Sleeps (msleep).
 **/
static void igbvf_configure_tx(struct igbvf_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        struct igbvf_ring *tx_ring = adapter->tx_ring;
        u64 tdba;
        u32 txdctl, dca_txctrl;

        /* disable transmits */
        txdctl = er32(TXDCTL(0));
        ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
        e1e_flush();
        msleep(10);

        /* Setup the HW Tx Head and Tail descriptor pointers */
        ew32(TDLEN(0), tx_ring->count * sizeof(union e1000_adv_tx_desc));
        tdba = tx_ring->dma;
        /* 64-bit ring DMA address is split across the low/high registers */
        ew32(TDBAL(0), (tdba & DMA_BIT_MASK(32)));
        ew32(TDBAH(0), (tdba >> 32));
        ew32(TDH(0), 0);
        ew32(TDT(0), 0);
        /* NOTE(review): presumably these store register offsets for later
         * head/tail accesses - confirm against the ring users
         */
        tx_ring->head = E1000_TDH(0);
        tx_ring->tail = E1000_TDT(0);

        /* Turn off Relaxed Ordering on head write-backs.  The writebacks
         * MUST be delivered in order or it will completely screw up
         * our bookkeeping.
         */
        dca_txctrl = er32(DCA_TXCTRL(0));
        dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
        ew32(DCA_TXCTRL(0), dca_txctrl);

        /* enable transmits */
        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
        ew32(TXDCTL(0), txdctl);

        /* Setup Transmit Descriptor Settings for eop descriptor */
        adapter->txd_cmd = E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_IFCS;

        /* enable Report Status bit */
        adapter->txd_cmd |= E1000_ADVTXD_DCMD_RS;
}
1322
1323 /**
1324  * igbvf_setup_srrctl - configure the receive control registers
1325  * @adapter: Board private structure
1326  **/
1327 static void igbvf_setup_srrctl(struct igbvf_adapter *adapter)
1328 {
1329         struct e1000_hw *hw = &adapter->hw;
1330         u32 srrctl = 0;
1331
1332         srrctl &= ~(E1000_SRRCTL_DESCTYPE_MASK |
1333                     E1000_SRRCTL_BSIZEHDR_MASK |
1334                     E1000_SRRCTL_BSIZEPKT_MASK);
1335
1336         /* Enable queue drop to avoid head of line blocking */
1337         srrctl |= E1000_SRRCTL_DROP_EN;
1338
1339         /* Setup buffer sizes */
1340         srrctl |= ALIGN(adapter->rx_buffer_len, 1024) >>
1341                   E1000_SRRCTL_BSIZEPKT_SHIFT;
1342
1343         if (adapter->rx_buffer_len < 2048) {
1344                 adapter->rx_ps_hdr_size = 0;
1345                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1346         } else {
1347                 adapter->rx_ps_hdr_size = 128;
1348                 srrctl |= adapter->rx_ps_hdr_size <<
1349                           E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
1350                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
1351         }
1352
1353         ew32(SRRCTL(0), srrctl);
1354 }
1355
/**
 * igbvf_configure_rx - Configure Receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset: the queue is disabled,
 * the descriptor ring base/length and head/tail registers are programmed,
 * the maximum receive packet length is set, and the queue is enabled again
 * with the driver's threshold values.  Sleeps (msleep).
 **/
static void igbvf_configure_rx(struct igbvf_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        struct igbvf_ring *rx_ring = adapter->rx_ring;
        u64 rdba;
        u32 rxdctl;

        /* disable receives */
        rxdctl = er32(RXDCTL(0));
        ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
        e1e_flush();
        msleep(10);

        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring
         */
        rdba = rx_ring->dma;
        /* 64-bit ring DMA address is split across the low/high registers */
        ew32(RDBAL(0), (rdba & DMA_BIT_MASK(32)));
        ew32(RDBAH(0), (rdba >> 32));
        ew32(RDLEN(0), rx_ring->count * sizeof(union e1000_adv_rx_desc));
        rx_ring->head = E1000_RDH(0);
        rx_ring->tail = E1000_RDT(0);
        ew32(RDH(0), 0);
        ew32(RDT(0), 0);

        /* clear the low 20 bits, then insert the prefetch, host and
         * write-back thresholds at bits 0, 8 and 16
         */
        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
        rxdctl &= 0xFFF00000;
        rxdctl |= IGBVF_RX_PTHRESH;
        rxdctl |= IGBVF_RX_HTHRESH << 8;
        rxdctl |= IGBVF_RX_WTHRESH << 16;

        igbvf_set_rlpml(adapter);

        /* enable receives */
        ew32(RXDCTL(0), rxdctl);
}
1398
1399 /**
1400  * igbvf_set_multi - Multicast and Promiscuous mode set
1401  * @netdev: network interface device structure
1402  *
1403  * The set_multi entry point is called whenever the multicast address
1404  * list or the network interface flags are updated.  This routine is
1405  * responsible for configuring the hardware for proper multicast,
1406  * promiscuous mode, and all-multi behavior.
1407  **/
1408 static void igbvf_set_multi(struct net_device *netdev)
1409 {
1410         struct igbvf_adapter *adapter = netdev_priv(netdev);
1411         struct e1000_hw *hw = &adapter->hw;
1412         struct netdev_hw_addr *ha;
1413         u8  *mta_list = NULL;
1414         int i;
1415
1416         if (!netdev_mc_empty(netdev)) {
1417                 mta_list = kmalloc_array(netdev_mc_count(netdev), ETH_ALEN,
1418                                          GFP_ATOMIC);
1419                 if (!mta_list)
1420                         return;
1421         }
1422
1423         /* prepare a packed array of only addresses. */
1424         i = 0;
1425         netdev_for_each_mc_addr(ha, netdev)
1426                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
1427
1428         spin_lock_bh(&hw->mbx_lock);
1429
1430         hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
1431
1432         spin_unlock_bh(&hw->mbx_lock);
1433         kfree(mta_list);
1434 }
1435
1436 /**
1437  * igbvf_set_uni - Configure unicast MAC filters
1438  * @netdev: network interface device structure
1439  *
1440  * This routine is responsible for configuring the hardware for proper
1441  * unicast filters.
1442  **/
1443 static int igbvf_set_uni(struct net_device *netdev)
1444 {
1445         struct igbvf_adapter *adapter = netdev_priv(netdev);
1446         struct e1000_hw *hw = &adapter->hw;
1447
1448         if (netdev_uc_count(netdev) > IGBVF_MAX_MAC_FILTERS) {
1449                 pr_err("Too many unicast filters - No Space\n");
1450                 return -ENOSPC;
1451         }
1452
1453         spin_lock_bh(&hw->mbx_lock);
1454
1455         /* Clear all unicast MAC filters */
1456         hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_CLR, NULL);
1457
1458         spin_unlock_bh(&hw->mbx_lock);
1459
1460         if (!netdev_uc_empty(netdev)) {
1461                 struct netdev_hw_addr *ha;
1462
1463                 /* Add MAC filters one by one */
1464                 netdev_for_each_uc_addr(ha, netdev) {
1465                         spin_lock_bh(&hw->mbx_lock);
1466
1467                         hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_ADD,
1468                                                 ha->addr);
1469
1470                         spin_unlock_bh(&hw->mbx_lock);
1471                         udelay(200);
1472                 }
1473         }
1474
1475         return 0;
1476 }
1477
/* Refresh both receive filter sets: multicast first, then unicast.  The
 * result of the unicast update is not propagated.
 */
static void igbvf_set_rx_mode(struct net_device *netdev)
{
        igbvf_set_multi(netdev);

        igbvf_set_uni(netdev);
}
1483
1484 /**
1485  * igbvf_configure - configure the hardware for Rx and Tx
1486  * @adapter: private board structure
1487  **/
1488 static void igbvf_configure(struct igbvf_adapter *adapter)
1489 {
1490         igbvf_set_rx_mode(adapter->netdev);
1491
1492         igbvf_restore_vlan(adapter);
1493
1494         igbvf_configure_tx(adapter);
1495         igbvf_setup_srrctl(adapter);
1496         igbvf_configure_rx(adapter);
1497         igbvf_alloc_rx_buffers(adapter->rx_ring,
1498                                igbvf_desc_unused(adapter->rx_ring));
1499 }
1500
1501 /* igbvf_reset - bring the hardware into a known good state
1502  * @adapter: private board structure
1503  *
1504  * This function boots the hardware and enables some settings that
1505  * require a configuration cycle of the hardware - those cannot be
1506  * set/changed during runtime. After reset the device needs to be
1507  * properly configured for Rx, Tx etc.
1508  */
1509 static void igbvf_reset(struct igbvf_adapter *adapter)
1510 {
1511         struct e1000_mac_info *mac = &adapter->hw.mac;
1512         struct net_device *netdev = adapter->netdev;
1513         struct e1000_hw *hw = &adapter->hw;
1514
1515         spin_lock_bh(&hw->mbx_lock);
1516
1517         /* Allow time for pending master requests to run */
1518         if (mac->ops.reset_hw(hw))
1519                 dev_err(&adapter->pdev->dev, "PF still resetting\n");
1520
1521         mac->ops.init_hw(hw);
1522
1523         spin_unlock_bh(&hw->mbx_lock);
1524
1525         if (is_valid_ether_addr(adapter->hw.mac.addr)) {
1526                 memcpy(netdev->dev_addr, adapter->hw.mac.addr,
1527                        netdev->addr_len);
1528                 memcpy(netdev->perm_addr, adapter->hw.mac.addr,
1529                        netdev->addr_len);
1530         }
1531
1532         adapter->last_reset = jiffies;
1533 }
1534
/* igbvf_up - bring the interface into the running state
 * @adapter: board private structure
 *
 * Reconfigures the hardware, clears the DOWN flag, enables NAPI and
 * interrupts, and starts the watchdog timer.  Always returns 0.
 */
int igbvf_up(struct igbvf_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /* hardware has been reset, we need to reload some things */
        igbvf_configure(adapter);

        clear_bit(__IGBVF_DOWN, &adapter->state);

        napi_enable(&adapter->rx_ring->napi);
        if (adapter->msix_entries)
                igbvf_configure_msix(adapter);

        /* Clear any pending interrupts. */
        er32(EICR);
        igbvf_irq_enable(adapter);

        /* start the watchdog */
        hw->mac.get_link_status = 1;
        mod_timer(&adapter->watchdog_timer, jiffies + 1);

        return 0;
}
1558
/* igbvf_down - take the interface out of the running state
 * @adapter: board private structure
 *
 * Sets the DOWN flag, disables the Rx and Tx queues in hardware, stops the
 * stack's queue, disables NAPI, interrupts and the watchdog timer, records
 * the statistics, resets the hardware and cleans both rings.  Sleeps
 * (msleep).
 */
void igbvf_down(struct igbvf_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        u32 rxdctl, txdctl;

        /* signal that we're down so the interrupt handler does not
         * reschedule our watchdog timer
         */
        set_bit(__IGBVF_DOWN, &adapter->state);

        /* disable receives in the hardware */
        rxdctl = er32(RXDCTL(0));
        ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);

        netif_carrier_off(netdev);
        netif_stop_queue(netdev);

        /* disable transmits in the hardware */
        txdctl = er32(TXDCTL(0));
        ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);

        /* flush both disables and wait for them to finish */
        e1e_flush();
        msleep(10);

        napi_disable(&adapter->rx_ring->napi);

        igbvf_irq_disable(adapter);

        del_timer_sync(&adapter->watchdog_timer);

        /* record the stats before reset*/
        igbvf_update_stats(adapter);

        adapter->link_speed = 0;
        adapter->link_duplex = 0;

        /* reset the hardware first, then release the ring buffers */
        igbvf_reset(adapter);
        igbvf_clean_tx_ring(adapter->tx_ring);
        igbvf_clean_rx_ring(adapter->rx_ring);
}
1601
1602 void igbvf_reinit_locked(struct igbvf_adapter *adapter)
1603 {
1604         might_sleep();
1605         while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
1606                 usleep_range(1000, 2000);
1607         igbvf_down(adapter);
1608         igbvf_up(adapter);
1609         clear_bit(__IGBVF_RESETTING, &adapter->state);
1610 }
1611
1612 /**
1613  * igbvf_sw_init - Initialize general software structures (struct igbvf_adapter)
1614  * @adapter: board private structure to initialize
1615  *
1616  * igbvf_sw_init initializes the Adapter private data structure.
1617  * Fields are initialized based on PCI device information and
1618  * OS network device settings (MTU size).
1619  **/
1620 static int igbvf_sw_init(struct igbvf_adapter *adapter)
1621 {
1622         struct net_device *netdev = adapter->netdev;
1623         s32 rc;
1624
1625         adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN;
1626         adapter->rx_ps_hdr_size = 0;
1627         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1628         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1629
1630         adapter->tx_int_delay = 8;
1631         adapter->tx_abs_int_delay = 32;
1632         adapter->rx_int_delay = 0;
1633         adapter->rx_abs_int_delay = 8;
1634         adapter->requested_itr = 3;
1635         adapter->current_itr = IGBVF_START_ITR;
1636
1637         /* Set various function pointers */
1638         adapter->ei->init_ops(&adapter->hw);
1639
1640         rc = adapter->hw.mac.ops.init_params(&adapter->hw);
1641         if (rc)
1642                 return rc;
1643
1644         rc = adapter->hw.mbx.ops.init_params(&adapter->hw);
1645         if (rc)
1646                 return rc;
1647
1648         igbvf_set_interrupt_capability(adapter);
1649
1650         if (igbvf_alloc_queues(adapter))
1651                 return -ENOMEM;
1652
1653         spin_lock_init(&adapter->tx_queue_lock);
1654
1655         /* Explicitly disable IRQ since the NIC can be in any state. */
1656         igbvf_irq_disable(adapter);
1657
1658         spin_lock_init(&adapter->stats_lock);
1659         spin_lock_init(&adapter->hw.mbx_lock);
1660
1661         set_bit(__IGBVF_DOWN, &adapter->state);
1662         return 0;
1663 }
1664
/* Snapshot the VF statistics registers into both the last_* and the base_*
 * fields of adapter->stats.  Each register is read twice, once per field
 * group, so the two snapshots are taken at slightly different times.
 */
static void igbvf_initialize_last_counter_stats(struct igbvf_adapter *adapter)
{
        /* 'hw' is referenced implicitly by the er32() register macro */
        struct e1000_hw *hw = &adapter->hw;

        /* first pass: last_* values */
        adapter->stats.last_gprc = er32(VFGPRC);
        adapter->stats.last_gorc = er32(VFGORC);
        adapter->stats.last_gptc = er32(VFGPTC);
        adapter->stats.last_gotc = er32(VFGOTC);
        adapter->stats.last_mprc = er32(VFMPRC);
        adapter->stats.last_gotlbc = er32(VFGOTLBC);
        adapter->stats.last_gptlbc = er32(VFGPTLBC);
        adapter->stats.last_gorlbc = er32(VFGORLBC);
        adapter->stats.last_gprlbc = er32(VFGPRLBC);

        /* second pass: base_* values */
        adapter->stats.base_gprc = er32(VFGPRC);
        adapter->stats.base_gorc = er32(VFGORC);
        adapter->stats.base_gptc = er32(VFGPTC);
        adapter->stats.base_gotc = er32(VFGOTC);
        adapter->stats.base_mprc = er32(VFMPRC);
        adapter->stats.base_gotlbc = er32(VFGOTLBC);
        adapter->stats.base_gptlbc = er32(VFGPTLBC);
        adapter->stats.base_gorlbc = er32(VFGORLBC);
        adapter->stats.base_gprlbc = er32(VFGPRLBC);
}
1689
1690 /**
1691  * igbvf_open - Called when a network interface is made active
1692  * @netdev: network interface device structure
1693  *
1694  * Returns 0 on success, negative value on failure
1695  *
1696  * The open entry point is called when a network interface is made
1697  * active by the system (IFF_UP).  At this point all resources needed
1698  * for transmit and receive operations are allocated, the interrupt
1699  * handler is registered with the OS, the watchdog timer is started,
1700  * and the stack is notified that the interface is ready.
1701  **/
1702 static int igbvf_open(struct net_device *netdev)
1703 {
1704         struct igbvf_adapter *adapter = netdev_priv(netdev);
1705         struct e1000_hw *hw = &adapter->hw;
1706         int err;
1707
1708         /* disallow open during test */
1709         if (test_bit(__IGBVF_TESTING, &adapter->state))
1710                 return -EBUSY;
1711
1712         /* allocate transmit descriptors */
1713         err = igbvf_setup_tx_resources(adapter, adapter->tx_ring);
1714         if (err)
1715                 goto err_setup_tx;
1716
1717         /* allocate receive descriptors */
1718         err = igbvf_setup_rx_resources(adapter, adapter->rx_ring);
1719         if (err)
1720                 goto err_setup_rx;
1721
1722         /* before we allocate an interrupt, we must be ready to handle it.
1723          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1724          * as soon as we call pci_request_irq, so we have to setup our
1725          * clean_rx handler before we do so.
1726          */
1727         igbvf_configure(adapter);
1728
1729         err = igbvf_request_irq(adapter);
1730         if (err)
1731                 goto err_req_irq;
1732
1733         /* From here on the code is the same as igbvf_up() */
1734         clear_bit(__IGBVF_DOWN, &adapter->state);
1735
1736         napi_enable(&adapter->rx_ring->napi);
1737
1738         /* clear any pending interrupts */
1739         er32(EICR);
1740
1741         igbvf_irq_enable(adapter);
1742
1743         /* start the watchdog */
1744         hw->mac.get_link_status = 1;
1745         mod_timer(&adapter->watchdog_timer, jiffies + 1);
1746
1747         return 0;
1748
1749 err_req_irq:
1750         igbvf_free_rx_resources(adapter->rx_ring);
1751 err_setup_rx:
1752         igbvf_free_tx_resources(adapter->tx_ring);
1753 err_setup_tx:
1754         igbvf_reset(adapter);
1755
1756         return err;
1757 }
1758
1759 /**
1760  * igbvf_close - Disables a network interface
1761  * @netdev: network interface device structure
1762  *
1763  * Returns 0, this is not allowed to fail
1764  *
1765  * The close entry point is called when an interface is de-activated
1766  * by the OS.  The hardware is still under the drivers control, but
1767  * needs to be disabled.  A global MAC reset is issued to stop the
1768  * hardware, and all transmit and receive resources are freed.
1769  **/
1770 static int igbvf_close(struct net_device *netdev)
1771 {
1772         struct igbvf_adapter *adapter = netdev_priv(netdev);
1773
1774         WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state));
1775         igbvf_down(adapter);
1776
1777         igbvf_free_irq(adapter);
1778
1779         igbvf_free_tx_resources(adapter->tx_ring);
1780         igbvf_free_rx_resources(adapter->rx_ring);
1781
1782         return 0;
1783 }
1784
1785 /**
1786  * igbvf_set_mac - Change the Ethernet Address of the NIC
1787  * @netdev: network interface device structure
1788  * @p: pointer to an address structure
1789  *
1790  * Returns 0 on success, negative on failure
1791  **/
1792 static int igbvf_set_mac(struct net_device *netdev, void *p)
1793 {
1794         struct igbvf_adapter *adapter = netdev_priv(netdev);
1795         struct e1000_hw *hw = &adapter->hw;
1796         struct sockaddr *addr = p;
1797
1798         if (!is_valid_ether_addr(addr->sa_data))
1799                 return -EADDRNOTAVAIL;
1800
1801         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
1802
1803         spin_lock_bh(&hw->mbx_lock);
1804
1805         hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
1806
1807         spin_unlock_bh(&hw->mbx_lock);
1808
1809         if (!ether_addr_equal(addr->sa_data, hw->mac.addr))
1810                 return -EADDRNOTAVAIL;
1811
1812         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
1813
1814         return 0;
1815 }
1816
/* Fold the 32-bit hardware counter @reg into the wider software counter
 * adapter->stats.@name.
 *
 * If the fresh reading is smaller than the previous one
 * (adapter->stats.last_@name) the hardware counter is taken to have
 * wrapped and 2^32 is added to the software counter.  The low 32 bits of
 * the software counter are then replaced with the fresh reading.
 *
 * Expects "adapter", and whatever er32() itself needs, to be in scope at
 * the expansion site.  Wrapped in do { } while (0) so that the macro
 * behaves as a single statement, including inside an unbraced if/else.
 */
#define UPDATE_VF_COUNTER(reg, name) \
do { \
        u32 current_counter = er32(reg); \
        if (current_counter < adapter->stats.last_##name) \
                adapter->stats.name += 0x100000000LL; \
        adapter->stats.last_##name = current_counter; \
        adapter->stats.name &= 0xFFFFFFFF00000000LL; \
        adapter->stats.name |= current_counter; \
} while (0)
1826
1827 /**
1828  * igbvf_update_stats - Update the board statistics counters
1829  * @adapter: board private structure
1830 **/
1831 void igbvf_update_stats(struct igbvf_adapter *adapter)
1832 {
1833         struct e1000_hw *hw = &adapter->hw;
1834         struct pci_dev *pdev = adapter->pdev;
1835
1836         /* Prevent stats update while adapter is being reset, link is down
1837          * or if the pci connection is down.
1838          */
1839         if (adapter->link_speed == 0)
1840                 return;
1841
1842         if (test_bit(__IGBVF_RESETTING, &adapter->state))
1843                 return;
1844
1845         if (pci_channel_offline(pdev))
1846                 return;
1847
1848         UPDATE_VF_COUNTER(VFGPRC, gprc);
1849         UPDATE_VF_COUNTER(VFGORC, gorc);
1850         UPDATE_VF_COUNTER(VFGPTC, gptc);
1851         UPDATE_VF_COUNTER(VFGOTC, gotc);
1852         UPDATE_VF_COUNTER(VFMPRC, mprc);
1853         UPDATE_VF_COUNTER(VFGOTLBC, gotlbc);
1854         UPDATE_VF_COUNTER(VFGPTLBC, gptlbc);
1855         UPDATE_VF_COUNTER(VFGORLBC, gorlbc);
1856         UPDATE_VF_COUNTER(VFGPRLBC, gprlbc);
1857
1858         /* Fill out the OS statistics structure */
1859         adapter->netdev->stats.multicast = adapter->stats.mprc;
1860 }
1861
1862 static void igbvf_print_link_info(struct igbvf_adapter *adapter)
1863 {
1864         dev_info(&adapter->pdev->dev, "Link is Up %d Mbps %s Duplex\n",
1865                  adapter->link_speed,
1866                  adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half");
1867 }
1868
1869 static bool igbvf_has_link(struct igbvf_adapter *adapter)
1870 {
1871         struct e1000_hw *hw = &adapter->hw;
1872         s32 ret_val = E1000_SUCCESS;
1873         bool link_active;
1874
1875         /* If interface is down, stay link down */
1876         if (test_bit(__IGBVF_DOWN, &adapter->state))
1877                 return false;
1878
1879         spin_lock_bh(&hw->mbx_lock);
1880
1881         ret_val = hw->mac.ops.check_for_link(hw);
1882
1883         spin_unlock_bh(&hw->mbx_lock);
1884
1885         link_active = !hw->mac.get_link_status;
1886
1887         /* if check for link returns error we will need to reset */
1888         if (ret_val && time_after(jiffies, adapter->last_reset + (10 * HZ)))
1889                 schedule_work(&adapter->reset_task);
1890
1891         return link_active;
1892 }
1893
1894 /**
1895  * igbvf_watchdog - Timer Call-back
1896  * @data: pointer to adapter cast into an unsigned long
1897  **/
1898 static void igbvf_watchdog(struct timer_list *t)
1899 {
1900         struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer);
1901
1902         /* Do the rest outside of interrupt context */
1903         schedule_work(&adapter->watchdog_task);
1904 }
1905
1906 static void igbvf_watchdog_task(struct work_struct *work)
1907 {
1908         struct igbvf_adapter *adapter = container_of(work,
1909                                                      struct igbvf_adapter,
1910                                                      watchdog_task);
1911         struct net_device *netdev = adapter->netdev;
1912         struct e1000_mac_info *mac = &adapter->hw.mac;
1913         struct igbvf_ring *tx_ring = adapter->tx_ring;
1914         struct e1000_hw *hw = &adapter->hw;
1915         u32 link;
1916         int tx_pending = 0;
1917
1918         link = igbvf_has_link(adapter);
1919
1920         if (link) {
1921                 if (!netif_carrier_ok(netdev)) {
1922                         mac->ops.get_link_up_info(&adapter->hw,
1923                                                   &adapter->link_speed,
1924                                                   &adapter->link_duplex);
1925                         igbvf_print_link_info(adapter);
1926
1927                         netif_carrier_on(netdev);
1928                         netif_wake_queue(netdev);
1929                 }
1930         } else {
1931                 if (netif_carrier_ok(netdev)) {
1932                         adapter->link_speed = 0;
1933                         adapter->link_duplex = 0;
1934                         dev_info(&adapter->pdev->dev, "Link is Down\n");
1935                         netif_carrier_off(netdev);
1936                         netif_stop_queue(netdev);
1937                 }
1938         }
1939
1940         if (netif_carrier_ok(netdev)) {
1941                 igbvf_update_stats(adapter);
1942         } else {
1943                 tx_pending = (igbvf_desc_unused(tx_ring) + 1 <
1944                               tx_ring->count);
1945                 if (tx_pending) {
1946                         /* We've lost link, so the controller stops DMA,
1947                          * but we've got queued Tx work that's never going
1948                          * to get done, so reset controller to flush Tx.
1949                          * (Do the reset outside of interrupt context).
1950                          */
1951                         adapter->tx_timeout_count++;
1952                         schedule_work(&adapter->reset_task);
1953                 }
1954         }
1955
1956         /* Cause software interrupt to ensure Rx ring is cleaned */
1957         ew32(EICS, adapter->rx_ring->eims_value);
1958
1959         /* Reset the timer */
1960         if (!test_bit(__IGBVF_DOWN, &adapter->state))
1961                 mod_timer(&adapter->watchdog_timer,
1962                           round_jiffies(jiffies + (2 * HZ)));
1963 }
1964
/* Per-frame transmit flags.  The low bits are boolean flags; the VLAN tag
 * is carried in the upper 16 bits (see _VLAN_SHIFT / _VLAN_MASK).
 */
#define IGBVF_TX_FLAGS_CSUM             (1 << 0)
#define IGBVF_TX_FLAGS_VLAN             (1 << 1)
#define IGBVF_TX_FLAGS_TSO              (1 << 2)
#define IGBVF_TX_FLAGS_IPV4             (1 << 3)
#define IGBVF_TX_FLAGS_VLAN_SHIFT       16
#define IGBVF_TX_FLAGS_VLAN_MASK        (0xffffU << 16)
1971
1972 static void igbvf_tx_ctxtdesc(struct igbvf_ring *tx_ring, u32 vlan_macip_lens,
1973                               u32 type_tucmd, u32 mss_l4len_idx)
1974 {
1975         struct e1000_adv_tx_context_desc *context_desc;
1976         struct igbvf_buffer *buffer_info;
1977         u16 i = tx_ring->next_to_use;
1978
1979         context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i);
1980         buffer_info = &tx_ring->buffer_info[i];
1981
1982         i++;
1983         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1984
1985         /* set bits to identify this as an advanced context descriptor */
1986         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
1987
1988         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1989         context_desc->seqnum_seed       = 0;
1990         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1991         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
1992
1993         buffer_info->time_stamp = jiffies;
1994         buffer_info->dma = 0;
1995 }
1996
1997 static int igbvf_tso(struct igbvf_ring *tx_ring,
1998                      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
1999 {
2000         u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
2001         union {
2002                 struct iphdr *v4;
2003                 struct ipv6hdr *v6;
2004                 unsigned char *hdr;
2005         } ip;
2006         union {
2007                 struct tcphdr *tcp;
2008                 unsigned char *hdr;
2009         } l4;
2010         u32 paylen, l4_offset;
2011         int err;
2012
2013         if (skb->ip_summed != CHECKSUM_PARTIAL)
2014                 return 0;
2015
2016         if (!skb_is_gso(skb))
2017                 return 0;
2018
2019         err = skb_cow_head(skb, 0);
2020         if (err < 0)
2021                 return err;
2022
2023         ip.hdr = skb_network_header(skb);
2024         l4.hdr = skb_checksum_start(skb);
2025
2026         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
2027         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
2028
2029         /* initialize outer IP header fields */
2030         if (ip.v4->version == 4) {
2031                 unsigned char *csum_start = skb_checksum_start(skb);
2032                 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
2033
2034                 /* IP header will have to cancel out any data that
2035                  * is not a part of the outer IP header
2036                  */
2037                 ip.v4->check = csum_fold(csum_partial(trans_start,
2038                                                       csum_start - trans_start,
2039                                                       0));
2040                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
2041
2042                 ip.v4->tot_len = 0;
2043         } else {
2044                 ip.v6->payload_len = 0;
2045         }
2046
2047         /* determine offset of inner transport header */
2048         l4_offset = l4.hdr - skb->data;
2049
2050         /* compute length of segmentation header */
2051         *hdr_len = (l4.tcp->doff * 4) + l4_offset;
2052
2053         /* remove payload length from inner checksum */
2054         paylen = skb->len - l4_offset;
2055         csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
2056
2057         /* MSS L4LEN IDX */
2058         mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
2059         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
2060
2061         /* VLAN MACLEN IPLEN */
2062         vlan_macip_lens = l4.hdr - ip.hdr;
2063         vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
2064         vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK;
2065
2066         igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
2067
2068         return 1;
2069 }
2070
2071 static inline bool igbvf_ipv6_csum_is_sctp(struct sk_buff *skb)
2072 {
2073         unsigned int offset = 0;
2074
2075         ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
2076
2077         return offset == skb_checksum_start_offset(skb);
2078 }
2079
2080 static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb,
2081                           u32 tx_flags, __be16 protocol)
2082 {
2083         u32 vlan_macip_lens = 0;
2084         u32 type_tucmd = 0;
2085
2086         if (skb->ip_summed != CHECKSUM_PARTIAL) {
2087 csum_failed:
2088                 if (!(tx_flags & IGBVF_TX_FLAGS_VLAN))
2089                         return false;
2090                 goto no_csum;
2091         }
2092
2093         switch (skb->csum_offset) {
2094         case offsetof(struct tcphdr, check):
2095                 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
2096                 /* fall through */
2097         case offsetof(struct udphdr, check):
2098                 break;
2099         case offsetof(struct sctphdr, checksum):
2100                 /* validate that this is actually an SCTP request */
2101                 if (((protocol == htons(ETH_P_IP)) &&
2102                      (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
2103                     ((protocol == htons(ETH_P_IPV6)) &&
2104                      igbvf_ipv6_csum_is_sctp(skb))) {
2105                         type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
2106                         break;
2107                 }
2108                 /* fall through */
2109         default:
2110                 skb_checksum_help(skb);
2111                 goto csum_failed;
2112         }
2113
2114         vlan_macip_lens = skb_checksum_start_offset(skb) -
2115                           skb_network_offset(skb);
2116 no_csum:
2117         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
2118         vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK;
2119
2120         igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
2121         return true;
2122 }
2123
2124 static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
2125 {
2126         struct igbvf_adapter *adapter = netdev_priv(netdev);
2127
2128         /* there is enough descriptors then we don't need to worry  */
2129         if (igbvf_desc_unused(adapter->tx_ring) >= size)
2130                 return 0;
2131
2132         netif_stop_queue(netdev);
2133
2134         /* Herbert's original patch had:
2135          *  smp_mb__after_netif_stop_queue();
2136          * but since that doesn't exist yet, just open code it.
2137          */
2138         smp_mb();
2139
2140         /* We need to check again just in case room has been made available */
2141         if (igbvf_desc_unused(adapter->tx_ring) < size)
2142                 return -EBUSY;
2143
2144         netif_wake_queue(netdev);
2145
2146         ++adapter->restart_queue;
2147         return 0;
2148 }
2149
2150 #define IGBVF_MAX_TXD_PWR       16
2151 #define IGBVF_MAX_DATA_PER_TXD  (1u << IGBVF_MAX_TXD_PWR)
2152
2153 static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
2154                                    struct igbvf_ring *tx_ring,
2155                                    struct sk_buff *skb)
2156 {
2157         struct igbvf_buffer *buffer_info;
2158         struct pci_dev *pdev = adapter->pdev;
2159         unsigned int len = skb_headlen(skb);
2160         unsigned int count = 0, i;
2161         unsigned int f;
2162
2163         i = tx_ring->next_to_use;
2164
2165         buffer_info = &tx_ring->buffer_info[i];
2166         BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
2167         buffer_info->length = len;
2168         /* set time_stamp *before* dma to help avoid a possible race */
2169         buffer_info->time_stamp = jiffies;
2170         buffer_info->mapped_as_page = false;
2171         buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
2172                                           DMA_TO_DEVICE);
2173         if (dma_mapping_error(&pdev->dev, buffer_info->dma))
2174                 goto dma_error;
2175
2176         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
2177                 const struct skb_frag_struct *frag;
2178
2179                 count++;
2180                 i++;
2181                 if (i == tx_ring->count)
2182                         i = 0;
2183
2184                 frag = &skb_shinfo(skb)->frags[f];
2185                 len = skb_frag_size(frag);
2186
2187                 buffer_info = &tx_ring->buffer_info[i];
2188                 BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
2189                 buffer_info->length = len;
2190                 buffer_info->time_stamp = jiffies;
2191                 buffer_info->mapped_as_page = true;
2192                 buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
2193                                                     DMA_TO_DEVICE);
2194                 if (dma_mapping_error(&pdev->dev, buffer_info->dma))
2195                         goto dma_error;
2196         }
2197
2198         tx_ring->buffer_info[i].skb = skb;
2199
2200         return ++count;
2201
2202 dma_error:
2203         dev_err(&pdev->dev, "TX DMA map failed\n");
2204
2205         /* clear timestamp and dma mappings for failed buffer_info mapping */
2206         buffer_info->dma = 0;
2207         buffer_info->time_stamp = 0;
2208         buffer_info->length = 0;
2209         buffer_info->mapped_as_page = false;
2210         if (count)
2211                 count--;
2212
2213         /* clear timestamp and dma mappings for remaining portion of packet */
2214         while (count--) {
2215                 if (i == 0)
2216                         i += tx_ring->count;
2217                 i--;
2218                 buffer_info = &tx_ring->buffer_info[i];
2219                 igbvf_put_txbuf(adapter, buffer_info);
2220         }
2221
2222         return 0;
2223 }
2224
2225 static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
2226                                       struct igbvf_ring *tx_ring,
2227                                       int tx_flags, int count,
2228                                       unsigned int first, u32 paylen,
2229                                       u8 hdr_len)
2230 {
2231         union e1000_adv_tx_desc *tx_desc = NULL;
2232         struct igbvf_buffer *buffer_info;
2233         u32 olinfo_status = 0, cmd_type_len;
2234         unsigned int i;
2235
2236         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
2237                         E1000_ADVTXD_DCMD_DEXT);
2238
2239         if (tx_flags & IGBVF_TX_FLAGS_VLAN)
2240                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
2241
2242         if (tx_flags & IGBVF_TX_FLAGS_TSO) {
2243                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2244
2245                 /* insert tcp checksum */
2246                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2247
2248                 /* insert ip checksum */
2249                 if (tx_flags & IGBVF_TX_FLAGS_IPV4)
2250                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2251
2252         } else if (tx_flags & IGBVF_TX_FLAGS_CSUM) {
2253                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2254         }
2255
2256         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
2257
2258         i = tx_ring->next_to_use;
2259         while (count--) {
2260                 buffer_info = &tx_ring->buffer_info[i];
2261                 tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
2262                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
2263                 tx_desc->read.cmd_type_len =
2264                          cpu_to_le32(cmd_type_len | buffer_info->length);
2265                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2266                 i++;
2267                 if (i == tx_ring->count)
2268                         i = 0;
2269         }
2270
2271         tx_desc->read.cmd_type_len |= cpu_to_le32(adapter->txd_cmd);
2272         /* Force memory writes to complete before letting h/w
2273          * know there are new descriptors to fetch.  (Only
2274          * applicable for weak-ordered memory model archs,
2275          * such as IA-64).
2276          */
2277         wmb();
2278
2279         tx_ring->buffer_info[first].next_to_watch = tx_desc;
2280         tx_ring->next_to_use = i;
2281         writel(i, adapter->hw.hw_addr + tx_ring->tail);
2282         /* we need this if more than one processor can write to our tail
2283          * at a time, it synchronizes IO on IA64/Altix systems
2284          */
2285         mmiowb();
2286 }
2287
2288 static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
2289                                              struct net_device *netdev,
2290                                              struct igbvf_ring *tx_ring)
2291 {
2292         struct igbvf_adapter *adapter = netdev_priv(netdev);
2293         unsigned int first, tx_flags = 0;
2294         u8 hdr_len = 0;
2295         int count = 0;
2296         int tso = 0;
2297         __be16 protocol = vlan_get_protocol(skb);
2298
2299         if (test_bit(__IGBVF_DOWN, &adapter->state)) {
2300                 dev_kfree_skb_any(skb);
2301                 return NETDEV_TX_OK;
2302         }
2303
2304         if (skb->len <= 0) {
2305                 dev_kfree_skb_any(skb);
2306                 return NETDEV_TX_OK;
2307         }
2308
2309         /* need: count + 4 desc gap to keep tail from touching
2310          *       + 2 desc gap to keep tail from touching head,
2311          *       + 1 desc for skb->data,
2312          *       + 1 desc for context descriptor,
2313          * head, otherwise try next time
2314          */
2315         if (igbvf_maybe_stop_tx(netdev, skb_shinfo(skb)->nr_frags + 4)) {
2316                 /* this is a hard error */
2317                 return NETDEV_TX_BUSY;
2318         }
2319
2320         if (skb_vlan_tag_present(skb)) {
2321                 tx_flags |= IGBVF_TX_FLAGS_VLAN;
2322                 tx_flags |= (skb_vlan_tag_get(skb) <<
2323                              IGBVF_TX_FLAGS_VLAN_SHIFT);
2324         }
2325
2326         if (protocol == htons(ETH_P_IP))
2327                 tx_flags |= IGBVF_TX_FLAGS_IPV4;
2328
2329         first = tx_ring->next_to_use;
2330
2331         tso = igbvf_tso(tx_ring, skb, tx_flags, &hdr_len);
2332         if (unlikely(tso < 0)) {
2333                 dev_kfree_skb_any(skb);
2334                 return NETDEV_TX_OK;
2335         }
2336
2337         if (tso)
2338                 tx_flags |= IGBVF_TX_FLAGS_TSO;
2339         else if (igbvf_tx_csum(tx_ring, skb, tx_flags, protocol) &&
2340                  (skb->ip_summed == CHECKSUM_PARTIAL))
2341                 tx_flags |= IGBVF_TX_FLAGS_CSUM;
2342
2343         /* count reflects descriptors mapped, if 0 then mapping error
2344          * has occurred and we need to rewind the descriptor queue
2345          */
2346         count = igbvf_tx_map_adv(adapter, tx_ring, skb);
2347
2348         if (count) {
2349                 igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
2350                                    first, skb->len, hdr_len);
2351                 /* Make sure there is space in the ring for the next send. */
2352                 igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
2353         } else {
2354                 dev_kfree_skb_any(skb);
2355                 tx_ring->buffer_info[first].time_stamp = 0;
2356                 tx_ring->next_to_use = first;
2357         }
2358
2359         return NETDEV_TX_OK;
2360 }
2361
2362 static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb,
2363                                     struct net_device *netdev)
2364 {
2365         struct igbvf_adapter *adapter = netdev_priv(netdev);
2366         struct igbvf_ring *tx_ring;
2367
2368         if (test_bit(__IGBVF_DOWN, &adapter->state)) {
2369                 dev_kfree_skb_any(skb);
2370                 return NETDEV_TX_OK;
2371         }
2372
2373         tx_ring = &adapter->tx_ring[0];
2374
2375         return igbvf_xmit_frame_ring_adv(skb, netdev, tx_ring);
2376 }
2377
2378 /**
2379  * igbvf_tx_timeout - Respond to a Tx Hang
2380  * @netdev: network interface device structure
2381  **/
2382 static void igbvf_tx_timeout(struct net_device *netdev)
2383 {
2384         struct igbvf_adapter *adapter = netdev_priv(netdev);
2385
2386         /* Do the reset outside of interrupt context */
2387         adapter->tx_timeout_count++;
2388         schedule_work(&adapter->reset_task);
2389 }
2390
2391 static void igbvf_reset_task(struct work_struct *work)
2392 {
2393         struct igbvf_adapter *adapter;
2394
2395         adapter = container_of(work, struct igbvf_adapter, reset_task);
2396
2397         igbvf_reinit_locked(adapter);
2398 }
2399
2400 /**
2401  * igbvf_change_mtu - Change the Maximum Transfer Unit
2402  * @netdev: network interface device structure
2403  * @new_mtu: new value for maximum frame size
2404  *
2405  * Returns 0 on success, negative on failure
2406  **/
2407 static int igbvf_change_mtu(struct net_device *netdev, int new_mtu)
2408 {
2409         struct igbvf_adapter *adapter = netdev_priv(netdev);
2410         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
2411
2412         while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
2413                 usleep_range(1000, 2000);
2414         /* igbvf_down has a dependency on max_frame_size */
2415         adapter->max_frame_size = max_frame;
2416         if (netif_running(netdev))
2417                 igbvf_down(adapter);
2418
2419         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
2420          * means we reserve 2 more, this pushes us to allocate from the next
2421          * larger slab size.
2422          * i.e. RXBUFFER_2048 --> size-4096 slab
2423          * However with the new *_jumbo_rx* routines, jumbo receives will use
2424          * fragmented skbs
2425          */
2426
2427         if (max_frame <= 1024)
2428                 adapter->rx_buffer_len = 1024;
2429         else if (max_frame <= 2048)
2430                 adapter->rx_buffer_len = 2048;
2431         else
2432 #if (PAGE_SIZE / 2) > 16384
2433                 adapter->rx_buffer_len = 16384;
2434 #else
2435                 adapter->rx_buffer_len = PAGE_SIZE / 2;
2436 #endif
2437
2438         /* adjust allocation if LPE protects us, and we aren't using SBP */
2439         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) ||
2440             (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN))
2441                 adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN +
2442                                          ETH_FCS_LEN;
2443
2444         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
2445                  netdev->mtu, new_mtu);
2446         netdev->mtu = new_mtu;
2447
2448         if (netif_running(netdev))
2449                 igbvf_up(adapter);
2450         else
2451                 igbvf_reset(adapter);
2452
2453         clear_bit(__IGBVF_RESETTING, &adapter->state);
2454
2455         return 0;
2456 }
2457
/* No device-private ioctl is implemented: every @cmd is rejected with
 * -EOPNOTSUPP.
 */
static int igbvf_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
        return -EOPNOTSUPP;
}
2465
2466 static int igbvf_suspend(struct pci_dev *pdev, pm_message_t state)
2467 {
2468         struct net_device *netdev = pci_get_drvdata(pdev);
2469         struct igbvf_adapter *adapter = netdev_priv(netdev);
2470 #ifdef CONFIG_PM
2471         int retval = 0;
2472 #endif
2473
2474         netif_device_detach(netdev);
2475
2476         if (netif_running(netdev)) {
2477                 WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state));
2478                 igbvf_down(adapter);
2479                 igbvf_free_irq(adapter);
2480         }
2481
2482 #ifdef CONFIG_PM
2483         retval = pci_save_state(pdev);
2484         if (retval)
2485                 return retval;
2486 #endif
2487
2488         pci_disable_device(pdev);
2489
2490         return 0;
2491 }
2492
#ifdef CONFIG_PM
/* Bring the device back after suspend: restore PCI state, re-enable the
 * device and bus mastering, re-request the IRQ for a running interface,
 * reset the adapter, bring a running interface back up and re-attach the
 * net_device.
 *
 * Returns 0 on success or the negative error code from
 * pci_enable_device_mem() / igbvf_request_irq().
 */
static int igbvf_resume(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igbvf_adapter *adapter = netdev_priv(netdev);
        /* signed: holds negative errno values that are returned as int */
        int err;

        pci_restore_state(pdev);
        err = pci_enable_device_mem(pdev);
        if (err) {
                dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
                return err;
        }

        pci_set_master(pdev);

        if (netif_running(netdev)) {
                err = igbvf_request_irq(adapter);
                if (err)
                        return err;
        }

        igbvf_reset(adapter);

        if (netif_running(netdev))
                igbvf_up(adapter);

        netif_device_attach(netdev);

        return 0;
}
#endif
2525
2526 static void igbvf_shutdown(struct pci_dev *pdev)
2527 {
2528         igbvf_suspend(pdev, PMSG_SUSPEND);
2529 }
2530
#ifdef CONFIG_NET_POLL_CONTROLLER
/**
 * igbvf_netpoll - polling 'interrupt' handler
 * @netdev: network interface device structure
 *
 * Used by things like netconsole to send skbs without having to re-enable
 * interrupts. It is not called while the interrupt routine is executing.
 * The device IRQ is masked while the Tx ring is cleaned.
 **/
static void igbvf_netpoll(struct net_device *netdev)
{
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	unsigned int irq = adapter->pdev->irq;

	disable_irq(irq);
	igbvf_clean_tx_irq(adapter->tx_ring);
	enable_irq(irq);
}
#endif
2547
2548 /**
2549  * igbvf_io_error_detected - called when PCI error is detected
2550  * @pdev: Pointer to PCI device
2551  * @state: The current pci connection state
2552  *
2553  * This function is called after a PCI bus error affecting
2554  * this device has been detected.
2555  */
2556 static pci_ers_result_t igbvf_io_error_detected(struct pci_dev *pdev,
2557                                                 pci_channel_state_t state)
2558 {
2559         struct net_device *netdev = pci_get_drvdata(pdev);
2560         struct igbvf_adapter *adapter = netdev_priv(netdev);
2561
2562         netif_device_detach(netdev);
2563
2564         if (state == pci_channel_io_perm_failure)
2565                 return PCI_ERS_RESULT_DISCONNECT;
2566
2567         if (netif_running(netdev))
2568                 igbvf_down(adapter);
2569         pci_disable_device(pdev);
2570
2571         /* Request a slot slot reset. */
2572         return PCI_ERS_RESULT_NEED_RESET;
2573 }
2574
2575 /**
2576  * igbvf_io_slot_reset - called after the pci bus has been reset.
2577  * @pdev: Pointer to PCI device
2578  *
2579  * Restart the card from scratch, as if from a cold-boot. Implementation
2580  * resembles the first-half of the igbvf_resume routine.
2581  */
2582 static pci_ers_result_t igbvf_io_slot_reset(struct pci_dev *pdev)
2583 {
2584         struct net_device *netdev = pci_get_drvdata(pdev);
2585         struct igbvf_adapter *adapter = netdev_priv(netdev);
2586
2587         if (pci_enable_device_mem(pdev)) {
2588                 dev_err(&pdev->dev,
2589                         "Cannot re-enable PCI device after reset.\n");
2590                 return PCI_ERS_RESULT_DISCONNECT;
2591         }
2592         pci_set_master(pdev);
2593
2594         igbvf_reset(adapter);
2595
2596         return PCI_ERS_RESULT_RECOVERED;
2597 }
2598
2599 /**
2600  * igbvf_io_resume - called when traffic can start flowing again.
2601  * @pdev: Pointer to PCI device
2602  *
2603  * This callback is called when the error recovery driver tells us that
2604  * its OK to resume normal operation. Implementation resembles the
2605  * second-half of the igbvf_resume routine.
2606  */
2607 static void igbvf_io_resume(struct pci_dev *pdev)
2608 {
2609         struct net_device *netdev = pci_get_drvdata(pdev);
2610         struct igbvf_adapter *adapter = netdev_priv(netdev);
2611
2612         if (netif_running(netdev)) {
2613                 if (igbvf_up(adapter)) {
2614                         dev_err(&pdev->dev,
2615                                 "can't bring device back up after reset\n");
2616                         return;
2617                 }
2618         }
2619
2620         netif_device_attach(netdev);
2621 }
2622
2623 static void igbvf_print_device_info(struct igbvf_adapter *adapter)
2624 {
2625         struct e1000_hw *hw = &adapter->hw;
2626         struct net_device *netdev = adapter->netdev;
2627         struct pci_dev *pdev = adapter->pdev;
2628
2629         if (hw->mac.type == e1000_vfadapt_i350)
2630                 dev_info(&pdev->dev, "Intel(R) I350 Virtual Function\n");
2631         else
2632                 dev_info(&pdev->dev, "Intel(R) 82576 Virtual Function\n");
2633         dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr);
2634 }
2635
2636 static int igbvf_set_features(struct net_device *netdev,
2637                               netdev_features_t features)
2638 {
2639         struct igbvf_adapter *adapter = netdev_priv(netdev);
2640
2641         if (features & NETIF_F_RXCSUM)
2642                 adapter->flags &= ~IGBVF_FLAG_RX_CSUM_DISABLED;
2643         else
2644                 adapter->flags |= IGBVF_FLAG_RX_CSUM_DISABLED;
2645
2646         return 0;
2647 }
2648
2649 #define IGBVF_MAX_MAC_HDR_LEN           127
2650 #define IGBVF_MAX_NETWORK_HDR_LEN       511
2651
2652 static netdev_features_t
2653 igbvf_features_check(struct sk_buff *skb, struct net_device *dev,
2654                      netdev_features_t features)
2655 {
2656         unsigned int network_hdr_len, mac_hdr_len;
2657
2658         /* Make certain the headers can be described by a context descriptor */
2659         mac_hdr_len = skb_network_header(skb) - skb->data;
2660         if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN))
2661                 return features & ~(NETIF_F_HW_CSUM |
2662                                     NETIF_F_SCTP_CRC |
2663                                     NETIF_F_HW_VLAN_CTAG_TX |
2664                                     NETIF_F_TSO |
2665                                     NETIF_F_TSO6);
2666
2667         network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
2668         if (unlikely(network_hdr_len >  IGBVF_MAX_NETWORK_HDR_LEN))
2669                 return features & ~(NETIF_F_HW_CSUM |
2670                                     NETIF_F_SCTP_CRC |
2671                                     NETIF_F_TSO |
2672                                     NETIF_F_TSO6);
2673
2674         /* We can only support IPV4 TSO in tunnels if we can mangle the
2675          * inner IP ID field, so strip TSO if MANGLEID is not supported.
2676          */
2677         if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
2678                 features &= ~NETIF_F_TSO;
2679
2680         return features;
2681 }
2682
2683 static const struct net_device_ops igbvf_netdev_ops = {
2684         .ndo_open               = igbvf_open,
2685         .ndo_stop               = igbvf_close,
2686         .ndo_start_xmit         = igbvf_xmit_frame,
2687         .ndo_set_rx_mode        = igbvf_set_rx_mode,
2688         .ndo_set_mac_address    = igbvf_set_mac,
2689         .ndo_change_mtu         = igbvf_change_mtu,
2690         .ndo_do_ioctl           = igbvf_ioctl,
2691         .ndo_tx_timeout         = igbvf_tx_timeout,
2692         .ndo_vlan_rx_add_vid    = igbvf_vlan_rx_add_vid,
2693         .ndo_vlan_rx_kill_vid   = igbvf_vlan_rx_kill_vid,
2694 #ifdef CONFIG_NET_POLL_CONTROLLER
2695         .ndo_poll_controller    = igbvf_netpoll,
2696 #endif
2697         .ndo_set_features       = igbvf_set_features,
2698         .ndo_features_check     = igbvf_features_check,
2699 };
2700
2701 /**
2702  * igbvf_probe - Device Initialization Routine
2703  * @pdev: PCI device information struct
2704  * @ent: entry in igbvf_pci_tbl
2705  *
2706  * Returns 0 on success, negative on failure
2707  *
2708  * igbvf_probe initializes an adapter identified by a pci_dev structure.
2709  * The OS initialization, configuring of the adapter private structure,
2710  * and a hardware reset occur.
2711  **/
2712 static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2713 {
2714         struct net_device *netdev;
2715         struct igbvf_adapter *adapter;
2716         struct e1000_hw *hw;
2717         const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data];
2718
2719         static int cards_found;
2720         int err, pci_using_dac;
2721
2722         err = pci_enable_device_mem(pdev);
2723         if (err)
2724                 return err;
2725
2726         pci_using_dac = 0;
2727         err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2728         if (!err) {
2729                 pci_using_dac = 1;
2730         } else {
2731                 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
2732                 if (err) {
2733                         dev_err(&pdev->dev,
2734                                 "No usable DMA configuration, aborting\n");
2735                         goto err_dma;
2736                 }
2737         }
2738
2739         err = pci_request_regions(pdev, igbvf_driver_name);
2740         if (err)
2741                 goto err_pci_reg;
2742
2743         pci_set_master(pdev);
2744
2745         err = -ENOMEM;
2746         netdev = alloc_etherdev(sizeof(struct igbvf_adapter));
2747         if (!netdev)
2748                 goto err_alloc_etherdev;
2749
2750         SET_NETDEV_DEV(netdev, &pdev->dev);
2751
2752         pci_set_drvdata(pdev, netdev);
2753         adapter = netdev_priv(netdev);
2754         hw = &adapter->hw;
2755         adapter->netdev = netdev;
2756         adapter->pdev = pdev;
2757         adapter->ei = ei;
2758         adapter->pba = ei->pba;
2759         adapter->flags = ei->flags;
2760         adapter->hw.back = adapter;
2761         adapter->hw.mac.type = ei->mac;
2762         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
2763
2764         /* PCI config space info */
2765
2766         hw->vendor_id = pdev->vendor;
2767         hw->device_id = pdev->device;
2768         hw->subsystem_vendor_id = pdev->subsystem_vendor;
2769         hw->subsystem_device_id = pdev->subsystem_device;
2770         hw->revision_id = pdev->revision;
2771
2772         err = -EIO;
2773         adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, 0),
2774                                       pci_resource_len(pdev, 0));
2775
2776         if (!adapter->hw.hw_addr)
2777                 goto err_ioremap;
2778
2779         if (ei->get_variants) {
2780                 err = ei->get_variants(adapter);
2781                 if (err)
2782                         goto err_get_variants;
2783         }
2784
2785         /* setup adapter struct */
2786         err = igbvf_sw_init(adapter);
2787         if (err)
2788                 goto err_sw_init;
2789
2790         /* construct the net_device struct */
2791         netdev->netdev_ops = &igbvf_netdev_ops;
2792
2793         igbvf_set_ethtool_ops(netdev);
2794         netdev->watchdog_timeo = 5 * HZ;
2795         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
2796
2797         adapter->bd_number = cards_found++;
2798
2799         netdev->hw_features = NETIF_F_SG |
2800                               NETIF_F_TSO |
2801                               NETIF_F_TSO6 |
2802                               NETIF_F_RXCSUM |
2803                               NETIF_F_HW_CSUM |
2804                               NETIF_F_SCTP_CRC;
2805
2806 #define IGBVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
2807                                     NETIF_F_GSO_GRE_CSUM | \
2808                                     NETIF_F_GSO_IPXIP4 | \
2809                                     NETIF_F_GSO_IPXIP6 | \
2810                                     NETIF_F_GSO_UDP_TUNNEL | \
2811                                     NETIF_F_GSO_UDP_TUNNEL_CSUM)
2812
2813         netdev->gso_partial_features = IGBVF_GSO_PARTIAL_FEATURES;
2814         netdev->hw_features |= NETIF_F_GSO_PARTIAL |
2815                                IGBVF_GSO_PARTIAL_FEATURES;
2816
2817         netdev->features = netdev->hw_features;
2818
2819         if (pci_using_dac)
2820                 netdev->features |= NETIF_F_HIGHDMA;
2821
2822         netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
2823         netdev->mpls_features |= NETIF_F_HW_CSUM;
2824         netdev->hw_enc_features |= netdev->vlan_features;
2825
2826         /* set this bit last since it cannot be part of vlan_features */
2827         netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
2828                             NETIF_F_HW_VLAN_CTAG_RX |
2829                             NETIF_F_HW_VLAN_CTAG_TX;
2830
2831         /* MTU range: 68 - 9216 */
2832         netdev->min_mtu = ETH_MIN_MTU;
2833         netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
2834
2835         spin_lock_bh(&hw->mbx_lock);
2836
2837         /*reset the controller to put the device in a known good state */
2838         err = hw->mac.ops.reset_hw(hw);
2839         if (err) {
2840                 dev_info(&pdev->dev,
2841                          "PF still in reset state. Is the PF interface up?\n");
2842         } else {
2843                 err = hw->mac.ops.read_mac_addr(hw);
2844                 if (err)
2845                         dev_info(&pdev->dev, "Error reading MAC address.\n");
2846                 else if (is_zero_ether_addr(adapter->hw.mac.addr))
2847                         dev_info(&pdev->dev,
2848                                  "MAC address not assigned by administrator.\n");
2849                 memcpy(netdev->dev_addr, adapter->hw.mac.addr,
2850                        netdev->addr_len);
2851         }
2852
2853         spin_unlock_bh(&hw->mbx_lock);
2854
2855         if (!is_valid_ether_addr(netdev->dev_addr)) {
2856                 dev_info(&pdev->dev, "Assigning random MAC address.\n");
2857                 eth_hw_addr_random(netdev);
2858                 memcpy(adapter->hw.mac.addr, netdev->dev_addr,
2859                        netdev->addr_len);
2860         }
2861
2862         timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0);
2863
2864         INIT_WORK(&adapter->reset_task, igbvf_reset_task);
2865         INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task);
2866
2867         /* ring size defaults */
2868         adapter->rx_ring->count = 1024;
2869         adapter->tx_ring->count = 1024;
2870
2871         /* reset the hardware with the new settings */
2872         igbvf_reset(adapter);
2873
2874         /* set hardware-specific flags */
2875         if (adapter->hw.mac.type == e1000_vfadapt_i350)
2876                 adapter->flags |= IGBVF_FLAG_RX_LB_VLAN_BSWAP;
2877
2878         strcpy(netdev->name, "eth%d");
2879         err = register_netdev(netdev);
2880         if (err)
2881                 goto err_hw_init;
2882
2883         /* tell the stack to leave us alone until igbvf_open() is called */
2884         netif_carrier_off(netdev);
2885         netif_stop_queue(netdev);
2886
2887         igbvf_print_device_info(adapter);
2888
2889         igbvf_initialize_last_counter_stats(adapter);
2890
2891         return 0;
2892
2893 err_hw_init:
2894         kfree(adapter->tx_ring);
2895         kfree(adapter->rx_ring);
2896 err_sw_init:
2897         igbvf_reset_interrupt_capability(adapter);
2898 err_get_variants:
2899         iounmap(adapter->hw.hw_addr);
2900 err_ioremap:
2901         free_netdev(netdev);
2902 err_alloc_etherdev:
2903         pci_release_regions(pdev);
2904 err_pci_reg:
2905 err_dma:
2906         pci_disable_device(pdev);
2907         return err;
2908 }
2909
2910 /**
2911  * igbvf_remove - Device Removal Routine
2912  * @pdev: PCI device information struct
2913  *
2914  * igbvf_remove is called by the PCI subsystem to alert the driver
2915  * that it should release a PCI device.  The could be caused by a
2916  * Hot-Plug event, or because the driver is going to be removed from
2917  * memory.
2918  **/
2919 static void igbvf_remove(struct pci_dev *pdev)
2920 {
2921         struct net_device *netdev = pci_get_drvdata(pdev);
2922         struct igbvf_adapter *adapter = netdev_priv(netdev);
2923         struct e1000_hw *hw = &adapter->hw;
2924
2925         /* The watchdog timer may be rescheduled, so explicitly
2926          * disable it from being rescheduled.
2927          */
2928         set_bit(__IGBVF_DOWN, &adapter->state);
2929         del_timer_sync(&adapter->watchdog_timer);
2930
2931         cancel_work_sync(&adapter->reset_task);
2932         cancel_work_sync(&adapter->watchdog_task);
2933
2934         unregister_netdev(netdev);
2935
2936         igbvf_reset_interrupt_capability(adapter);
2937
2938         /* it is important to delete the NAPI struct prior to freeing the
2939          * Rx ring so that you do not end up with null pointer refs
2940          */
2941         netif_napi_del(&adapter->rx_ring->napi);
2942         kfree(adapter->tx_ring);
2943         kfree(adapter->rx_ring);
2944
2945         iounmap(hw->hw_addr);
2946         if (hw->flash_address)
2947                 iounmap(hw->flash_address);
2948         pci_release_regions(pdev);
2949
2950         free_netdev(netdev);
2951
2952         pci_disable_device(pdev);
2953 }
2954
2955 /* PCI Error Recovery (ERS) */
2956 static const struct pci_error_handlers igbvf_err_handler = {
2957         .error_detected = igbvf_io_error_detected,
2958         .slot_reset = igbvf_io_slot_reset,
2959         .resume = igbvf_io_resume,
2960 };
2961
2962 static const struct pci_device_id igbvf_pci_tbl[] = {
2963         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_VF), board_vf },
2964         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_VF), board_i350_vf },
2965         { } /* terminate list */
2966 };
2967 MODULE_DEVICE_TABLE(pci, igbvf_pci_tbl);
2968
2969 /* PCI Device API Driver */
2970 static struct pci_driver igbvf_driver = {
2971         .name           = igbvf_driver_name,
2972         .id_table       = igbvf_pci_tbl,
2973         .probe          = igbvf_probe,
2974         .remove         = igbvf_remove,
2975 #ifdef CONFIG_PM
2976         /* Power Management Hooks */
2977         .suspend        = igbvf_suspend,
2978         .resume         = igbvf_resume,
2979 #endif
2980         .shutdown       = igbvf_shutdown,
2981         .err_handler    = &igbvf_err_handler
2982 };
2983
2984 /**
2985  * igbvf_init_module - Driver Registration Routine
2986  *
2987  * igbvf_init_module is the first routine called when the driver is
2988  * loaded. All it does is register with the PCI subsystem.
2989  **/
2990 static int __init igbvf_init_module(void)
2991 {
2992         int ret;
2993
2994         pr_info("%s - version %s\n", igbvf_driver_string, igbvf_driver_version);
2995         pr_info("%s\n", igbvf_copyright);
2996
2997         ret = pci_register_driver(&igbvf_driver);
2998
2999         return ret;
3000 }
3001 module_init(igbvf_init_module);
3002
3003 /**
3004  * igbvf_exit_module - Driver Exit Cleanup Routine
3005  *
3006  * igbvf_exit_module is called just before the driver is removed
3007  * from memory.
3008  **/
3009 static void __exit igbvf_exit_module(void)
3010 {
3011         pci_unregister_driver(&igbvf_driver);
3012 }
3013 module_exit(igbvf_exit_module);
3014
3015 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
3016 MODULE_DESCRIPTION("Intel(R) Gigabit Virtual Function Network Driver");
3017 MODULE_LICENSE("GPL v2");
3018 MODULE_VERSION(DRV_VERSION);
3019
3020 /* netdev.c */