/* samples/bpf/xdpsock_user.c — AF_XDP socket benchmark sample (rxdrop / txonly / l2fwd) */
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017 - 2018 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  */
13
14 #include <assert.h>
15 #include <errno.h>
16 #include <getopt.h>
17 #include <libgen.h>
18 #include <linux/bpf.h>
19 #include <linux/if_link.h>
20 #include <linux/if_xdp.h>
21 #include <linux/if_ether.h>
22 #include <net/if.h>
23 #include <signal.h>
24 #include <stdbool.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <net/ethernet.h>
29 #include <sys/resource.h>
30 #include <sys/socket.h>
31 #include <sys/mman.h>
32 #include <time.h>
33 #include <unistd.h>
34 #include <pthread.h>
35 #include <locale.h>
36 #include <sys/types.h>
37 #include <poll.h>
38
39 #include "bpf_load.h"
40 #include "bpf_util.h"
41 #include <bpf/bpf.h>
42
43 #include "xdpsock.h"
44
/* Fallback definitions for toolchains whose headers predate AF_XDP. */
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define NUM_FRAMES 131072       /* umem frames (256 MiB at 2 KiB per frame) */
#define FRAME_HEADROOM 0        /* no per-frame headroom reserved */
#define FRAME_SIZE 2048         /* bytes per umem frame */
#define NUM_DESCS 1024          /* Rx/Tx ring size; must be a power of two */
#define BATCH_SIZE 16           /* descriptors handled per loop iteration */

#define FQ_NUM_DESCS 1024       /* fill queue size; power of two */
#define CQ_NUM_DESCS 1024       /* completion queue size; power of two */

#define DEBUG_HEXDUMP 0         /* set to 1 to hex-dump every packet */
typedef __u32 u32;

/* Timestamp of the previous stats dump, in nanoseconds (see get_nsecs()). */
static unsigned long prev_time;

/* Workload selected on the command line. */
enum benchmark_type {
        BENCH_RXDROP = 0,       /* receive and discard */
        BENCH_TXONLY = 1,       /* transmit the canned packet forever */
        BENCH_L2FWD = 2,        /* MAC-swap and retransmit received packets */
};

/* Command-line options; filled in by parse_command_line(). */
static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags;               /* XDP_FLAGS_SKB_MODE / XDP_FLAGS_DRV_MODE */
static const char *opt_if = "";         /* interface name (-i) */
static int opt_ifindex;                 /* ifindex resolved from opt_if */
static int opt_queue;                   /* queue id to bind to (-q) */
static int opt_poll;                    /* use poll() instead of busy-spinning (-p) */
static int opt_shared_packet_buffer;    /* share one umem between sockets (-s) */
static int opt_interval = 1;            /* stats interval in seconds (-n) */

/* User-space view of a umem ring (fill or completion queue). */
struct xdp_umem_uqueue {
        u32 cached_prod;        /* local copy of the producer index */
        u32 cached_cons;        /* local copy of the consumer index */
        u32 mask;               /* ring size - 1, for cheap modulo */
        u32 size;
        struct xdp_umem_ring *ring;     /* shared ring mapped from the kernel */
};

/* A packet-buffer area plus its fill and completion queues. */
struct xdp_umem {
        char (*frames)[FRAME_SIZE];     /* NUM_FRAMES frames of FRAME_SIZE bytes */
        struct xdp_umem_uqueue fq;      /* fill queue: buffers handed to the kernel */
        struct xdp_umem_uqueue cq;      /* completion queue: finished Tx buffers */
        int fd;                         /* socket that registered this umem */
};

/* User-space view of an Rx or Tx descriptor ring. */
struct xdp_uqueue {
        u32 cached_prod;
        u32 cached_cons;
        u32 mask;
        u32 size;
        struct xdp_rxtx_ring *ring;
};

/* One AF_XDP socket with its rings, umem and statistics counters. */
struct xdpsock {
        struct xdp_uqueue rx;
        struct xdp_uqueue tx;
        int sfd;                        /* the AF_XDP socket fd */
        struct xdp_umem *umem;
        u32 outstanding_tx;             /* Tx descriptors not yet completed */
        unsigned long rx_npkts;
        unsigned long tx_npkts;
        unsigned long prev_rx_npkts;    /* snapshots for rate computation */
        unsigned long prev_tx_npkts;
};

#define MAX_SOCKS 4
static int num_socks;
struct xdpsock *xsks[MAX_SOCKS];
125
126 static unsigned long get_nsecs(void)
127 {
128         struct timespec ts;
129
130         clock_gettime(CLOCK_MONOTONIC, &ts);
131         return ts.tv_sec * 1000000000UL + ts.tv_nsec;
132 }
133
static void dump_stats(void);

/*
 * Benchmark-friendly assert: on failure, print the failing expression plus
 * errno, dump the traffic statistics gathered so far, then exit.
 */
#define lassert(expr)                                                   \
        do {                                                            \
                if (!(expr)) {                                          \
                        fprintf(stderr, "%s:%s:%i: Assertion failed: "  \
                                #expr ": errno: %d/\"%s\"\n",           \
                                __FILE__, __func__, __LINE__,           \
                                errno, strerror(errno));                \
                        dump_stats();                                   \
                        exit(EXIT_FAILURE);                             \
                }                                                       \
        } while (0)

/*
 * NOTE(review): these are compiler barriers only; they suffice as CPU
 * memory barriers on strongly-ordered architectures (e.g. x86) — confirm
 * before relying on them elsewhere.
 */
#define barrier() __asm__ __volatile__("": : :"memory")
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
153
/* Canned 60-byte Ethernet/IPv4/UDP frame transmitted by the txonly benchmark. */
static const char pkt_data[] =
        "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
        "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
        "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
        "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
159
/*
 * Return how many entries can still be produced into umem ring @q.  The
 * cached consumer index is used first; the shared consumer pointer is only
 * re-read when the cached view says there is not enough room for @nb.
 */
static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
{
        u32 free_entries = q->size - (q->cached_prod - q->cached_cons);

        if (free_entries >= nb)
                return free_entries;

        /* Refresh the local tail pointer */
        q->cached_cons = q->ring->ptrs.consumer;

        return q->size - (q->cached_prod - q->cached_cons);
}
172
/*
 * Return how many descriptors can be produced into Rx/Tx ring @q.  The
 * cached consumer index is kept biased by the ring size, so
 * cached_cons - cached_prod is directly the number of free slots.
 */
static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
{
        u32 free_entries = q->cached_cons - q->cached_prod;

        if (free_entries >= ndescs)
                return free_entries;

        /* Refresh the local tail pointer */
        q->cached_cons = q->ring->ptrs.consumer + q->size;
        return q->cached_cons - q->cached_prod;
}
184
/*
 * Return how many entries (capped at @nb) are ready to consume from umem
 * ring @q; the shared producer pointer is only re-read when the cached
 * view looks empty.
 */
static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
{
        u32 entries = q->cached_prod - q->cached_cons;

        if (entries == 0) {
                q->cached_prod = q->ring->ptrs.producer;
                entries = q->cached_prod - q->cached_cons;
        }

        return (entries > nb) ? nb : entries;
}
196
/*
 * Return how many descriptors (capped at @ndescs) are ready to consume
 * from Rx/Tx ring @q; re-reads the shared producer pointer only when the
 * cached view looks empty.
 */
static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
{
        u32 entries = q->cached_prod - q->cached_cons;

        if (entries == 0) {
                q->cached_prod = q->ring->ptrs.producer;
                entries = q->cached_prod - q->cached_cons;
        }

        return (entries > ndescs) ? ndescs : entries;
}
208
/*
 * Post the frame indices carried in the Rx descriptors @d to fill queue
 * @fq.  All-or-nothing: returns -ENOSPC without writing anything if fewer
 * than @nb slots are free.
 */
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
                                         struct xdp_desc *d,
                                         size_t nb)
{
        u32 i;

        if (umem_nb_free(fq, nb) < nb)
                return -ENOSPC;

        for (i = 0; i < nb; i++) {
                u32 idx = fq->cached_prod++ & fq->mask;

                fq->ring->desc[idx] = d[i].idx;
        }

        /* Entries must be visible before the producer index update. */
        u_smp_wmb();

        fq->ring->ptrs.producer = fq->cached_prod;

        return 0;
}
230
/*
 * Post @nb raw frame indices from @d to fill queue @fq.  All-or-nothing:
 * returns -ENOSPC without writing anything if fewer than @nb slots are
 * free.
 */
static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
                                      size_t nb)
{
        u32 i;

        if (umem_nb_free(fq, nb) < nb)
                return -ENOSPC;

        for (i = 0; i < nb; i++) {
                u32 idx = fq->cached_prod++ & fq->mask;

                fq->ring->desc[idx] = d[i];
        }

        /* Entries must be visible before the producer index update. */
        u_smp_wmb();

        fq->ring->ptrs.producer = fq->cached_prod;

        return 0;
}
251
/*
 * Harvest up to @nb completed Tx frame indices from completion queue @cq
 * into @d.  Returns the number actually copied (possibly 0).
 */
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
                                               u32 *d, size_t nb)
{
        u32 idx, i, entries = umem_nb_avail(cq, nb);

        /* Read the producer index before the entries it publishes. */
        u_smp_rmb();

        for (i = 0; i < entries; i++) {
                idx = cq->cached_cons++ & cq->mask;
                d[i] = cq->ring->desc[idx];
        }

        if (entries > 0) {
                /* Finish reading entries before handing the slots back. */
                u_smp_wmb();

                cq->ring->ptrs.consumer = cq->cached_cons;
        }

        return entries;
}
272
/* Return a pointer to byte @off inside umem frame @idx of @xsk. */
static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off)
{
        lassert(idx < NUM_FRAMES);
        return &xsk->umem->frames[idx][off];
}
278
/*
 * Copy @ndescs descriptors into ring @uq.  All-or-nothing: returns
 * -ENOSPC without side effects if the ring lacks room for all of them.
 */
static inline int xq_enq(struct xdp_uqueue *uq,
                         const struct xdp_desc *descs,
                         unsigned int ndescs)
{
        struct xdp_rxtx_ring *r = uq->ring;
        unsigned int i;

        if (xq_nb_free(uq, ndescs) < ndescs)
                return -ENOSPC;

        for (i = 0; i < ndescs; i++) {
                u32 idx = uq->cached_prod++ & uq->mask;

                r->desc[idx].idx = descs[i].idx;
                r->desc[idx].len = descs[i].len;
                r->desc[idx].offset = descs[i].offset;
        }

        /* Descriptors must be visible before the producer index update. */
        u_smp_wmb();

        r->ptrs.producer = uq->cached_prod;
        return 0;
}
302
303 static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
304                                  __u32 idx, unsigned int ndescs)
305 {
306         struct xdp_rxtx_ring *q = uq->ring;
307         unsigned int i;
308
309         if (xq_nb_free(uq, ndescs) < ndescs)
310                 return -ENOSPC;
311
312         for (i = 0; i < ndescs; i++) {
313                 u32 idx = uq->cached_prod++ & uq->mask;
314
315                 q->desc[idx].idx        = idx + i;
316                 q->desc[idx].len        = sizeof(pkt_data) - 1;
317                 q->desc[idx].offset     = 0;
318         }
319
320         u_smp_wmb();
321
322         q->ptrs.producer = uq->cached_prod;
323         return 0;
324 }
325
/*
 * Dequeue up to @ndescs descriptors from ring @uq into @descs.
 * Returns the number actually dequeued (possibly 0).
 */
static inline int xq_deq(struct xdp_uqueue *uq,
                         struct xdp_desc *descs,
                         int ndescs)
{
        struct xdp_rxtx_ring *r = uq->ring;
        unsigned int idx;
        int i, entries;

        entries = xq_nb_avail(uq, ndescs);

        /* Read the producer index before the descriptors it publishes. */
        u_smp_rmb();

        for (i = 0; i < entries; i++) {
                idx = uq->cached_cons++ & uq->mask;
                descs[i] = r->desc[idx];
        }

        if (entries > 0) {
                /* Finish reading descriptors before releasing the slots. */
                u_smp_wmb();

                r->ptrs.consumer = uq->cached_cons;
        }

        return entries;
}
351
/* Swap the source and destination MAC addresses of the Ethernet header at @data. */
static void swap_mac_addresses(void *data)
{
        struct ether_header *hdr = data;
        unsigned char scratch[ETH_ALEN];

        memcpy(scratch, hdr->ether_dhost, ETH_ALEN);
        memcpy(hdr->ether_dhost, hdr->ether_shost, ETH_ALEN);
        memcpy(hdr->ether_shost, scratch, ETH_ALEN);
}
363
#if DEBUG_HEXDUMP
/*
 * Print @length bytes of @pkt as a hex+ASCII dump, 32 bytes per line, each
 * line prefixed with @prefix.  Non-printable bytes show as '.' in the
 * ASCII column; a short last line is padded with "__" in the hex column.
 */
static void hex_dump(void *pkt, size_t length, const char *prefix)
{
        int i = 0;
        const unsigned char *address = (unsigned char *)pkt;
        const unsigned char *line = address;
        size_t line_size = 32;
        unsigned char c;

        printf("length = %zu\n", length);
        printf("%s | ", prefix);
        while (length-- > 0) {
                printf("%02X ", *address++);
                /* End of a full line, or the (possibly short) final line. */
                if (!(++i % line_size) || (length == 0 && i % line_size)) {
                        if (length == 0) {
                                /* Pad the hex column of a short last line. */
                                while (i++ % line_size)
                                        printf("__ ");
                        }
                        printf(" | ");  /* right close */
                        while (line < address) {
                                c = *line++;
                                printf("%c", (c < 33 || c == 255) ? 0x2E : c);
                        }
                        printf("\n");
                        if (length > 0)
                                printf("%s | ", prefix);
                }
        }
        printf("\n");
}
#endif
395
396 static size_t gen_eth_frame(char *frame)
397 {
398         memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
399         return sizeof(pkt_data) - 1;
400 }
401
/*
 * Allocate the packet-buffer area, register it with socket @sfd as a umem,
 * and map the fill and completion rings.  For the txonly benchmark every
 * frame is pre-filled with the canned packet.  Aborts via lassert() on any
 * failure; the caller owns the returned struct.
 */
static struct xdp_umem *xdp_umem_configure(int sfd)
{
        int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
        struct xdp_umem_reg mr;
        struct xdp_umem *umem;
        void *bufs;

        umem = calloc(1, sizeof(*umem));
        lassert(umem);

        lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
                               NUM_FRAMES * FRAME_SIZE) == 0);

        mr.addr = (__u64)bufs;
        mr.len = NUM_FRAMES * FRAME_SIZE;
        mr.frame_size = FRAME_SIZE;
        mr.frame_headroom = FRAME_HEADROOM;

        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
                           sizeof(int)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
                           sizeof(int)) == 0);

        /* Map the kernel-allocated fill ring into our address space. */
        umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
                             FQ_NUM_DESCS * sizeof(u32),
                             PROT_READ | PROT_WRITE,
                             MAP_SHARED | MAP_POPULATE, sfd,
                             XDP_UMEM_PGOFF_FILL_RING);
        lassert(umem->fq.ring != MAP_FAILED);

        umem->fq.mask = FQ_NUM_DESCS - 1;
        umem->fq.size = FQ_NUM_DESCS;

        /* Likewise for the completion ring. */
        umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
                             CQ_NUM_DESCS * sizeof(u32),
                             PROT_READ | PROT_WRITE,
                             MAP_SHARED | MAP_POPULATE, sfd,
                             XDP_UMEM_PGOFF_COMPLETION_RING);
        lassert(umem->cq.ring != MAP_FAILED);

        umem->cq.mask = CQ_NUM_DESCS - 1;
        umem->cq.size = CQ_NUM_DESCS;

        umem->frames = (char (*)[FRAME_SIZE])bufs;
        umem->fd = sfd;

        if (opt_bench == BENCH_TXONLY) {
                int i;

                /* Pre-fill every frame so Tx never has to copy at runtime. */
                for (i = 0; i < NUM_FRAMES; i++)
                        (void)gen_eth_frame(&umem->frames[i][0]);
        }

        return umem;
}
458
/*
 * Create and bind one AF_XDP socket.  When @umem is NULL a new umem is
 * created for this socket; otherwise the socket is bound in shared-umem
 * mode against @umem's owning socket.  Aborts via lassert() on failure.
 */
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
        struct sockaddr_xdp sxdp = {};
        int sfd, ndescs = NUM_DESCS;
        struct xdpsock *xsk;
        bool shared = true;
        u32 i;

        sfd = socket(PF_XDP, SOCK_RAW, 0);
        lassert(sfd >= 0);

        xsk = calloc(1, sizeof(*xsk));
        lassert(xsk);

        xsk->sfd = sfd;
        xsk->outstanding_tx = 0;

        if (!umem) {
                shared = false;
                xsk->umem = xdp_umem_configure(sfd);
        } else {
                xsk->umem = umem;
        }

        lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
                           &ndescs, sizeof(int)) == 0);
        lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
                           &ndescs, sizeof(int)) == 0);

        /* Rx */
        xsk->rx.ring = mmap(NULL,
                            sizeof(struct xdp_ring) +
                            NUM_DESCS * sizeof(struct xdp_desc),
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_POPULATE, sfd,
                            XDP_PGOFF_RX_RING);
        lassert(xsk->rx.ring != MAP_FAILED);

        if (!shared) {
                /* Seed half the fill queue so the kernel has Rx buffers. */
                for (i = 0; i < NUM_DESCS / 2; i++)
                        lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
                                == 0);
        }

        /* Tx */
        xsk->tx.ring = mmap(NULL,
                         sizeof(struct xdp_ring) +
                         NUM_DESCS * sizeof(struct xdp_desc),
                         PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_POPULATE, sfd,
                         XDP_PGOFF_TX_RING);
        lassert(xsk->tx.ring != MAP_FAILED);

        xsk->rx.mask = NUM_DESCS - 1;
        xsk->rx.size = NUM_DESCS;

        xsk->tx.mask = NUM_DESCS - 1;
        xsk->tx.size = NUM_DESCS;

        sxdp.sxdp_family = PF_XDP;
        sxdp.sxdp_ifindex = opt_ifindex;
        sxdp.sxdp_queue_id = opt_queue;
        if (shared) {
                sxdp.sxdp_flags = XDP_SHARED_UMEM;
                sxdp.sxdp_shared_umem_fd = umem->fd;
        }

        lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);

        return xsk;
}
530
531 static void print_benchmark(bool running)
532 {
533         const char *bench_str = "INVALID";
534
535         if (opt_bench == BENCH_RXDROP)
536                 bench_str = "rxdrop";
537         else if (opt_bench == BENCH_TXONLY)
538                 bench_str = "txonly";
539         else if (opt_bench == BENCH_L2FWD)
540                 bench_str = "l2fwd";
541
542         printf("%s:%d %s ", opt_if, opt_queue, bench_str);
543         if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
544                 printf("xdp-skb ");
545         else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
546                 printf("xdp-drv ");
547         else
548                 printf("        ");
549
550         if (opt_poll)
551                 printf("poll() ");
552
553         if (running) {
554                 printf("running...");
555                 fflush(stdout);
556         }
557 }
558
/*
 * Print per-socket rx/tx packet totals and packets-per-second rates since
 * the previous call.  Called periodically by poller() and once on exit.
 */
static void dump_stats(void)
{
        unsigned long now = get_nsecs();
        long dt = now - prev_time;
        int i;

        prev_time = now;

        for (i = 0; i < num_socks; i++) {
                char *fmt = "%-15s %'-11.0f %'-11lu\n";
                double rx_pps, tx_pps;

                /* Packets since the last dump, scaled to packets/second. */
                rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
                         1000000000. / dt;
                tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
                         1000000000. / dt;

                printf("\n sock%d@", i);
                print_benchmark(false);
                printf("\n");

                printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
                       dt / 1000000000.);
                printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
                printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);

                xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
                xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
        }
}
589
590 static void *poller(void *arg)
591 {
592         (void)arg;
593         for (;;) {
594                 sleep(opt_interval);
595                 dump_stats();
596         }
597
598         return NULL;
599 }
600
/*
 * Signal handler for SIGINT/SIGTERM/SIGABRT: print final statistics,
 * detach the XDP program from the interface, and exit successfully.
 */
static void int_exit(int sig)
{
        (void)sig;
        dump_stats();
        bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
        exit(EXIT_SUCCESS);
}
608
/* Long-option table for getopt_long(); mirrors the "rtli:q:psSNn:" string. */
static struct option long_options[] = {
        {"rxdrop", no_argument, 0, 'r'},
        {"txonly", no_argument, 0, 't'},
        {"l2fwd", no_argument, 0, 'l'},
        {"interface", required_argument, 0, 'i'},
        {"queue", required_argument, 0, 'q'},
        {"poll", no_argument, 0, 'p'},
        {"shared-buffer", no_argument, 0, 's'},
        {"xdp-skb", no_argument, 0, 'S'},
        {"xdp-native", no_argument, 0, 'N'},
        {"interval", required_argument, 0, 'n'},
        {0, 0, 0, 0}
};
622
/*
 * Print the usage text to stderr and exit with failure.
 * Fix: "Enfore" typo corrected to "Enforce" in the -N option help line.
 */
static void usage(const char *prog)
{
        const char *str =
                "  Usage: %s [OPTIONS]\n"
                "  Options:\n"
                "  -r, --rxdrop         Discard all incoming packets (default)\n"
                "  -t, --txonly         Only send packets\n"
                "  -l, --l2fwd          MAC swap L2 forwarding\n"
                "  -i, --interface=n    Run on interface n\n"
                "  -q, --queue=n        Use queue n (default 0)\n"
                "  -p, --poll           Use poll syscall\n"
                "  -s, --shared-buffer  Use shared packet buffer\n"
                "  -S, --xdp-skb=n      Use XDP skb-mod\n"
                "  -N, --xdp-native=n   Enforce XDP native mode\n"
                "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
                "\n";
        fprintf(stderr, str, prog);
        exit(EXIT_FAILURE);
}
642
/*
 * Parse argv into the opt_* globals.  Exits via usage() on an unknown
 * option or when the interface name cannot be resolved to an ifindex.
 */
static void parse_command_line(int argc, char **argv)
{
        int option_index, c;

        opterr = 0;     /* suppress getopt's own messages; usage() reports */

        for (;;) {
                c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
                                &option_index);
                if (c == -1)
                        break;

                switch (c) {
                case 'r':
                        opt_bench = BENCH_RXDROP;
                        break;
                case 't':
                        opt_bench = BENCH_TXONLY;
                        break;
                case 'l':
                        opt_bench = BENCH_L2FWD;
                        break;
                case 'i':
                        opt_if = optarg;
                        break;
                case 'q':
                        opt_queue = atoi(optarg);
                        break;
                case 's':
                        opt_shared_packet_buffer = 1;
                        break;
                case 'p':
                        opt_poll = 1;
                        break;
                case 'S':
                        opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
                        break;
                case 'N':
                        opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
                        break;
                case 'n':
                        opt_interval = atoi(optarg);
                        break;
                default:
                        usage(basename(argv[0]));
                }
        }

        opt_ifindex = if_nametoindex(opt_if);
        if (!opt_ifindex) {
                fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
                        opt_if);
                usage(basename(argv[0]));
        }
}
698
699 static void kick_tx(int fd)
700 {
701         int ret;
702
703         ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
704         if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
705                 return;
706         lassert(0);
707 }
708
/*
 * For l2fwd: kick the kernel Tx path, then recycle up to BATCH_SIZE
 * completed Tx frames back onto the fill queue so they can be reused for
 * Rx, updating the outstanding-Tx and statistics counters.
 */
static inline void complete_tx_l2fwd(struct xdpsock *xsk)
{
        u32 descs[BATCH_SIZE];
        unsigned int rcvd;
        size_t ndescs;

        if (!xsk->outstanding_tx)
                return;

        kick_tx(xsk->sfd);
        ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
                 xsk->outstanding_tx;

        /* re-add completed Tx buffers */
        rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
        if (rcvd > 0) {
                umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
                xsk->outstanding_tx -= rcvd;
                xsk->tx_npkts += rcvd;
        }
}
730
/*
 * For txonly: kick the kernel Tx path and reap up to BATCH_SIZE
 * completions.  The frames are not returned to the fill queue, since
 * tx_only() addresses frames directly by index.
 */
static inline void complete_tx_only(struct xdpsock *xsk)
{
        u32 descs[BATCH_SIZE];
        unsigned int rcvd;

        if (!xsk->outstanding_tx)
                return;

        kick_tx(xsk->sfd);

        rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
        if (rcvd > 0) {
                xsk->outstanding_tx -= rcvd;
                xsk->tx_npkts += rcvd;
        }
}
747
/*
 * Receive up to BATCH_SIZE packets on @xsk, discard them, and hand their
 * frames straight back to the kernel via the fill queue.
 */
static void rx_drop(struct xdpsock *xsk)
{
        struct xdp_desc descs[BATCH_SIZE];
        unsigned int rcvd, i;

        rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
        if (!rcvd)
                return;

        for (i = 0; i < rcvd; i++) {
                u32 idx = descs[i].idx;

                lassert(idx < NUM_FRAMES);
#if DEBUG_HEXDUMP
                char *pkt;
                char buf[32];

                pkt = xq_get_data(xsk, idx, descs[i].offset);
                sprintf(buf, "idx=%d", idx);
                hex_dump(pkt, descs[i].len, buf);
#endif
        }

        xsk->rx_npkts += rcvd;

        umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
}
775
776 static void rx_drop_all(void)
777 {
778         struct pollfd fds[MAX_SOCKS + 1];
779         int i, ret, timeout, nfds = 1;
780
781         memset(fds, 0, sizeof(fds));
782
783         for (i = 0; i < num_socks; i++) {
784                 fds[i].fd = xsks[i]->sfd;
785                 fds[i].events = POLLIN;
786                 timeout = 1000; /* 1sn */
787         }
788
789         for (;;) {
790                 if (opt_poll) {
791                         ret = poll(fds, nfds, timeout);
792                         if (ret <= 0)
793                                 continue;
794                 }
795
796                 for (i = 0; i < num_socks; i++)
797                         rx_drop(xsks[i]);
798         }
799 }
800
/*
 * txonly main loop: keep the Tx ring topped up with BATCH_SIZE canned
 * packets, optionally waiting for POLLOUT when -p was given, and reap
 * completions as we go.  Never returns.
 */
static void tx_only(struct xdpsock *xsk)
{
        int timeout, ret, nfds = 1;
        struct pollfd fds[nfds + 1];
        unsigned int idx = 0;

        memset(fds, 0, sizeof(fds));
        fds[0].fd = xsk->sfd;
        fds[0].events = POLLOUT;
        timeout = 1000; /* 1sn */

        for (;;) {
                if (opt_poll) {
                        ret = poll(fds, nfds, timeout);
                        if (ret <= 0)
                                continue;

                        if (fds[0].fd != xsk->sfd ||
                            !(fds[0].revents & POLLOUT))
                                continue;
                }

                if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
                        lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);

                        xsk->outstanding_tx += BATCH_SIZE;
                        idx += BATCH_SIZE;
                        idx %= NUM_FRAMES;      /* wrap back to frame 0 */
                }

                complete_tx_only(xsk);
        }
}
834
/*
 * l2fwd main loop: busy-wait until packets arrive, swap each packet's
 * source/destination MAC addresses in place, and retransmit the same
 * frames on this socket's Tx ring.  Never returns.
 */
static void l2fwd(struct xdpsock *xsk)
{
        for (;;) {
                struct xdp_desc descs[BATCH_SIZE];
                unsigned int rcvd, i;
                int ret;

                /* Busy-poll: reap Tx completions until Rx yields packets. */
                for (;;) {
                        complete_tx_l2fwd(xsk);

                        rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
                        if (rcvd > 0)
                                break;
                }

                for (i = 0; i < rcvd; i++) {
                        char *pkt = xq_get_data(xsk, descs[i].idx,
                                                descs[i].offset);

                        swap_mac_addresses(pkt);
#if DEBUG_HEXDUMP
                        char buf[32];
                        u32 idx = descs[i].idx;

                        sprintf(buf, "idx=%d", idx);
                        hex_dump(pkt, descs[i].len, buf);
#endif
                }

                xsk->rx_npkts += rcvd;

                /* Forward the exact frames we received. */
                ret = xq_enq(&xsk->tx, descs, rcvd);
                lassert(ret == 0);
                xsk->outstanding_tx += rcvd;
        }
}
871
/*
 * Set up the environment (memlock limit, XDP program, BPF maps and AF_XDP
 * sockets), start the statistics thread, then run the selected benchmark
 * loop (which never returns; the program exits via int_exit()).
 */
int main(int argc, char **argv)
{
        struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        char xdp_filename[256];
        int i, ret, key = 0;
        pthread_t pt;

        parse_command_line(argc, argv);

        /* Umem registration pins a lot of memory; lift the memlock limit. */
        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
                fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
                        strerror(errno));
                exit(EXIT_FAILURE);
        }

        /* The BPF object lives next to this binary as <prog>_kern.o. */
        snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);

        if (load_bpf_file(xdp_filename)) {
                fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
                exit(EXIT_FAILURE);
        }

        if (!prog_fd[0]) {
                fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
                        strerror(errno));
                exit(EXIT_FAILURE);
        }

        /* Attach the XDP program to the selected interface. */
        if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }

        /* Tell the BPF program which queue id to redirect from. */
        ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
        if (ret) {
                fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
                exit(EXIT_FAILURE);
        }

        /* Create sockets... */
        xsks[num_socks++] = xsk_configure(NULL);

#if RR_LB
        /* Round-robin load balancing: extra sockets share the first umem. */
        for (i = 0; i < MAX_SOCKS - 1; i++)
                xsks[num_socks++] = xsk_configure(xsks[0]->umem);
#endif

        /* ...and insert them into the map. */
        for (i = 0; i < num_socks; i++) {
                key = i;
                ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
                if (ret) {
                        fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
                        exit(EXIT_FAILURE);
                }
        }

        /* Detach the XDP program cleanly on termination. */
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
        signal(SIGABRT, int_exit);

        setlocale(LC_ALL, "");  /* enables %' thousands grouping in stats */

        ret = pthread_create(&pt, NULL, poller, NULL);
        lassert(ret == 0);

        prev_time = get_nsecs();

        if (opt_bench == BENCH_RXDROP)
                rx_drop_all();
        else if (opt_bench == BENCH_TXONLY)
                tx_only(xsks[0]);
        else
                l2fwd(xsks[0]);

        return 0;
}