asedeno.scripts.mit.edu Git - linux.git/blobdiff - samples/bpf/xdpsock_user.c
Merge tag 'for-linus-5.4-ofs1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubca...
[linux.git] / samples / bpf / xdpsock_user.c
index 93eaaf7239b293d366fedb57ac1e6e7df643385d..df011ac3340222236c25a7ad4f17a07cc316c9ee 100644 (file)
@@ -67,8 +67,14 @@ static int opt_ifindex;
 static int opt_queue;
 static int opt_poll;
 static int opt_interval = 1;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u32 opt_umem_flags;
+static int opt_unaligned_chunks;
+static int opt_mmap_flags;
 static u32 opt_xdp_bind_flags;
 static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+static int opt_timeout = 1000;
+static bool opt_need_wakeup = true;
 static __u32 prog_id;
 
 struct xsk_umem_info {
@@ -282,7 +288,9 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
                .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
                .frame_size = opt_xsk_frame_size,
                .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+               .flags = opt_umem_flags
        };
+
        int ret;
 
        umem = calloc(1, sizeof(*umem));
@@ -291,6 +299,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
 
        ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
                               &cfg);
+
        if (ret)
                exit_with_error(-ret);
 
@@ -352,6 +361,8 @@ static struct option long_options[] = {
        {"zero-copy", no_argument, 0, 'z'},
        {"copy", no_argument, 0, 'c'},
        {"frame-size", required_argument, 0, 'f'},
+       {"no-need-wakeup", no_argument, 0, 'm'},
+       {"unaligned", no_argument, 0, 'u'},
        {0, 0, 0, 0}
 };
 
@@ -372,6 +383,9 @@ static void usage(const char *prog)
                "  -z, --zero-copy      Force zero-copy mode.\n"
                "  -c, --copy           Force copy mode.\n"
                "  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"
+               "  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
+               "  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
+               "  -u, --unaligned      Enable unaligned chunk placement\n"
                "\n";
        fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
        exit(EXIT_FAILURE);
@@ -384,8 +398,8 @@ static void parse_command_line(int argc, char **argv)
        opterr = 0;
 
        for (;;) {
-               c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
-                               &option_index);
+               c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+                               long_options, &option_index);
                if (c == -1)
                        break;
 
@@ -424,12 +438,21 @@ static void parse_command_line(int argc, char **argv)
                case 'c':
                        opt_xdp_bind_flags |= XDP_COPY;
                        break;
+               case 'u':
+                       opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+                       opt_unaligned_chunks = 1;
+                       opt_mmap_flags = MAP_HUGETLB;
+                       break;
                case 'F':
                        opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
                        break;
                case 'f':
                        opt_xsk_frame_size = atoi(optarg);
+               case 'm':
+                       opt_need_wakeup = false;
+                       opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
                        break;
+
                default:
                        usage(basename(argv[0]));
                }
@@ -442,7 +465,8 @@ static void parse_command_line(int argc, char **argv)
                usage(basename(argv[0]));
        }
 
-       if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+       if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
+           !opt_unaligned_chunks) {
                fprintf(stderr, "--frame-size=%d is not a power of two\n",
                        opt_xsk_frame_size);
                usage(basename(argv[0]));
@@ -459,8 +483,10 @@ static void kick_tx(struct xsk_socket_info *xsk)
        exit_with_error(errno);
 }
 
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
+static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
+                                    struct pollfd *fds)
 {
+       struct xsk_umem_info *umem = xsk->umem;
        u32 idx_cq = 0, idx_fq = 0;
        unsigned int rcvd;
        size_t ndescs;
@@ -468,27 +494,30 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
        if (!xsk->outstanding_tx)
                return;
 
-       kick_tx(xsk);
+       if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
+
        ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
                xsk->outstanding_tx;
 
        /* re-add completed Tx buffers */
-       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq);
+       rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
        if (rcvd > 0) {
                unsigned int i;
                int ret;
 
-               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+               ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
                while (ret != rcvd) {
                        if (ret < 0)
                                exit_with_error(-ret);
-                       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
-                                                    &idx_fq);
+                       if (xsk_ring_prod__needs_wakeup(&umem->fq))
+                               ret = poll(fds, num_socks, opt_timeout);
+                       ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
                }
+
                for (i = 0; i < rcvd; i++)
-                       *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) =
-                               *xsk_ring_cons__comp_addr(&xsk->umem->cq,
-                                                         idx_cq++);
+                       *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
+                               *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
 
                xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
                xsk_ring_cons__release(&xsk->umem->cq, rcvd);
@@ -505,7 +534,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
        if (!xsk->outstanding_tx)
                return;
 
-       kick_tx(xsk);
+       if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
 
        rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
        if (rcvd > 0) {
@@ -515,30 +545,38 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
        }
 }
 
-static void rx_drop(struct xsk_socket_info *xsk)
+static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
 {
        unsigned int rcvd, i;
        u32 idx_rx = 0, idx_fq = 0;
        int ret;
 
        rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-       if (!rcvd)
+       if (!rcvd) {
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
                return;
+       }
 
        ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
        while (ret != rcvd) {
                if (ret < 0)
                        exit_with_error(-ret);
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
                ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
        }
 
        for (i = 0; i < rcvd; i++) {
                u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
                u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+               u64 orig = xsk_umem__extract_addr(addr);
+
+               addr = xsk_umem__add_offset_to_addr(addr);
                char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
 
                hex_dump(pkt, len, addr);
-               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr;
+               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
        }
 
        xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
@@ -549,42 +587,65 @@ static void rx_drop(struct xsk_socket_info *xsk)
 static void rx_drop_all(void)
 {
        struct pollfd fds[MAX_SOCKS + 1];
-       int i, ret, timeout, nfds = 1;
+       int i, ret;
 
        memset(fds, 0, sizeof(fds));
 
        for (i = 0; i < num_socks; i++) {
                fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
                fds[i].events = POLLIN;
-               timeout = 1000; /* 1sn */
        }
 
        for (;;) {
                if (opt_poll) {
-                       ret = poll(fds, nfds, timeout);
+                       ret = poll(fds, num_socks, opt_timeout);
                        if (ret <= 0)
                                continue;
                }
 
                for (i = 0; i < num_socks; i++)
-                       rx_drop(xsks[i]);
+                       rx_drop(xsks[i], fds);
+       }
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+{
+       u32 idx;
+
+       if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
+               unsigned int i;
+
+               for (i = 0; i < BATCH_SIZE; i++) {
+                       xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
+                               (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+                       xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
+                               sizeof(pkt_data) - 1;
+               }
+
+               xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
+               xsk->outstanding_tx += BATCH_SIZE;
+               frame_nb += BATCH_SIZE;
+               frame_nb %= NUM_FRAMES;
        }
+
+       complete_tx_only(xsk);
 }
 
-static void tx_only(struct xsk_socket_info *xsk)
+static void tx_only_all(void)
 {
-       int timeout, ret, nfds = 1;
-       struct pollfd fds[nfds + 1];
-       u32 idx, frame_nb = 0;
+       struct pollfd fds[MAX_SOCKS];
+       u32 frame_nb[MAX_SOCKS] = {};
+       int i, ret;
 
        memset(fds, 0, sizeof(fds));
-       fds[0].fd = xsk_socket__fd(xsk->xsk);
-       fds[0].events = POLLOUT;
-       timeout = 1000; /* 1sn */
+       for (i = 0; i < num_socks; i++) {
+               fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
+               fds[0].events = POLLOUT;
+       }
 
        for (;;) {
                if (opt_poll) {
-                       ret = poll(fds, nfds, timeout);
+                       ret = poll(fds, num_socks, opt_timeout);
                        if (ret <= 0)
                                continue;
 
@@ -592,69 +653,78 @@ static void tx_only(struct xsk_socket_info *xsk)
                                continue;
                }
 
-               if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
-                   BATCH_SIZE) {
-                       unsigned int i;
-
-                       for (i = 0; i < BATCH_SIZE; i++) {
-                               xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
-                                       = (frame_nb + i) * opt_xsk_frame_size;
-                               xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
-                                       sizeof(pkt_data) - 1;
-                       }
-
-                       xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
-                       xsk->outstanding_tx += BATCH_SIZE;
-                       frame_nb += BATCH_SIZE;
-                       frame_nb %= NUM_FRAMES;
-               }
-
-               complete_tx_only(xsk);
+               for (i = 0; i < num_socks; i++)
+                       tx_only(xsks[i], frame_nb[i]);
        }
 }
 
-static void l2fwd(struct xsk_socket_info *xsk)
+static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
 {
-       for (;;) {
-               unsigned int rcvd, i;
-               u32 idx_rx = 0, idx_tx = 0;
-               int ret;
+       unsigned int rcvd, i;
+       u32 idx_rx = 0, idx_tx = 0;
+       int ret;
 
-               for (;;) {
-                       complete_tx_l2fwd(xsk);
+       complete_tx_l2fwd(xsk, fds);
 
-                       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
-                                                  &idx_rx);
-                       if (rcvd > 0)
-                               break;
-               }
+       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+       if (!rcvd) {
+               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+                       ret = poll(fds, num_socks, opt_timeout);
+               return;
+       }
 
+       ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
+       while (ret != rcvd) {
+               if (ret < 0)
+                       exit_with_error(-ret);
+               if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+                       kick_tx(xsk);
                ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-               while (ret != rcvd) {
-                       if (ret < 0)
-                               exit_with_error(-ret);
-                       ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-               }
+       }
 
-               for (i = 0; i < rcvd; i++) {
-                       u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
-                                                         idx_rx)->addr;
-                       u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
-                                                        idx_rx++)->len;
-                       char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+       for (i = 0; i < rcvd; i++) {
+               u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+               u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+               u64 orig = addr;
 
-                       swap_mac_addresses(pkt);
+               addr = xsk_umem__add_offset_to_addr(addr);
+               char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
 
-                       hex_dump(pkt, len, addr);
-                       xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
-                       xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
-               }
+               swap_mac_addresses(pkt);
+
+               hex_dump(pkt, len, addr);
+               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
+               xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
+       }
+
+       xsk_ring_prod__submit(&xsk->tx, rcvd);
+       xsk_ring_cons__release(&xsk->rx, rcvd);
+
+       xsk->rx_npkts += rcvd;
+       xsk->outstanding_tx += rcvd;
+}
 
-               xsk_ring_prod__submit(&xsk->tx, rcvd);
-               xsk_ring_cons__release(&xsk->rx, rcvd);
+static void l2fwd_all(void)
+{
+       struct pollfd fds[MAX_SOCKS];
+       int i, ret;
+
+       memset(fds, 0, sizeof(fds));
 
-               xsk->rx_npkts += rcvd;
-               xsk->outstanding_tx += rcvd;
+       for (i = 0; i < num_socks; i++) {
+               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
+               fds[i].events = POLLOUT | POLLIN;
+       }
+
+       for (;;) {
+               if (opt_poll) {
+                       ret = poll(fds, num_socks, opt_timeout);
+                       if (ret <= 0)
+                               continue;
+               }
+
+               for (i = 0; i < num_socks; i++)
+                       l2fwd(xsks[i], fds);
        }
 }
 
@@ -674,11 +744,14 @@ int main(int argc, char **argv)
                exit(EXIT_FAILURE);
        }
 
-       ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
-                            NUM_FRAMES * opt_xsk_frame_size);
-       if (ret)
-               exit_with_error(ret);
-
+       /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
+       bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
+       if (bufs == MAP_FAILED) {
+               printf("ERROR: mmap failed\n");
+               exit(EXIT_FAILURE);
+       }
        /* Create sockets... */
        umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
        xsks[num_socks++] = xsk_configure_socket(umem);
@@ -705,9 +778,9 @@ int main(int argc, char **argv)
        if (opt_bench == BENCH_RXDROP)
                rx_drop_all();
        else if (opt_bench == BENCH_TXONLY)
-               tx_only(xsks[0]);
+               tx_only_all();
        else
-               l2fwd(xsks[0]);
+               l2fwd_all();
 
        return 0;
 }