bpf: cpumap add tracepoints
author Jesper Dangaard Brouer <brouer@redhat.com>
Mon, 16 Oct 2017 10:19:44 +0000 (12:19 +0200)
committer David S. Miller <davem@davemloft.net>
Wed, 18 Oct 2017 11:12:18 +0000 (12:12 +0100)
This adds two tracepoints to the cpumap: one for the enqueue side,
trace_xdp_cpumap_enqueue(), and one for the kthread dequeue side,
trace_xdp_cpumap_kthread().

To mitigate the tracepoint overhead, these are invoked during the
enqueue/dequeue bulking phases, thus amortizing the cost.

The obvious use-cases are debugging and monitoring.  The less obvious
use-case is using these as a feedback loop to gauge system load.  One
can imagine auto-scaling by activating, adding or removing worker CPUs
on demand.
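
As a usage sketch (not part of this patch): the events live in the "xdp"
trace group, so once the tracepoints are compiled in they can be captured
with standard tracing tools, e.g.:

  # perf record -e xdp:xdp_cpumap_enqueue -e xdp:xdp_cpumap_kthread -a -- sleep 10
  # perf script

or enabled via tracefs and read from trace_pipe.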

V4: tracepoint: remove the time_limit info, add sched info instead

V8: introduce struct bpf_cpu_map_entry members cpu + map_id in this patch

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/trace/events/xdp.h
kernel/bpf/cpumap.c

diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index eb2ece96c1a2b38608c35a7fe86ca75c9533811d..0c8dec61987e0e6135f4c30637aa824d0c0fd6cd 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -150,6 +150,76 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
         trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
                                    err, map, idx)
 
+TRACE_EVENT(xdp_cpumap_kthread,
+
+       TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
+                int sched),
+
+       TP_ARGS(map_id, processed, drops, sched),
+
+       TP_STRUCT__entry(
+               __field(int, map_id)
+               __field(u32, act)
+               __field(int, cpu)
+               __field(unsigned int, drops)
+               __field(unsigned int, processed)
+               __field(int, sched)
+       ),
+
+       TP_fast_assign(
+               __entry->map_id         = map_id;
+               __entry->act            = XDP_REDIRECT;
+               __entry->cpu            = smp_processor_id();
+               __entry->drops          = drops;
+               __entry->processed      = processed;
+               __entry->sched  = sched;
+       ),
+
+       TP_printk("kthread"
+                 " cpu=%d map_id=%d action=%s"
+                 " processed=%u drops=%u"
+                 " sched=%d",
+                 __entry->cpu, __entry->map_id,
+                 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+                 __entry->processed, __entry->drops,
+                 __entry->sched)
+);
+
+TRACE_EVENT(xdp_cpumap_enqueue,
+
+       TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
+                int to_cpu),
+
+       TP_ARGS(map_id, processed, drops, to_cpu),
+
+       TP_STRUCT__entry(
+               __field(int, map_id)
+               __field(u32, act)
+               __field(int, cpu)
+               __field(unsigned int, drops)
+               __field(unsigned int, processed)
+               __field(int, to_cpu)
+       ),
+
+       TP_fast_assign(
+               __entry->map_id         = map_id;
+               __entry->act            = XDP_REDIRECT;
+               __entry->cpu            = smp_processor_id();
+               __entry->drops          = drops;
+               __entry->processed      = processed;
+               __entry->to_cpu         = to_cpu;
+       ),
+
+       TP_printk("enqueue"
+                 " cpu=%d map_id=%d action=%s"
+                 " processed=%u drops=%u"
+                 " to_cpu=%d",
+                 __entry->cpu, __entry->map_id,
+                 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+                 __entry->processed, __entry->drops,
+                 __entry->to_cpu)
+);
+
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ee7adf4352dd4ed3738b5ee510ccef1f21b44357..b4358d84ddf19e9b9a3e474e7de5d93bbaca0867 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/capability.h>
+#include <trace/events/xdp.h>
 
 #include <linux/netdevice.h>   /* netif_receive_skb_core */
 #include <linux/etherdevice.h> /* eth_type_trans */
@@ -43,6 +44,8 @@ struct xdp_bulk_queue {
 
 /* Struct for every remote "destination" CPU in map */
 struct bpf_cpu_map_entry {
+       u32 cpu;    /* kthread CPU and map index */
+       int map_id; /* Back reference to map */
        u32 qsize;  /* Queue size placeholder for map lookup */
 
        /* XDP can run multiple RX-ring queues, need __percpu enqueue store */
@@ -280,15 +283,16 @@ static int cpu_map_kthread_run(void *data)
         * kthread_stop signal until queue is empty.
         */
        while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
-               unsigned int processed = 0, drops = 0;
+               unsigned int processed = 0, drops = 0, sched = 0;
                struct xdp_pkt *xdp_pkt;
 
                /* Release CPU reschedule checks */
                if (__ptr_ring_empty(rcpu->queue)) {
                        __set_current_state(TASK_INTERRUPTIBLE);
                        schedule();
+                       sched = 1;
                } else {
-                       cond_resched();
+                       sched = cond_resched();
                }
                __set_current_state(TASK_RUNNING);
 
@@ -318,6 +322,9 @@ static int cpu_map_kthread_run(void *data)
                        if (++processed == 8)
                                break;
                }
+               /* Feedback loop via tracepoint */
+               trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+
                local_bh_enable(); /* resched point, may call do_softirq() */
        }
        __set_current_state(TASK_RUNNING);
@@ -354,7 +361,9 @@ struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
        if (err)
                goto free_queue;
 
-       rcpu->qsize = qsize;
+       rcpu->cpu    = cpu;
+       rcpu->map_id = map_id;
+       rcpu->qsize  = qsize;
 
        /* Setup kthread */
        rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
@@ -584,6 +593,8 @@ const struct bpf_map_ops cpu_map_ops = {
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
                             struct xdp_bulk_queue *bq)
 {
+       unsigned int processed = 0, drops = 0;
+       const int to_cpu = rcpu->cpu;
        struct ptr_ring *q;
        int i;
 
@@ -599,13 +610,16 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 
                err = __ptr_ring_produce(q, xdp_pkt);
                if (err) {
-                       /* Free xdp_pkt */
-                       page_frag_free(xdp_pkt);
+                       drops++;
+                       page_frag_free(xdp_pkt); /* Free xdp_pkt */
                }
+               processed++;
        }
        bq->count = 0;
        spin_unlock(&q->producer_lock);
 
+       /* Feedback loop via tracepoints */
+       trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
        return 0;
 }
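
For reference, given the TP_printk formats above, the two events render
roughly as follows in trace_pipe (field values illustrative only):

  xdp_cpumap_enqueue: enqueue cpu=2 map_id=5 action=REDIRECT processed=8 drops=0 to_cpu=3
  xdp_cpumap_kthread: kthread cpu=3 map_id=5 action=REDIRECT processed=8 drops=0 sched=1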