asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge branch 'improve_perf_barriers'
author Alexei Starovoitov <ast@kernel.org>
Fri, 19 Oct 2018 20:43:09 +0000 (13:43 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 19 Oct 2018 20:43:09 +0000 (13:43 -0700)
Daniel Borkmann says:

====================
This set first adds smp_* barrier variants to tools infrastructure
and updates perf and libbpf to make use of them. For details, please
see individual patches, thanks!

Arnaldo, if there are no objections, could this be routed via bpf-next
with Acked-by's due to later dependencies in libbpf? Alternatively,
I could also get the 2nd patch out during merge window, but perhaps
it's okay to do in one go as there shouldn't be much conflict in perf
itself.

Thanks!

v1 -> v2:
  - add common helper and switch to acquire/release variants
    when possible, thanks Peter!
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
tools/arch/arm64/include/asm/barrier.h
tools/arch/ia64/include/asm/barrier.h
tools/arch/powerpc/include/asm/barrier.h
tools/arch/s390/include/asm/barrier.h
tools/arch/sparc/include/asm/barrier_64.h
tools/arch/x86/include/asm/barrier.h
tools/include/asm/barrier.h
tools/include/linux/ring_buffer.h [new file with mode: 0644]
tools/lib/bpf/libbpf.c
tools/perf/util/mmap.h

index 40bde6b235013f10c5c6d02ee007afa2dc2a94fa..12835ea0e4173c281e5ddfe0883595492e985b09 100644 (file)
 #define wmb()          asm volatile("dmb ishst" ::: "memory")
 #define rmb()          asm volatile("dmb ishld" ::: "memory")
 
+#define smp_store_release(p, v)                                        \
+do {   /* Store v to *p with an AArch64 stlr* instruction. */  \
+       union { typeof(*p) __val; char __c[1]; } __u =          \
+               { .__val = (__force typeof(*p)) (v) };          \
+       /* __u lets v be re-read as a fixed-width integer. */   \
+       switch (sizeof(*p)) {                                   \
+       case 1:                                                 \
+               asm volatile ("stlrb %w1, %0"                   \
+                               : "=Q" (*p)                     \
+                               : "r" (*(__u8 *)__u.__c)        \
+                               : "memory");                    \
+               break;                                          \
+       case 2:                                                 \
+               asm volatile ("stlrh %w1, %0"                   \
+                               : "=Q" (*p)                     \
+                               : "r" (*(__u16 *)__u.__c)       \
+                               : "memory");                    \
+               break;                                          \
+       case 4:                                                 \
+               asm volatile ("stlr %w1, %0"                    \
+                               : "=Q" (*p)                     \
+                               : "r" (*(__u32 *)__u.__c)       \
+                               : "memory");                    \
+               break;                                          \
+       case 8:                                                 \
+               asm volatile ("stlr %1, %0"                     \
+                               : "=Q" (*p)                     \
+                               : "r" (*(__u64 *)__u.__c)       \
+                               : "memory");                    \
+               break;                                          \
+       default:                                                \
+               /* Only to shut up gcc; no store is done here. */ \
+               mb();                                           \
+               break;                                          \
+       }                                                       \
+} while (0)
+
+#define smp_load_acquire(p)                                    \
+({     /* Load *p with an AArch64 ldar* instruction. */        \
+       union { typeof(*p) __val; char __c[1]; } __u;           \
+       /* __u receives the loaded bytes at the width of *p. */ \
+       switch (sizeof(*p)) {                                   \
+       case 1:                                                 \
+               asm volatile ("ldarb %w0, %1"                   \
+                       : "=r" (*(__u8 *)__u.__c)               \
+                       : "Q" (*p) : "memory");                 \
+               break;                                          \
+       case 2:                                                 \
+               asm volatile ("ldarh %w0, %1"                   \
+                       : "=r" (*(__u16 *)__u.__c)              \
+                       : "Q" (*p) : "memory");                 \
+               break;                                          \
+       case 4:                                                 \
+               asm volatile ("ldar %w0, %1"                    \
+                       : "=r" (*(__u32 *)__u.__c)              \
+                       : "Q" (*p) : "memory");                 \
+               break;                                          \
+       case 8:                                                 \
+               asm volatile ("ldar %0, %1"                     \
+                       : "=r" (*(__u64 *)__u.__c)              \
+                       : "Q" (*p) : "memory");                 \
+               break;                                          \
+       default:                                                \
+               /* Only to shut up gcc; __u is left unset here. */ \
+               mb();                                           \
+               break;                                          \
+       }                                                       \
+       __u.__val;                                              \
+})
+
 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
index d808ee0e77b58bacc57e8b62d77237229d8dced0..4d471d9511a54db552c63951651539b7d0fdd3a2 100644 (file)
 #define rmb()          mb()
 #define wmb()          mb()
 
+#define smp_store_release(p, v)                        \
+do {                                           \
+       barrier();                              \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+
+#define smp_load_acquire(p)                    \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       barrier();                              \
+       ___p1;                                  \
+})
+
 #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
index a634da05bc9735932a0a79674b977639d8593073..905a2c66d96d9a9039c71cb651b9f9c9727a9e52 100644 (file)
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
+#if defined(__powerpc64__)
+#define smp_lwsync()   __asm__ __volatile__ ("lwsync" : : : "memory")
+
+#define smp_store_release(p, v)                        \
+do {                                           \
+       smp_lwsync();                           \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+
+#define smp_load_acquire(p)                    \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       smp_lwsync();                           \
+       ___p1;                                  \
+})
+#endif /* defined(__powerpc64__) */
 #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
index 5030c99f47d2057e916edd38a2d91ed15beb63c6..de362fa664d4f99c7138d495d8fdf53783001891 100644 (file)
 #define rmb()                          mb()
 #define wmb()                          mb()
 
+#define smp_store_release(p, v)                        \
+do {                                           \
+       barrier();                              \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+
+#define smp_load_acquire(p)                    \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       barrier();                              \
+       ___p1;                                  \
+})
+
 #endif /* __TOOLS_LIB_ASM_BARRIER_H */
index ba61344287d50770e8415bd9798379c9597fbad4..cfb0fdc8ccf0934b119a8a566c80c88ee677116e 100644 (file)
@@ -40,4 +40,17 @@ do { __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t" \
 #define rmb()  __asm__ __volatile__("":::"memory")
 #define wmb()  __asm__ __volatile__("":::"memory")
 
+#define smp_store_release(p, v)                        \
+do {                                           \
+       barrier();                              \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+
+#define smp_load_acquire(p)                    \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       barrier();                              \
+       ___p1;                                  \
+})
+
 #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
index 8774dee27471e598084f5be568c7543ef49a16a5..58919868473c134f63a2ad42bd64bac8ac46fabd 100644 (file)
 #define wmb()  asm volatile("sfence" ::: "memory")
 #endif
 
+#if defined(__x86_64__)
+#define smp_store_release(p, v)                        \
+do {                                           \
+       barrier();                              \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+
+#define smp_load_acquire(p)                    \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       barrier();                              \
+       ___p1;                                  \
+})
+#endif /* defined(__x86_64__) */
 #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
index 391d942536e53ebd6edd7dc28421e53d65e5c54d..8d378c57cb011414bd16fded0d977f9b64f933cf 100644 (file)
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/compiler.h>
 #if defined(__i386__) || defined(__x86_64__)
 #include "../../arch/x86/include/asm/barrier.h"
 #elif defined(__arm__)
 #else
 #include <asm-generic/barrier.h>
 #endif
+
+/*
+ * Generic fallback smp_*() definitions for archs that haven't
+ * been updated yet.
+ */
+
+/* Unless already provided, map each smp_*() barrier onto rmb()/wmb()/mb(). */
+#if !defined(smp_rmb)
+# define smp_rmb()     rmb()
+#endif
+
+#if !defined(smp_wmb)
+# define smp_wmb()     wmb()
+#endif
+
+#if !defined(smp_mb)
+# define smp_mb()      mb()
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)               \
+do {                                           \
+       smp_mb();                               \
+       WRITE_ONCE(*p, v);                      \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p)                   \
+({                                             \
+       typeof(*p) ___p1 = READ_ONCE(*p);       \
+       smp_mb();                               \
+       ___p1;                                  \
+})
+#endif
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
new file mode 100644 (file)
index 0000000..9a083ae
--- /dev/null
@@ -0,0 +1,73 @@
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+
+/*
+ * Contract with kernel for walking the perf ring buffer from
+ * user space requires the following barrier pairing (quote
+ * from kernel/events/ring_buffer.c):
+ *
+ *   Since the mmap() consumer (userspace) can run on a
+ *   different CPU:
+ *
+ *   kernel                             user
+ *
+ *   if (LOAD ->data_tail) {            LOAD ->data_head
+ *                      (A)             smp_rmb()       (C)
+ *      STORE $data                     LOAD $data
+ *      smp_wmb()       (B)             smp_mb()        (D)
+ *      STORE ->data_head               STORE ->data_tail
+ *   }
+ *
+ *   Where A pairs with D, and B pairs with C.
+ *
+ *   In our case A is a control dependency that separates the
+ *   load of the ->data_tail and the stores of $data. In case
+ *   ->data_tail indicates there is no room in the buffer to
+ *   store $data we do not.
+ *
+ *   D needs to be a full barrier since it separates the data
+ *   READ from the tail WRITE.
+ *
+ *   For B a WMB is sufficient since it separates two WRITEs,
+ *   and for C an RMB is sufficient since it separates two READs.
+ *
+ * Note, instead of B, C, D we could also use smp_store_release()
+ * in B and D as well as smp_load_acquire() in C.
+ *
+ * However, this optimization does not make sense for all kernel
+ * supported architectures since for a fair number it would
+ * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
+ * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
+ *
+ * Thus, for those architectures, smp_wmb() in B and smp_rmb() in C
+ * are still less expensive. For D, smp_store_release() has either
+ * the same cost or is less expensive; for example, thanks to TSO,
+ * x86 can avoid the CPU barrier entirely.
+ */
+
+static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+       u64 head;
+
+       /*
+        * Use smp_load_acquire() only on architectures where it does not
+        * fall back to a READ_ONCE() + smp_mb() pair; everywhere else read
+        * ->data_head with READ_ONCE() followed by smp_rmb().
+        */
+#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+    defined(__ia64__) || (defined(__sparc__) && defined(__arch64__))
+       head = smp_load_acquire(&base->data_head);
+#else
+       head = READ_ONCE(base->data_head);
+       smp_rmb();
+#endif
+       return head;
+}
+
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+                                         u64 tail)
+{      /* Write the new ->data_tail value with release ordering. */
+       smp_store_release(&base->data_tail, tail); /* (D), pairs with (A) */
+}
+
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
index bd71efcc53bed29e8445e54102e19cb45ef5a9ef..0c21355f04a774d531e83b84fec0307ebf1b8501 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -2418,13 +2419,12 @@ bpf_perf_event_read_simple(void *mem, unsigned long size,
                           unsigned long page_size, void **buf, size_t *buf_len,
                           bpf_perf_event_print_t fn, void *priv)
 {
-       volatile struct perf_event_mmap_page *header = mem;
+       struct perf_event_mmap_page *header = mem;
+       __u64 data_head = ring_buffer_read_head(header);
        __u64 data_tail = header->data_tail;
-       __u64 data_head = header->data_head;
        int ret = LIBBPF_PERF_EVENT_ERROR;
        void *base, *begin, *end;
 
-       asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
        if (data_head == data_tail)
                return LIBBPF_PERF_EVENT_CONT;
 
@@ -2467,8 +2467,6 @@ bpf_perf_event_read_simple(void *mem, unsigned long size,
                data_tail += ehdr->size;
        }
 
-       __sync_synchronize(); /* smp_mb() */
-       header->data_tail = data_tail;
-
+       ring_buffer_write_tail(header, data_tail);
        return ret;
 }
index 05a6d47c79561d5a62c04ae9d600a0ecd16f39b4..8f6531fd4dadbdacb51208b0fc8e0ded11b12b34 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/compiler.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
-#include <asm/barrier.h>
+#include <linux/ring_buffer.h>
 #include <stdbool.h>
 #include "auxtrace.h"
 #include "event.h"
@@ -71,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map);
 
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
-       struct perf_event_mmap_page *pc = mm->base;
-       u64 head = READ_ONCE(pc->data_head);
-       rmb();
-       return head;
+       return ring_buffer_read_head(mm->base);
 }
 
 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
-       struct perf_event_mmap_page *pc = md->base;
-
-       /*
-        * ensure all reads are done before we write the tail out.
-        */
-       mb();
-       pc->data_tail = tail;
+       ring_buffer_write_tail(md->base, tail);
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);