[linux.git] kernel/trace/trace.c - tracing: Add conditional snapshot
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring-buffer, such as trace_printk(), could
62  * occur at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
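/*
 * Example (illustrative sketch; example_use_trace_array() is hypothetical):
 * a caller pins an instance with trace_array_get() before using it and
 * drops the reference with trace_array_put() when done.
 */
static __maybe_unused int example_use_trace_array(struct trace_array *tr)
{
        int ret;

        ret = trace_array_get(tr);      /* -ENODEV if @tr is not on the list */
        if (ret < 0)
                return ret;

        /* ... safely use @tr here; it will not go away underneath us ... */

        trace_array_put(tr);
        return 0;
}
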
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
472
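/*
 * Example (illustrative sketch): wiring the helpers above into a seq_file.
 * The example_pids_* wrappers and the use of m->private to reach the
 * trace_pid_list are hypothetical; actual users locate the list through
 * their own trace_array.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;

        return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_sops __maybe_unused = {
        .start  = example_pids_start,
        .next   = example_pids_next,
        .stop   = example_pids_stop,
        .show   = trace_pid_show,       /* prints one pid per line */
};
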
473 /* 128 (PID_BUF_SIZE + 1) should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always create a new array. The write is an all-or-nothing
494          * operation: a new array is built when the user adds new pids,
495          * and if the operation fails, the current list is not
496          * modified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522
523         while (cnt > 0) {
524
525                 pos = 0;
526
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been enabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low value of 16384.
613  * If a dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. Anyway, this is both
615  * boot-time and run-time configurable.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a linked list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek(), etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different CPU ring
645  * buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
710
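/*
 * Example (illustrative sketch; example_read_cpu() is hypothetical): the
 * expected calling pattern for a reader that consumes events from one
 * CPU buffer.
 */
static __maybe_unused void example_read_cpu(int cpu)
{
        trace_access_lock(cpu);         /* serialize against other consumers */
        /* ... consume events from @cpu's ring buffer here ... */
        trace_access_unlock(cpu);
}
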
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
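/*
 * Example (illustrative sketch; example_note() is hypothetical): callers
 * normally go through the trace_puts() macro, which selects
 * __trace_bputs() for string literals and __trace_puts() otherwise.
 * A direct call looks like this:
 */
static __maybe_unused void example_note(const char *msg)
{
        /* write @msg into the top-level trace buffer, tagged with this IP */
        __trace_puts(_THIS_IP_, msg, strlen(msg));
}
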
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot cannot be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id(), cond_data);
924         local_irq_restore(flags);
925 }
926
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929         tracing_snapshot_instance_cond(tr, NULL);
930 }
931
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot beforehand, either with
940  * tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, this will stop tracing,
944  * basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948         struct trace_array *tr = &global_trace;
949
950         tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:         The tracing instance to snapshot
957  * @cond_data:  The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969         tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:         The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989         void *cond_data = NULL;
990
991         arch_spin_lock(&tr->max_lock);
992
993         if (tr->cond_snapshot)
994                 cond_data = tr->cond_snapshot->cond_data;
995
996         arch_spin_unlock(&tr->max_lock);
997
998         return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003                                         struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008         int ret;
1009
1010         if (!tr->allocated_snapshot) {
1011
1012                 /* allocate spare buffer */
1013                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015                 if (ret < 0)
1016                         return ret;
1017
1018                 tr->allocated_snapshot = true;
1019         }
1020
1021         return 0;
1022 }
1023
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026         /*
1027          * We don't free the ring buffer. instead, resize it because
1028          * The max_tr ring buffer has some state (e.g. ring->clock) and
1029          * we want preserve it.
1030          */
1031         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032         set_buffer_entries(&tr->max_buffer, 1);
1033         tracing_reset_online_cpus(&tr->max_buffer);
1034         tr->allocated_snapshot = false;
1035 }
1036
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049         struct trace_array *tr = &global_trace;
1050         int ret;
1051
1052         ret = tracing_alloc_snapshot_instance(tr);
1053         WARN_ON(ret < 0);
1054
1055         return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072         int ret;
1073
1074         ret = tracing_alloc_snapshot();
1075         if (ret < 0)
1076                 return;
1077
1078         tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081
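/*
 * Example (illustrative sketch; example_capture() is hypothetical):
 * allocate the spare buffer once from a context that may sleep, then
 * take snapshots later when the interesting condition hits.
 */
static __maybe_unused void example_capture(void)
{
        if (tracing_alloc_snapshot() < 0)       /* may sleep */
                return;

        /* ... later, when the condition of interest is detected ... */
        tracing_snapshot();     /* swap the live buffer with the spare one */
}
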
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:         The tracing instance
1085  * @cond_data:  User data to associate with the snapshot
1086  * @update:     Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096                                  cond_update_fn_t update)
1097 {
1098         struct cond_snapshot *cond_snapshot;
1099         int ret = 0;
1100
1101         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102         if (!cond_snapshot)
1103                 return -ENOMEM;
1104
1105         cond_snapshot->cond_data = cond_data;
1106         cond_snapshot->update = update;
1107
1108         mutex_lock(&trace_types_lock);
1109
1110         ret = tracing_alloc_snapshot_instance(tr);
1111         if (ret)
1112                 goto fail_unlock;
1113
1114         if (tr->current_trace->use_max_tr) {
1115                 ret = -EBUSY;
1116                 goto fail_unlock;
1117         }
1118
1119         if (tr->cond_snapshot) {
1120                 ret = -EBUSY;
1121                 goto fail_unlock;
1122         }
1123
1124         arch_spin_lock(&tr->max_lock);
1125         tr->cond_snapshot = cond_snapshot;
1126         arch_spin_unlock(&tr->max_lock);
1127
1128         mutex_unlock(&trace_types_lock);
1129
1130         return ret;
1131
1132  fail_unlock:
1133         mutex_unlock(&trace_types_lock);
1134         kfree(cond_snapshot);
1135         return ret;
1136 }
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1138
1139 /**
1140  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1141  * @tr:         The tracing instance
1142  *
1143  * Check whether the conditional snapshot for the given instance is
1144  * enabled; if so, free the cond_snapshot associated with it,
1145  * otherwise return -EINVAL.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_disable(struct trace_array *tr)
1150 {
1151         int ret = 0;
1152
1153         arch_spin_lock(&tr->max_lock);
1154
1155         if (!tr->cond_snapshot)
1156                 ret = -EINVAL;
1157         else {
1158                 kfree(tr->cond_snapshot);
1159                 tr->cond_snapshot = NULL;
1160         }
1161
1162         arch_spin_unlock(&tr->max_lock);
1163
1164         return ret;
1165 }
1166 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
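
/*
 * Example (illustrative sketch; example_update(), example_value and
 * example_cond_snapshot() are hypothetical): the update callback decides,
 * from the cond_data handed to tracing_snapshot_cond(), whether the swap
 * should actually happen.
 */
static __maybe_unused bool example_update(struct trace_array *tr, void *cond_data)
{
        unsigned long *val = cond_data;

        return *val > 100;      /* only snapshot when the value is interesting */
}

static __maybe_unused void example_cond_snapshot(struct trace_array *tr)
{
        static unsigned long example_value;

        if (tracing_snapshot_cond_enable(tr, &example_value, example_update))
                return;

        example_value = 200;
        /* update() sees &example_value and returns true, so the swap happens */
        tracing_snapshot_cond(tr, &example_value);

        tracing_snapshot_cond_disable(tr);
}
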
1167 #else
1168 void tracing_snapshot(void)
1169 {
1170         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1171 }
1172 EXPORT_SYMBOL_GPL(tracing_snapshot);
1173 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1174 {
1175         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1178 int tracing_alloc_snapshot(void)
1179 {
1180         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1181         return -ENODEV;
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1184 void tracing_snapshot_alloc(void)
1185 {
1186         /* Give warning */
1187         tracing_snapshot();
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192         return NULL;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1196 {
1197         return -ENODEV;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 int tracing_snapshot_cond_disable(struct trace_array *tr)
1201 {
1202         return false;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1205 #endif /* CONFIG_TRACER_SNAPSHOT */
1206
1207 void tracer_tracing_off(struct trace_array *tr)
1208 {
1209         if (tr->trace_buffer.buffer)
1210                 ring_buffer_record_off(tr->trace_buffer.buffer);
1211         /*
1212          * This flag is looked at when buffers haven't been allocated
1213          * yet, or by some tracers (like irqsoff), that just want to
1214          * know if the ring buffer has been disabled, but it can handle
1215          * races where it gets disabled but we still do a record.
1216          * As the check is in the fast path of the tracers, it is more
1217          * important to be fast than accurate.
1218          */
1219         tr->buffer_disabled = 1;
1220         /* Make the flag seen by readers */
1221         smp_wmb();
1222 }
1223
1224 /**
1225  * tracing_off - turn off tracing buffers
1226  *
1227  * This function stops the tracing buffers from recording data.
1228  * It does not disable any overhead the tracers themselves may
1229  * be causing. This function simply causes all recording to
1230  * the ring buffers to fail.
1231  */
1232 void tracing_off(void)
1233 {
1234         tracer_tracing_off(&global_trace);
1235 }
1236 EXPORT_SYMBOL_GPL(tracing_off);
1237
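/*
 * Example (illustrative sketch; example_capture_window() is hypothetical):
 * bracket a region of interest so that only events inside it end up in
 * the ring buffers.
 */
static __maybe_unused void example_capture_window(void)
{
        tracing_on();
        /* ... the code path to capture ... */
        tracing_off();  /* the buffers keep their contents for later reading */
}
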
1238 void disable_trace_on_warning(void)
1239 {
1240         if (__disable_trace_on_warning)
1241                 tracing_off();
1242 }
1243
1244 /**
1245  * tracer_tracing_is_on - show the real state of the ring buffer
1246  * @tr: the trace array whose ring buffer state to check
1247  *
1248  * Shows the real state of the ring buffer: whether it is enabled or not.
1249  */
1250 bool tracer_tracing_is_on(struct trace_array *tr)
1251 {
1252         if (tr->trace_buffer.buffer)
1253                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1254         return !tr->buffer_disabled;
1255 }
1256
1257 /**
1258  * tracing_is_on - show state of ring buffers enabled
1259  */
1260 int tracing_is_on(void)
1261 {
1262         return tracer_tracing_is_on(&global_trace);
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_is_on);
1265
1266 static int __init set_buf_size(char *str)
1267 {
1268         unsigned long buf_size;
1269
1270         if (!str)
1271                 return 0;
1272         buf_size = memparse(str, &str);
1273         /* nr_entries can not be zero */
1274         if (buf_size == 0)
1275                 return 0;
1276         trace_buf_size = buf_size;
1277         return 1;
1278 }
1279 __setup("trace_buf_size=", set_buf_size);
1280
1281 static int __init set_tracing_thresh(char *str)
1282 {
1283         unsigned long threshold;
1284         int ret;
1285
1286         if (!str)
1287                 return 0;
1288         ret = kstrtoul(str, 0, &threshold);
1289         if (ret < 0)
1290                 return 0;
1291         tracing_thresh = threshold * 1000;
1292         return 1;
1293 }
1294 __setup("tracing_thresh=", set_tracing_thresh);
1295
1296 unsigned long nsecs_to_usecs(unsigned long nsecs)
1297 {
1298         return nsecs / 1000;
1299 }
1300
1301 /*
1302  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1303  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1304  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1305  * of strings in the order that the evals (enum) were defined.
1306  */
1307 #undef C
1308 #define C(a, b) b
1309
1310 /* These must match the bit positions in trace_iterator_flags */
1311 static const char *trace_options[] = {
1312         TRACE_FLAGS
1313         NULL
1314 };
1315
1316 static struct {
1317         u64 (*func)(void);
1318         const char *name;
1319         int in_ns;              /* is this clock in nanoseconds? */
1320 } trace_clocks[] = {
1321         { trace_clock_local,            "local",        1 },
1322         { trace_clock_global,           "global",       1 },
1323         { trace_clock_counter,          "counter",      0 },
1324         { trace_clock_jiffies,          "uptime",       0 },
1325         { trace_clock,                  "perf",         1 },
1326         { ktime_get_mono_fast_ns,       "mono",         1 },
1327         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1328         { ktime_get_boot_fast_ns,       "boot",         1 },
1329         ARCH_TRACE_CLOCKS
1330 };
1331
1332 bool trace_clock_in_ns(struct trace_array *tr)
1333 {
1334         if (trace_clocks[tr->clock_id].in_ns)
1335                 return true;
1336
1337         return false;
1338 }
1339
1340 /*
1341  * trace_parser_get_init - gets the buffer for trace parser
1342  */
1343 int trace_parser_get_init(struct trace_parser *parser, int size)
1344 {
1345         memset(parser, 0, sizeof(*parser));
1346
1347         parser->buffer = kmalloc(size, GFP_KERNEL);
1348         if (!parser->buffer)
1349                 return 1;
1350
1351         parser->size = size;
1352         return 0;
1353 }
1354
1355 /*
1356  * trace_parser_put - frees the buffer for trace parser
1357  */
1358 void trace_parser_put(struct trace_parser *parser)
1359 {
1360         kfree(parser->buffer);
1361         parser->buffer = NULL;
1362 }
1363
1364 /*
1365  * trace_get_user - reads the user input string separated by space
1366  * (matched by isspace(ch))
1367  *
1368  * For each string found the 'struct trace_parser' is updated,
1369  * and the function returns.
1370  *
1371  * Returns number of bytes read.
1372  *
1373  * See kernel/trace/trace.h for 'struct trace_parser' details.
1374  */
1375 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1376         size_t cnt, loff_t *ppos)
1377 {
1378         char ch;
1379         size_t read = 0;
1380         ssize_t ret;
1381
1382         if (!*ppos)
1383                 trace_parser_clear(parser);
1384
1385         ret = get_user(ch, ubuf++);
1386         if (ret)
1387                 goto out;
1388
1389         read++;
1390         cnt--;
1391
1392         /*
1393          * The parser is not finished with the last write,
1394          * continue reading the user input without skipping spaces.
1395          */
1396         if (!parser->cont) {
1397                 /* skip white space */
1398                 while (cnt && isspace(ch)) {
1399                         ret = get_user(ch, ubuf++);
1400                         if (ret)
1401                                 goto out;
1402                         read++;
1403                         cnt--;
1404                 }
1405
1406                 parser->idx = 0;
1407
1408                 /* only spaces were written */
1409                 if (isspace(ch) || !ch) {
1410                         *ppos += read;
1411                         ret = read;
1412                         goto out;
1413                 }
1414         }
1415
1416         /* read the non-space input */
1417         while (cnt && !isspace(ch) && ch) {
1418                 if (parser->idx < parser->size - 1)
1419                         parser->buffer[parser->idx++] = ch;
1420                 else {
1421                         ret = -EINVAL;
1422                         goto out;
1423                 }
1424                 ret = get_user(ch, ubuf++);
1425                 if (ret)
1426                         goto out;
1427                 read++;
1428                 cnt--;
1429         }
1430
1431         /* We either got finished input or we have to wait for another call. */
1432         if (isspace(ch) || !ch) {
1433                 parser->buffer[parser->idx] = 0;
1434                 parser->cont = false;
1435         } else if (parser->idx < parser->size - 1) {
1436                 parser->cont = true;
1437                 parser->buffer[parser->idx++] = ch;
1438                 /* Make sure the parsed string always terminates with '\0'. */
1439                 parser->buffer[parser->idx] = 0;
1440         } else {
1441                 ret = -EINVAL;
1442                 goto out;
1443         }
1444
1445         *ppos += read;
1446         ret = read;
1447
1448 out:
1449         return ret;
1450 }
1451
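/*
 * Example (illustrative sketch; example_parse() and EXAMPLE_BUF_SIZE are
 * hypothetical): the usual pattern is to allocate the parser once and then
 * call trace_get_user() in a loop until the user buffer is drained, much
 * like trace_pid_write() above does.
 */
#define EXAMPLE_BUF_SIZE        64

static __maybe_unused ssize_t example_parse(const char __user *ubuf, size_t cnt)
{
        struct trace_parser parser;
        ssize_t read = 0;
        ssize_t ret = 0;
        loff_t pos;

        if (trace_parser_get_init(&parser, EXAMPLE_BUF_SIZE))
                return -ENOMEM;

        while (cnt > 0) {
                pos = 0;

                ret = trace_get_user(&parser, ubuf, cnt, &pos);
                if (ret < 0 || !trace_parser_loaded(&parser))
                        break;

                /* parser.buffer now holds one whitespace-separated token */

                read += ret;
                ubuf += ret;
                cnt -= ret;
                trace_parser_clear(&parser);
        }
        trace_parser_put(&parser);

        return ret < 0 ? ret : read;
}
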
1452 /* TODO add a seq_buf_to_buffer() */
1453 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1454 {
1455         int len;
1456
1457         if (trace_seq_used(s) <= s->seq.readpos)
1458                 return -EBUSY;
1459
1460         len = trace_seq_used(s) - s->seq.readpos;
1461         if (cnt > len)
1462                 cnt = len;
1463         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1464
1465         s->seq.readpos += cnt;
1466         return cnt;
1467 }
1468
1469 unsigned long __read_mostly     tracing_thresh;
1470
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472 /*
1473  * Copy the new maximum trace into the separate maximum-trace
1474  * structure. (this way the maximum trace is permanently saved,
1475  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1476  */
1477 static void
1478 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1479 {
1480         struct trace_buffer *trace_buf = &tr->trace_buffer;
1481         struct trace_buffer *max_buf = &tr->max_buffer;
1482         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1483         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1484
1485         max_buf->cpu = cpu;
1486         max_buf->time_start = data->preempt_timestamp;
1487
1488         max_data->saved_latency = tr->max_latency;
1489         max_data->critical_start = data->critical_start;
1490         max_data->critical_end = data->critical_end;
1491
1492         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1493         max_data->pid = tsk->pid;
1494         /*
1495          * If tsk == current, then use current_uid(), as that does not use
1496          * RCU. The irq tracer can be called out of RCU scope.
1497          */
1498         if (tsk == current)
1499                 max_data->uid = current_uid();
1500         else
1501                 max_data->uid = task_uid(tsk);
1502
1503         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1504         max_data->policy = tsk->policy;
1505         max_data->rt_priority = tsk->rt_priority;
1506
1507         /* record this tasks comm */
1508         tracing_record_cmdline(tsk);
1509 }
1510
1511 /**
1512  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1513  * @tr: tracer
1514  * @tsk: the task with the latency
1515  * @cpu: The cpu that initiated the trace.
1516  * @cond_data: User data associated with a conditional snapshot
1517  *
1518  * Flip the buffers between the @tr and the max_tr and record information
1519  * about which task was the cause of this latency.
1520  */
1521 void
1522 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1523               void *cond_data)
1524 {
1525         if (tr->stop_count)
1526                 return;
1527
1528         WARN_ON_ONCE(!irqs_disabled());
1529
1530         if (!tr->allocated_snapshot) {
1531                 /* Only the nop tracer should hit this when disabling */
1532                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1533                 return;
1534         }
1535
1536         arch_spin_lock(&tr->max_lock);
1537
1538         /* Inherit the recordable setting from trace_buffer */
1539         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1540                 ring_buffer_record_on(tr->max_buffer.buffer);
1541         else
1542                 ring_buffer_record_off(tr->max_buffer.buffer);
1543
1544 #ifdef CONFIG_TRACER_SNAPSHOT
1545         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1546                 goto out_unlock;
1547 #endif
1548         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1549
1550         __update_max_tr(tr, tsk, cpu);
1551
1552  out_unlock:
1553         arch_spin_unlock(&tr->max_lock);
1554 }
1555
1556 /**
1557  * update_max_tr_single - only copy one trace over, and reset the rest
1558  * @tr: tracer
1559  * @tsk: task with the latency
1560  * @cpu: the cpu of the buffer to copy.
1561  *
1562  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1563  */
1564 void
1565 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1566 {
1567         int ret;
1568
1569         if (tr->stop_count)
1570                 return;
1571
1572         WARN_ON_ONCE(!irqs_disabled());
1573         if (!tr->allocated_snapshot) {
1574                 /* Only the nop tracer should hit this when disabling */
1575                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1576                 return;
1577         }
1578
1579         arch_spin_lock(&tr->max_lock);
1580
1581         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1582
1583         if (ret == -EBUSY) {
1584                 /*
1585                  * We failed to swap the buffer due to a commit taking
1586                  * place on this CPU. We fail to record, but we reset
1587                  * the max trace buffer (no one writes directly to it)
1588                  * and flag that it failed.
1589                  */
1590                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1591                         "Failed to swap buffers due to commit in progress\n");
1592         }
1593
1594         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1595
1596         __update_max_tr(tr, tsk, cpu);
1597         arch_spin_unlock(&tr->max_lock);
1598 }
1599 #endif /* CONFIG_TRACER_MAX_TRACE */
1600
1601 static int wait_on_pipe(struct trace_iterator *iter, int full)
1602 {
1603         /* Iterators are static, they should be filled or empty */
1604         if (trace_buffer_iter(iter, iter->cpu_file))
1605                 return 0;
1606
1607         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1608                                 full);
1609 }
1610
1611 #ifdef CONFIG_FTRACE_STARTUP_TEST
1612 static bool selftests_can_run;
1613
1614 struct trace_selftests {
1615         struct list_head                list;
1616         struct tracer                   *type;
1617 };
1618
1619 static LIST_HEAD(postponed_selftests);
1620
1621 static int save_selftest(struct tracer *type)
1622 {
1623         struct trace_selftests *selftest;
1624
1625         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1626         if (!selftest)
1627                 return -ENOMEM;
1628
1629         selftest->type = type;
1630         list_add(&selftest->list, &postponed_selftests);
1631         return 0;
1632 }
1633
1634 static int run_tracer_selftest(struct tracer *type)
1635 {
1636         struct trace_array *tr = &global_trace;
1637         struct tracer *saved_tracer = tr->current_trace;
1638         int ret;
1639
1640         if (!type->selftest || tracing_selftest_disabled)
1641                 return 0;
1642
1643         /*
1644          * If a tracer registers early in boot up (before scheduling is
1645          * initialized and such), then do not run its selftests yet.
1646          * Instead, run it a little later in the boot process.
1647          */
1648         if (!selftests_can_run)
1649                 return save_selftest(type);
1650
1651         /*
1652          * Run a selftest on this tracer.
1653          * Here we reset the trace buffer, and set the current
1654          * tracer to be this tracer. The tracer can then run some
1655          * internal tracing to verify that everything is in order.
1656          * If we fail, we do not register this tracer.
1657          */
1658         tracing_reset_online_cpus(&tr->trace_buffer);
1659
1660         tr->current_trace = type;
1661
1662 #ifdef CONFIG_TRACER_MAX_TRACE
1663         if (type->use_max_tr) {
1664                 /* If we expanded the buffers, make sure the max is expanded too */
1665                 if (ring_buffer_expanded)
1666                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1667                                            RING_BUFFER_ALL_CPUS);
1668                 tr->allocated_snapshot = true;
1669         }
1670 #endif
1671
1672         /* the test is responsible for initializing and enabling */
1673         pr_info("Testing tracer %s: ", type->name);
1674         ret = type->selftest(type, tr);
1675         /* the test is responsible for resetting too */
1676         tr->current_trace = saved_tracer;
1677         if (ret) {
1678                 printk(KERN_CONT "FAILED!\n");
1679                 /* Add the warning after printing 'FAILED' */
1680                 WARN_ON(1);
1681                 return -1;
1682         }
1683         /* Only reset on passing, to avoid touching corrupted buffers */
1684         tracing_reset_online_cpus(&tr->trace_buffer);
1685
1686 #ifdef CONFIG_TRACER_MAX_TRACE
1687         if (type->use_max_tr) {
1688                 tr->allocated_snapshot = false;
1689
1690                 /* Shrink the max buffer again */
1691                 if (ring_buffer_expanded)
1692                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1693                                            RING_BUFFER_ALL_CPUS);
1694         }
1695 #endif
1696
1697         printk(KERN_CONT "PASSED\n");
1698         return 0;
1699 }
1700
1701 static __init int init_trace_selftests(void)
1702 {
1703         struct trace_selftests *p, *n;
1704         struct tracer *t, **last;
1705         int ret;
1706
1707         selftests_can_run = true;
1708
1709         mutex_lock(&trace_types_lock);
1710
1711         if (list_empty(&postponed_selftests))
1712                 goto out;
1713
1714         pr_info("Running postponed tracer tests:\n");
1715
1716         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1717                 ret = run_tracer_selftest(p->type);
1718                 /* If the test fails, then warn and remove from available_tracers */
1719                 if (ret < 0) {
1720                         WARN(1, "tracer: %s failed selftest, disabling\n",
1721                              p->type->name);
1722                         last = &trace_types;
1723                         for (t = trace_types; t; t = t->next) {
1724                                 if (t == p->type) {
1725                                         *last = t->next;
1726                                         break;
1727                                 }
1728                                 last = &t->next;
1729                         }
1730                 }
1731                 list_del(&p->list);
1732                 kfree(p);
1733         }
1734
1735  out:
1736         mutex_unlock(&trace_types_lock);
1737
1738         return 0;
1739 }
1740 core_initcall(init_trace_selftests);
1741 #else
1742 static inline int run_tracer_selftest(struct tracer *type)
1743 {
1744         return 0;
1745 }
1746 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1747
1748 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1749
1750 static void __init apply_trace_boot_options(void);
1751
1752 /**
1753  * register_tracer - register a tracer with the ftrace system.
1754  * @type: the plugin for the tracer
1755  *
1756  * Register a new plugin tracer.
1757  */
1758 int __init register_tracer(struct tracer *type)
1759 {
1760         struct tracer *t;
1761         int ret = 0;
1762
1763         if (!type->name) {
1764                 pr_info("Tracer must have a name\n");
1765                 return -1;
1766         }
1767
1768         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1769                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1770                 return -1;
1771         }
1772
1773         mutex_lock(&trace_types_lock);
1774
1775         tracing_selftest_running = true;
1776
1777         for (t = trace_types; t; t = t->next) {
1778                 if (strcmp(type->name, t->name) == 0) {
1779                         /* already found */
1780                         pr_info("Tracer %s already registered\n",
1781                                 type->name);
1782                         ret = -1;
1783                         goto out;
1784                 }
1785         }
1786
1787         if (!type->set_flag)
1788                 type->set_flag = &dummy_set_flag;
1789         if (!type->flags) {
1790                 /* allocate a dummy tracer_flags */
1791                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1792                 if (!type->flags) {
1793                         ret = -ENOMEM;
1794                         goto out;
1795                 }
1796                 type->flags->val = 0;
1797                 type->flags->opts = dummy_tracer_opt;
1798         } else
1799                 if (!type->flags->opts)
1800                         type->flags->opts = dummy_tracer_opt;
1801
1802         /* store the tracer for __set_tracer_option */
1803         type->flags->trace = type;
1804
1805         ret = run_tracer_selftest(type);
1806         if (ret < 0)
1807                 goto out;
1808
1809         type->next = trace_types;
1810         trace_types = type;
1811         add_tracer_options(&global_trace, type);
1812
1813  out:
1814         tracing_selftest_running = false;
1815         mutex_unlock(&trace_types_lock);
1816
1817         if (ret || !default_bootup_tracer)
1818                 goto out_unlock;
1819
1820         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1821                 goto out_unlock;
1822
1823         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1824         /* Do we want this tracer to start on bootup? */
1825         tracing_set_tracer(&global_trace, type->name);
1826         default_bootup_tracer = NULL;
1827
1828         apply_trace_boot_options();
1829
1830         /* disable other selftests, since this will break it. */
1831         tracing_selftest_disabled = true;
1832 #ifdef CONFIG_FTRACE_STARTUP_TEST
1833         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1834                type->name);
1835 #endif
1836
1837  out_unlock:
1838         return ret;
1839 }
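
/*
 * Illustrative example (a hypothetical minimal tracer; all names below are
 * made up): tracers are normally registered from their own __init code.
 * Field names are assumed from struct tracer in kernel/trace/trace.h of
 * this tree; a real tracer usually also provides start/stop and, under
 * CONFIG_FTRACE_STARTUP_TEST, a selftest hook.
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */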
1840
1841 void tracing_reset(struct trace_buffer *buf, int cpu)
1842 {
1843         struct ring_buffer *buffer = buf->buffer;
1844
1845         if (!buffer)
1846                 return;
1847
1848         ring_buffer_record_disable(buffer);
1849
1850         /* Make sure all commits have finished */
1851         synchronize_rcu();
1852         ring_buffer_reset_cpu(buffer, cpu);
1853
1854         ring_buffer_record_enable(buffer);
1855 }
1856
1857 void tracing_reset_online_cpus(struct trace_buffer *buf)
1858 {
1859         struct ring_buffer *buffer = buf->buffer;
1860         int cpu;
1861
1862         if (!buffer)
1863                 return;
1864
1865         ring_buffer_record_disable(buffer);
1866
1867         /* Make sure all commits have finished */
1868         synchronize_rcu();
1869
1870         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1871
1872         for_each_online_cpu(cpu)
1873                 ring_buffer_reset_cpu(buffer, cpu);
1874
1875         ring_buffer_record_enable(buffer);
1876 }
1877
1878 /* Must have trace_types_lock held */
1879 void tracing_reset_all_online_cpus(void)
1880 {
1881         struct trace_array *tr;
1882
1883         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1884                 if (!tr->clear_trace)
1885                         continue;
1886                 tr->clear_trace = false;
1887                 tracing_reset_online_cpus(&tr->trace_buffer);
1888 #ifdef CONFIG_TRACER_MAX_TRACE
1889                 tracing_reset_online_cpus(&tr->max_buffer);
1890 #endif
1891         }
1892 }
1893
1894 static int *tgid_map;
1895
1896 #define SAVED_CMDLINES_DEFAULT 128
1897 #define NO_CMDLINE_MAP UINT_MAX
1898 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1899 struct saved_cmdlines_buffer {
1900         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1901         unsigned *map_cmdline_to_pid;
1902         unsigned cmdline_num;
1903         int cmdline_idx;
1904         char *saved_cmdlines;
1905 };
1906 static struct saved_cmdlines_buffer *savedcmd;
1907
1908 /* temporarily disable recording */
1909 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1910
1911 static inline char *get_saved_cmdlines(int idx)
1912 {
1913         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1914 }
1915
1916 static inline void set_cmdline(int idx, const char *cmdline)
1917 {
1918         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1919 }
1920
1921 static int allocate_cmdlines_buffer(unsigned int val,
1922                                     struct saved_cmdlines_buffer *s)
1923 {
1924         s->map_cmdline_to_pid = kmalloc_array(val,
1925                                               sizeof(*s->map_cmdline_to_pid),
1926                                               GFP_KERNEL);
1927         if (!s->map_cmdline_to_pid)
1928                 return -ENOMEM;
1929
1930         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1931         if (!s->saved_cmdlines) {
1932                 kfree(s->map_cmdline_to_pid);
1933                 return -ENOMEM;
1934         }
1935
1936         s->cmdline_idx = 0;
1937         s->cmdline_num = val;
1938         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1939                sizeof(s->map_pid_to_cmdline));
1940         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1941                val * sizeof(*s->map_cmdline_to_pid));
1942
1943         return 0;
1944 }
1945
1946 static int trace_create_savedcmd(void)
1947 {
1948         int ret;
1949
1950         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1951         if (!savedcmd)
1952                 return -ENOMEM;
1953
1954         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1955         if (ret < 0) {
1956                 kfree(savedcmd);
1957                 savedcmd = NULL;
1958                 return -ENOMEM;
1959         }
1960
1961         return 0;
1962 }
1963
1964 int is_tracing_stopped(void)
1965 {
1966         return global_trace.stop_count;
1967 }
1968
1969 /**
1970  * tracing_start - quick start of the tracer
1971  *
1972  * If tracing is enabled but was stopped by tracing_stop,
1973  * this will start the tracer back up.
1974  */
1975 void tracing_start(void)
1976 {
1977         struct ring_buffer *buffer;
1978         unsigned long flags;
1979
1980         if (tracing_disabled)
1981                 return;
1982
1983         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1984         if (--global_trace.stop_count) {
1985                 if (global_trace.stop_count < 0) {
1986                         /* Someone screwed up their debugging */
1987                         WARN_ON_ONCE(1);
1988                         global_trace.stop_count = 0;
1989                 }
1990                 goto out;
1991         }
1992
1993         /* Prevent the buffers from switching */
1994         arch_spin_lock(&global_trace.max_lock);
1995
1996         buffer = global_trace.trace_buffer.buffer;
1997         if (buffer)
1998                 ring_buffer_record_enable(buffer);
1999
2000 #ifdef CONFIG_TRACER_MAX_TRACE
2001         buffer = global_trace.max_buffer.buffer;
2002         if (buffer)
2003                 ring_buffer_record_enable(buffer);
2004 #endif
2005
2006         arch_spin_unlock(&global_trace.max_lock);
2007
2008  out:
2009         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2010 }
2011
2012 static void tracing_start_tr(struct trace_array *tr)
2013 {
2014         struct ring_buffer *buffer;
2015         unsigned long flags;
2016
2017         if (tracing_disabled)
2018                 return;
2019
2020         /* If global, we need to also start the max tracer */
2021         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2022                 return tracing_start();
2023
2024         raw_spin_lock_irqsave(&tr->start_lock, flags);
2025
2026         if (--tr->stop_count) {
2027                 if (tr->stop_count < 0) {
2028                         /* Someone screwed up their debugging */
2029                         WARN_ON_ONCE(1);
2030                         tr->stop_count = 0;
2031                 }
2032                 goto out;
2033         }
2034
2035         buffer = tr->trace_buffer.buffer;
2036         if (buffer)
2037                 ring_buffer_record_enable(buffer);
2038
2039  out:
2040         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2041 }
2042
2043 /**
2044  * tracing_stop - quick stop of the tracer
2045  *
2046  * Lightweight way to stop tracing. Use in conjunction with
2047  * tracing_start.
2048  */
2049 void tracing_stop(void)
2050 {
2051         struct ring_buffer *buffer;
2052         unsigned long flags;
2053
2054         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2055         if (global_trace.stop_count++)
2056                 goto out;
2057
2058         /* Prevent the buffers from switching */
2059         arch_spin_lock(&global_trace.max_lock);
2060
2061         buffer = global_trace.trace_buffer.buffer;
2062         if (buffer)
2063                 ring_buffer_record_disable(buffer);
2064
2065 #ifdef CONFIG_TRACER_MAX_TRACE
2066         buffer = global_trace.max_buffer.buffer;
2067         if (buffer)
2068                 ring_buffer_record_disable(buffer);
2069 #endif
2070
2071         arch_spin_unlock(&global_trace.max_lock);
2072
2073  out:
2074         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2075 }
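
/*
 * Illustrative example: tracing_stop()/tracing_start() are meant to be used
 * as a light-weight bracket around code whose events should not be recorded
 * (do_noisy_work() is a hypothetical helper):
 *
 *	tracing_stop();
 *	do_noisy_work();
 *	tracing_start();
 *
 * Because of stop_count, such brackets nest correctly when several callers
 * stack them.
 */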
2076
2077 static void tracing_stop_tr(struct trace_array *tr)
2078 {
2079         struct ring_buffer *buffer;
2080         unsigned long flags;
2081
2082         /* If global, we need to also stop the max tracer */
2083         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2084                 return tracing_stop();
2085
2086         raw_spin_lock_irqsave(&tr->start_lock, flags);
2087         if (tr->stop_count++)
2088                 goto out;
2089
2090         buffer = tr->trace_buffer.buffer;
2091         if (buffer)
2092                 ring_buffer_record_disable(buffer);
2093
2094  out:
2095         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2096 }
2097
2098 static int trace_save_cmdline(struct task_struct *tsk)
2099 {
2100         unsigned pid, idx;
2101
2102         /* treat recording of idle task as a success */
2103         if (!tsk->pid)
2104                 return 1;
2105
2106         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2107                 return 0;
2108
2109         /*
2110          * It's not the end of the world if we don't get
2111          * the lock, but we also don't want to spin
2112          * nor do we want to disable interrupts,
2113          * so if we miss here, then better luck next time.
2114          */
2115         if (!arch_spin_trylock(&trace_cmdline_lock))
2116                 return 0;
2117
2118         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2119         if (idx == NO_CMDLINE_MAP) {
2120                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2121
2122                 /*
2123                  * Check whether the cmdline buffer at idx has a pid
2124                  * mapped. We are going to overwrite that entry so we
2125                  * need to clear the map_pid_to_cmdline. Otherwise we
2126                  * would read the new comm for the old pid.
2127                  */
2128                 pid = savedcmd->map_cmdline_to_pid[idx];
2129                 if (pid != NO_CMDLINE_MAP)
2130                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2131
2132                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2133                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2134
2135                 savedcmd->cmdline_idx = idx;
2136         }
2137
2138         set_cmdline(idx, tsk->comm);
2139
2140         arch_spin_unlock(&trace_cmdline_lock);
2141
2142         return 1;
2143 }
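
/*
 * Worked example (values are made up): with cmdline_num == 128, the first
 * time pid 4097 records its comm "bash" a slot idx is picked as above and
 *
 *	savedcmd->map_pid_to_cmdline[4097]  == idx
 *	savedcmd->map_cmdline_to_pid[idx]   == 4097
 *	savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]  holds "bash"
 *
 * Later lookups by pid land on the saved comm, and recycling a slot first
 * clears the stale pid mapping so an old pid never reads a new comm.
 */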
2144
2145 static void __trace_find_cmdline(int pid, char comm[])
2146 {
2147         unsigned map;
2148
2149         if (!pid) {
2150                 strcpy(comm, "<idle>");
2151                 return;
2152         }
2153
2154         if (WARN_ON_ONCE(pid < 0)) {
2155                 strcpy(comm, "<XXX>");
2156                 return;
2157         }
2158
2159         if (pid > PID_MAX_DEFAULT) {
2160                 strcpy(comm, "<...>");
2161                 return;
2162         }
2163
2164         map = savedcmd->map_pid_to_cmdline[pid];
2165         if (map != NO_CMDLINE_MAP)
2166                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2167         else
2168                 strcpy(comm, "<...>");
2169 }
2170
2171 void trace_find_cmdline(int pid, char comm[])
2172 {
2173         preempt_disable();
2174         arch_spin_lock(&trace_cmdline_lock);
2175
2176         __trace_find_cmdline(pid, comm);
2177
2178         arch_spin_unlock(&trace_cmdline_lock);
2179         preempt_enable();
2180 }
2181
2182 int trace_find_tgid(int pid)
2183 {
2184         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2185                 return 0;
2186
2187         return tgid_map[pid];
2188 }
2189
2190 static int trace_save_tgid(struct task_struct *tsk)
2191 {
2192         /* treat recording of idle task as a success */
2193         if (!tsk->pid)
2194                 return 1;
2195
2196         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2197                 return 0;
2198
2199         tgid_map[tsk->pid] = tsk->tgid;
2200         return 1;
2201 }
2202
2203 static bool tracing_record_taskinfo_skip(int flags)
2204 {
2205         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2206                 return true;
2207         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2208                 return true;
2209         if (!__this_cpu_read(trace_taskinfo_save))
2210                 return true;
2211         return false;
2212 }
2213
2214 /**
2215  * tracing_record_taskinfo - record the task info of a task
2216  *
2217  * @task:  task to record
2218  * @flags: TRACE_RECORD_CMDLINE for recording comm
2219  *         TRACE_RECORD_TGID for recording tgid
2220  */
2221 void tracing_record_taskinfo(struct task_struct *task, int flags)
2222 {
2223         bool done;
2224
2225         if (tracing_record_taskinfo_skip(flags))
2226                 return;
2227
2228         /*
2229          * Record as much task information as possible. If some fail, continue
2230          * to try to record the others.
2231          */
2232         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2233         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2234
2235         /* If recording any information failed, retry again soon. */
2236         if (!done)
2237                 return;
2238
2239         __this_cpu_write(trace_taskinfo_save, false);
2240 }
2241
2242 /**
2243  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2244  *
2245  * @prev:  previous task during sched_switch
2246  * @next:  next task during sched_switch
2247  * @flags: TRACE_RECORD_CMDLINE for recording comm
2248  *         TRACE_RECORD_TGID for recording tgid
2249  */
2250 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2251                                           struct task_struct *next, int flags)
2252 {
2253         bool done;
2254
2255         if (tracing_record_taskinfo_skip(flags))
2256                 return;
2257
2258         /*
2259          * Record as much task information as possible. If some fail, continue
2260          * to try to record the others.
2261          */
2262         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2263         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2264         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2265         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2266
2267         /* If recording any information failed, retry again soon. */
2268         if (!done)
2269                 return;
2270
2271         __this_cpu_write(trace_taskinfo_save, false);
2272 }
2273
2274 /* Helpers to record a specific task information */
2275 void tracing_record_cmdline(struct task_struct *task)
2276 {
2277         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2278 }
2279
2280 void tracing_record_tgid(struct task_struct *task)
2281 {
2282         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2283 }
2284
2285 /*
2286  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2287  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2288  * simplifies those functions and keeps them in sync.
2289  */
2290 enum print_line_t trace_handle_return(struct trace_seq *s)
2291 {
2292         return trace_seq_has_overflowed(s) ?
2293                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2294 }
2295 EXPORT_SYMBOL_GPL(trace_handle_return);
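
/*
 * Illustrative example: an event's output callback typically writes into
 * iter->seq and lets trace_handle_return() pick the return value.  The
 * callback name below is hypothetical; see trace_output.c for the in-tree
 * users of this pattern.
 *
 *	static enum print_line_t
 *	example_output(struct trace_iterator *iter, int flags,
 *		       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */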
2296
2297 void
2298 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2299                              int pc)
2300 {
2301         struct task_struct *tsk = current;
2302
2303         entry->preempt_count            = pc & 0xff;
2304         entry->pid                      = (tsk) ? tsk->pid : 0;
2305         entry->flags =
2306 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2307                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2308 #else
2309                 TRACE_FLAG_IRQS_NOSUPPORT |
2310 #endif
2311                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2312                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2313                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2314                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2315                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2316 }
2317 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2318
2319 struct ring_buffer_event *
2320 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2321                           int type,
2322                           unsigned long len,
2323                           unsigned long flags, int pc)
2324 {
2325         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2326 }
2327
2328 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2329 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2330 static int trace_buffered_event_ref;
2331
2332 /**
2333  * trace_buffered_event_enable - enable buffering events
2334  *
2335  * When events are being filtered, it is quicker to use a temporary
2336  * buffer to write the event data into if there's a likely chance
2337  * that it will not be committed. The discard of the ring buffer
2338  * is not as fast as committing, and is much slower than copying
2339  * a commit.
2340  *
2341  * When an event is to be filtered, allocate per cpu buffers to
2342  * write the event data into, and if the event is filtered and discarded
2343  * it is simply dropped, otherwise, the entire data is to be committed
2344  * in one shot.
2345  */
2346 void trace_buffered_event_enable(void)
2347 {
2348         struct ring_buffer_event *event;
2349         struct page *page;
2350         int cpu;
2351
2352         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2353
2354         if (trace_buffered_event_ref++)
2355                 return;
2356
2357         for_each_tracing_cpu(cpu) {
2358                 page = alloc_pages_node(cpu_to_node(cpu),
2359                                         GFP_KERNEL | __GFP_NORETRY, 0);
2360                 if (!page)
2361                         goto failed;
2362
2363                 event = page_address(page);
2364                 memset(event, 0, sizeof(*event));
2365
2366                 per_cpu(trace_buffered_event, cpu) = event;
2367
2368                 preempt_disable();
2369                 if (cpu == smp_processor_id() &&
2370                     this_cpu_read(trace_buffered_event) !=
2371                     per_cpu(trace_buffered_event, cpu))
2372                         WARN_ON_ONCE(1);
2373                 preempt_enable();
2374         }
2375
2376         return;
2377  failed:
2378         trace_buffered_event_disable();
2379 }
2380
2381 static void enable_trace_buffered_event(void *data)
2382 {
2383         /* Probably not needed, but do it anyway */
2384         smp_rmb();
2385         this_cpu_dec(trace_buffered_event_cnt);
2386 }
2387
2388 static void disable_trace_buffered_event(void *data)
2389 {
2390         this_cpu_inc(trace_buffered_event_cnt);
2391 }
2392
2393 /**
2394  * trace_buffered_event_disable - disable buffering events
2395  *
2396  * When a filter is removed, it is faster to not use the buffered
2397  * events, and to commit directly into the ring buffer. Free up
2398  * the temp buffers when there are no more users. This requires
2399  * special synchronization with current events.
2400  */
2401 void trace_buffered_event_disable(void)
2402 {
2403         int cpu;
2404
2405         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2406
2407         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2408                 return;
2409
2410         if (--trace_buffered_event_ref)
2411                 return;
2412
2413         preempt_disable();
2414         /* For each CPU, set the buffer as used. */
2415         smp_call_function_many(tracing_buffer_mask,
2416                                disable_trace_buffered_event, NULL, 1);
2417         preempt_enable();
2418
2419         /* Wait for all current users to finish */
2420         synchronize_rcu();
2421
2422         for_each_tracing_cpu(cpu) {
2423                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2424                 per_cpu(trace_buffered_event, cpu) = NULL;
2425         }
2426         /*
2427          * Make sure trace_buffered_event is NULL before clearing
2428          * trace_buffered_event_cnt.
2429          */
2430         smp_wmb();
2431
2432         preempt_disable();
2433         /* Do the work on each cpu */
2434         smp_call_function_many(tracing_buffer_mask,
2435                                enable_trace_buffered_event, NULL, 1);
2436         preempt_enable();
2437 }
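
/*
 * Illustrative sketch: callers treat enable/disable as a refcounted pair
 * and hold event_mutex across both, roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...	while the filter is in place, events may be staged in the
 *		per-cpu buffers allocated above instead of the ring buffer ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 *
 * The exact call sites live in the event enable/filter code in this
 * directory.
 */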
2438
2439 static struct ring_buffer *temp_buffer;
2440
2441 struct ring_buffer_event *
2442 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2443                           struct trace_event_file *trace_file,
2444                           int type, unsigned long len,
2445                           unsigned long flags, int pc)
2446 {
2447         struct ring_buffer_event *entry;
2448         int val;
2449
2450         *current_rb = trace_file->tr->trace_buffer.buffer;
2451
2452         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2453              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2454             (entry = this_cpu_read(trace_buffered_event))) {
2455                 /* Try to use the per cpu buffer first */
2456                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2457                 if (val == 1) {
2458                         trace_event_setup(entry, type, flags, pc);
2459                         entry->array[0] = len;
2460                         return entry;
2461                 }
2462                 this_cpu_dec(trace_buffered_event_cnt);
2463         }
2464
2465         entry = __trace_buffer_lock_reserve(*current_rb,
2466                                             type, len, flags, pc);
2467         /*
2468          * If tracing is off, but we have triggers enabled
2469          * we still need to look at the event data. Use the temp_buffer
2470          * to store the trace event for the trigger to use. It's recursion
2471          * safe and will not be recorded anywhere.
2472          */
2473         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2474                 *current_rb = temp_buffer;
2475                 entry = __trace_buffer_lock_reserve(*current_rb,
2476                                                     type, len, flags, pc);
2477         }
2478         return entry;
2479 }
2480 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2481
2482 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2483 static DEFINE_MUTEX(tracepoint_printk_mutex);
2484
2485 static void output_printk(struct trace_event_buffer *fbuffer)
2486 {
2487         struct trace_event_call *event_call;
2488         struct trace_event *event;
2489         unsigned long flags;
2490         struct trace_iterator *iter = tracepoint_print_iter;
2491
2492         /* We should never get here if iter is NULL */
2493         if (WARN_ON_ONCE(!iter))
2494                 return;
2495
2496         event_call = fbuffer->trace_file->event_call;
2497         if (!event_call || !event_call->event.funcs ||
2498             !event_call->event.funcs->trace)
2499                 return;
2500
2501         event = &fbuffer->trace_file->event_call->event;
2502
2503         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2504         trace_seq_init(&iter->seq);
2505         iter->ent = fbuffer->entry;
2506         event_call->event.funcs->trace(iter, 0, event);
2507         trace_seq_putc(&iter->seq, 0);
2508         printk("%s", iter->seq.buffer);
2509
2510         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2511 }
2512
2513 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2514                              void __user *buffer, size_t *lenp,
2515                              loff_t *ppos)
2516 {
2517         int save_tracepoint_printk;
2518         int ret;
2519
2520         mutex_lock(&tracepoint_printk_mutex);
2521         save_tracepoint_printk = tracepoint_printk;
2522
2523         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2524
2525         /*
2526          * This will force exiting early, as tracepoint_printk
2527          * is always zero when tracepoint_print_iter is not allocated.
2528          */
2529         if (!tracepoint_print_iter)
2530                 tracepoint_printk = 0;
2531
2532         if (save_tracepoint_printk == tracepoint_printk)
2533                 goto out;
2534
2535         if (tracepoint_printk)
2536                 static_key_enable(&tracepoint_printk_key.key);
2537         else
2538                 static_key_disable(&tracepoint_printk_key.key);
2539
2540  out:
2541         mutex_unlock(&tracepoint_printk_mutex);
2542
2543         return ret;
2544 }
2545
2546 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2547 {
2548         if (static_key_false(&tracepoint_printk_key.key))
2549                 output_printk(fbuffer);
2550
2551         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2552                                     fbuffer->event, fbuffer->entry,
2553                                     fbuffer->flags, fbuffer->pc);
2554 }
2555 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2556
2557 /*
2558  * Skip 3:
2559  *
2560  *   trace_buffer_unlock_commit_regs()
2561  *   trace_event_buffer_commit()
2562  *   trace_event_raw_event_xxx()
2563  */
2564 # define STACK_SKIP 3
2565
2566 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2567                                      struct ring_buffer *buffer,
2568                                      struct ring_buffer_event *event,
2569                                      unsigned long flags, int pc,
2570                                      struct pt_regs *regs)
2571 {
2572         __buffer_unlock_commit(buffer, event);
2573
2574         /*
2575          * If regs is not set, then skip the necessary functions.
2576          * Note, we can still get here via blktrace, wakeup tracer
2577          * and mmiotrace, but that's ok if they lose a function or
2578          * two. They are not that meaningful.
2579          */
2580         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2581         ftrace_trace_userstack(buffer, flags, pc);
2582 }
2583
2584 /*
2585  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2586  */
2587 void
2588 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2589                                    struct ring_buffer_event *event)
2590 {
2591         __buffer_unlock_commit(buffer, event);
2592 }
2593
2594 static void
2595 trace_process_export(struct trace_export *export,
2596                struct ring_buffer_event *event)
2597 {
2598         struct trace_entry *entry;
2599         unsigned int size = 0;
2600
2601         entry = ring_buffer_event_data(event);
2602         size = ring_buffer_event_length(event);
2603         export->write(export, entry, size);
2604 }
2605
2606 static DEFINE_MUTEX(ftrace_export_lock);
2607
2608 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2609
2610 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2611
2612 static inline void ftrace_exports_enable(void)
2613 {
2614         static_branch_enable(&ftrace_exports_enabled);
2615 }
2616
2617 static inline void ftrace_exports_disable(void)
2618 {
2619         static_branch_disable(&ftrace_exports_enabled);
2620 }
2621
2622 static void ftrace_exports(struct ring_buffer_event *event)
2623 {
2624         struct trace_export *export;
2625
2626         preempt_disable_notrace();
2627
2628         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2629         while (export) {
2630                 trace_process_export(export, event);
2631                 export = rcu_dereference_raw_notrace(export->next);
2632         }
2633
2634         preempt_enable_notrace();
2635 }
2636
2637 static inline void
2638 add_trace_export(struct trace_export **list, struct trace_export *export)
2639 {
2640         rcu_assign_pointer(export->next, *list);
2641         /*
2642          * We are entering export into the list but another
2643          * CPU might be walking that list. We need to make sure
2644          * the export->next pointer is valid before another CPU sees
2645          * the export pointer inserted into the list.
2646          */
2647         rcu_assign_pointer(*list, export);
2648 }
2649
2650 static inline int
2651 rm_trace_export(struct trace_export **list, struct trace_export *export)
2652 {
2653         struct trace_export **p;
2654
2655         for (p = list; *p != NULL; p = &(*p)->next)
2656                 if (*p == export)
2657                         break;
2658
2659         if (*p != export)
2660                 return -1;
2661
2662         rcu_assign_pointer(*p, (*p)->next);
2663
2664         return 0;
2665 }
2666
2667 static inline void
2668 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670         if (*list == NULL)
2671                 ftrace_exports_enable();
2672
2673         add_trace_export(list, export);
2674 }
2675
2676 static inline int
2677 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2678 {
2679         int ret;
2680
2681         ret = rm_trace_export(list, export);
2682         if (*list == NULL)
2683                 ftrace_exports_disable();
2684
2685         return ret;
2686 }
2687
2688 int register_ftrace_export(struct trace_export *export)
2689 {
2690         if (WARN_ON_ONCE(!export->write))
2691                 return -1;
2692
2693         mutex_lock(&ftrace_export_lock);
2694
2695         add_ftrace_export(&ftrace_exports_list, export);
2696
2697         mutex_unlock(&ftrace_export_lock);
2698
2699         return 0;
2700 }
2701 EXPORT_SYMBOL_GPL(register_ftrace_export);
2702
2703 int unregister_ftrace_export(struct trace_export *export)
2704 {
2705         int ret;
2706
2707         mutex_lock(&ftrace_export_lock);
2708
2709         ret = rm_ftrace_export(&ftrace_exports_list, export);
2710
2711         mutex_unlock(&ftrace_export_lock);
2712
2713         return ret;
2714 }
2715 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
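
/*
 * Illustrative sketch: an exporter supplies a ->write() callback and
 * registers itself; each function trace entry committed by trace_function()
 * below is then handed to the callback as a raw entry blob.  The callback
 * signature is assumed from struct trace_export in <linux/trace.h> of this
 * tree (the stm ftrace driver under drivers/hwtracing/stm/ is an in-tree
 * user of this interface); all names below are made up.
 *
 *	static void notrace example_export_write(struct trace_export *export,
 *						 const void *entry,
 *						 unsigned int size)
 *	{
 *		// push the raw entry out over some side channel
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	// pair these in the exporter's init/exit paths:
 *	//	register_ftrace_export(&example_export);
 *	//	unregister_ftrace_export(&example_export);
 */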
2716
2717 void
2718 trace_function(struct trace_array *tr,
2719                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2720                int pc)
2721 {
2722         struct trace_event_call *call = &event_function;
2723         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2724         struct ring_buffer_event *event;
2725         struct ftrace_entry *entry;
2726
2727         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2728                                             flags, pc);
2729         if (!event)
2730                 return;
2731         entry   = ring_buffer_event_data(event);
2732         entry->ip                       = ip;
2733         entry->parent_ip                = parent_ip;
2734
2735         if (!call_filter_check_discard(call, entry, buffer, event)) {
2736                 if (static_branch_unlikely(&ftrace_exports_enabled))
2737                         ftrace_exports(event);
2738                 __buffer_unlock_commit(buffer, event);
2739         }
2740 }
2741
2742 #ifdef CONFIG_STACKTRACE
2743
2744 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2745 struct ftrace_stack {
2746         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2747 };
2748
2749 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2750 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2751
2752 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2753                                  unsigned long flags,
2754                                  int skip, int pc, struct pt_regs *regs)
2755 {
2756         struct trace_event_call *call = &event_kernel_stack;
2757         struct ring_buffer_event *event;
2758         struct stack_entry *entry;
2759         struct stack_trace trace;
2760         int use_stack;
2761         int size = FTRACE_STACK_ENTRIES;
2762
2763         trace.nr_entries        = 0;
2764         trace.skip              = skip;
2765
2766         /*
2767          * Add one, for this function and the call to save_stack_trace().
2768          * If regs is set, then these functions will not be in the way.
2769          */
2770 #ifndef CONFIG_UNWINDER_ORC
2771         if (!regs)
2772                 trace.skip++;
2773 #endif
2774
2775         /*
2776          * Since events can happen in NMIs, there's no safe way to
2777          * use the per-cpu ftrace_stacks. We reserve it and if an interrupt
2778          * or NMI comes in, it will just have to fall back to the default
2779          * FTRACE_STACK_ENTRIES sized stack stored in the event itself.
2780          */
2781         preempt_disable_notrace();
2782
2783         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2784         /*
2785          * We don't need any atomic variables, just a barrier.
2786          * If an interrupt comes in, we don't care, because it would
2787          * have exited and put the counter back to what we want.
2788          * We just need a barrier to keep gcc from moving things
2789          * around.
2790          */
2791         barrier();
2792         if (use_stack == 1) {
2793                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2794                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2795
2796                 if (regs)
2797                         save_stack_trace_regs(regs, &trace);
2798                 else
2799                         save_stack_trace(&trace);
2800
2801                 if (trace.nr_entries > size)
2802                         size = trace.nr_entries;
2803         } else
2804                 /* From now on, use_stack is a boolean */
2805                 use_stack = 0;
2806
2807         size *= sizeof(unsigned long);
2808
2809         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2810                                             sizeof(*entry) + size, flags, pc);
2811         if (!event)
2812                 goto out;
2813         entry = ring_buffer_event_data(event);
2814
2815         memset(&entry->caller, 0, size);
2816
2817         if (use_stack)
2818                 memcpy(&entry->caller, trace.entries,
2819                        trace.nr_entries * sizeof(unsigned long));
2820         else {
2821                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2822                 trace.entries           = entry->caller;
2823                 if (regs)
2824                         save_stack_trace_regs(regs, &trace);
2825                 else
2826                         save_stack_trace(&trace);
2827         }
2828
2829         entry->size = trace.nr_entries;
2830
2831         if (!call_filter_check_discard(call, entry, buffer, event))
2832                 __buffer_unlock_commit(buffer, event);
2833
2834  out:
2835         /* Again, don't let gcc optimize things here */
2836         barrier();
2837         __this_cpu_dec(ftrace_stack_reserve);
2838         preempt_enable_notrace();
2839
2840 }
2841
2842 static inline void ftrace_trace_stack(struct trace_array *tr,
2843                                       struct ring_buffer *buffer,
2844                                       unsigned long flags,
2845                                       int skip, int pc, struct pt_regs *regs)
2846 {
2847         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2848                 return;
2849
2850         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2851 }
2852
2853 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2854                    int pc)
2855 {
2856         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2857
2858         if (rcu_is_watching()) {
2859                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2860                 return;
2861         }
2862
2863         /*
2864          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2865          * but if the above rcu_is_watching() failed, then the NMI
2866          * triggered someplace critical, and rcu_irq_enter() should
2867          * not be called from NMI.
2868          */
2869         if (unlikely(in_nmi()))
2870                 return;
2871
2872         rcu_irq_enter_irqson();
2873         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2874         rcu_irq_exit_irqson();
2875 }
2876
2877 /**
2878  * trace_dump_stack - record a stack back trace in the trace buffer
2879  * @skip: Number of functions to skip (helper handlers)
2880  */
2881 void trace_dump_stack(int skip)
2882 {
2883         unsigned long flags;
2884
2885         if (tracing_disabled || tracing_selftest_running)
2886                 return;
2887
2888         local_save_flags(flags);
2889
2890 #ifndef CONFIG_UNWINDER_ORC
2891         /* Skip 1 to skip this function. */
2892         skip++;
2893 #endif
2894         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2895                              flags, skip, preempt_count(), NULL);
2896 }
2897 EXPORT_SYMBOL_GPL(trace_dump_stack);
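
/*
 * Illustrative example: trace_dump_stack(0) records the caller's full
 * backtrace into the global trace buffer; a positive @skip drops helper
 * frames, e.g. from inside a hypothetical wrapper:
 *
 *	void example_helper(void)
 *	{
 *		trace_dump_stack(1);	// leave example_helper() out of the dump
 *	}
 */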
2898
2899 static DEFINE_PER_CPU(int, user_stack_count);
2900
2901 void
2902 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2903 {
2904         struct trace_event_call *call = &event_user_stack;
2905         struct ring_buffer_event *event;
2906         struct userstack_entry *entry;
2907         struct stack_trace trace;
2908
2909         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2910                 return;
2911
2912         /*
2913          * NMIs cannot handle page faults, even with fixups.
2914          * Saving the user stack can (and often does) fault.
2915          */
2916         if (unlikely(in_nmi()))
2917                 return;
2918
2919         /*
2920          * prevent recursion, since the user stack tracing may
2921          * trigger other kernel events.
2922          */
2923         preempt_disable();
2924         if (__this_cpu_read(user_stack_count))
2925                 goto out;
2926
2927         __this_cpu_inc(user_stack_count);
2928
2929         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2930                                             sizeof(*entry), flags, pc);
2931         if (!event)
2932                 goto out_drop_count;
2933         entry   = ring_buffer_event_data(event);
2934
2935         entry->tgid             = current->tgid;
2936         memset(&entry->caller, 0, sizeof(entry->caller));
2937
2938         trace.nr_entries        = 0;
2939         trace.max_entries       = FTRACE_STACK_ENTRIES;
2940         trace.skip              = 0;
2941         trace.entries           = entry->caller;
2942
2943         save_stack_trace_user(&trace);
2944         if (!call_filter_check_discard(call, entry, buffer, event))
2945                 __buffer_unlock_commit(buffer, event);
2946
2947  out_drop_count:
2948         __this_cpu_dec(user_stack_count);
2949  out:
2950         preempt_enable();
2951 }
2952
2953 #ifdef UNUSED
2954 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2955 {
2956         ftrace_trace_userstack(tr, flags, preempt_count());
2957 }
2958 #endif /* UNUSED */
2959
2960 #endif /* CONFIG_STACKTRACE */
2961
2962 /* created for use with alloc_percpu */
2963 struct trace_buffer_struct {
2964         int nesting;
2965         char buffer[4][TRACE_BUF_SIZE];
2966 };
2967
2968 static struct trace_buffer_struct *trace_percpu_buffer;
2969
2970 /*
2971  * This allows for lockless recording.  If we're nested too deeply, then
2972  * this returns NULL.
2973  */
2974 static char *get_trace_buf(void)
2975 {
2976         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2977
2978         if (!buffer || buffer->nesting >= 4)
2979                 return NULL;
2980
2981         buffer->nesting++;
2982
2983         /* Interrupts must see nesting incremented before we use the buffer */
2984         barrier();
2985         return &buffer->buffer[buffer->nesting][0];
2986 }
2987
2988 static void put_trace_buf(void)
2989 {
2990         /* Don't let the decrement of nesting leak before this */
2991         barrier();
2992         this_cpu_dec(trace_percpu_buffer->nesting);
2993 }
2994
2995 static int alloc_percpu_trace_buffer(void)
2996 {
2997         struct trace_buffer_struct *buffers;
2998
2999         buffers = alloc_percpu(struct trace_buffer_struct);
3000         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3001                 return -ENOMEM;
3002
3003         trace_percpu_buffer = buffers;
3004         return 0;
3005 }
3006
3007 static int buffers_allocated;
3008
3009 void trace_printk_init_buffers(void)
3010 {
3011         if (buffers_allocated)
3012                 return;
3013
3014         if (alloc_percpu_trace_buffer())
3015                 return;
3016
3017         /* trace_printk() is for debug use only. Don't use it in production. */
3018
3019         pr_warn("\n");
3020         pr_warn("**********************************************************\n");
3021         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3022         pr_warn("**                                                      **\n");
3023         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3024         pr_warn("**                                                      **\n");
3025         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3026         pr_warn("** unsafe for production use.                           **\n");
3027         pr_warn("**                                                      **\n");
3028         pr_warn("** If you see this message and you are not debugging    **\n");
3029         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3030         pr_warn("**                                                      **\n");
3031         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3032         pr_warn("**********************************************************\n");
3033
3034         /* Expand the buffers to set size */
3035         tracing_update_buffers();
3036
3037         buffers_allocated = 1;
3038
3039         /*
3040          * trace_printk_init_buffers() can be called by modules.
3041          * If that happens, then we need to start cmdline recording
3042          * directly here. If the global_trace.buffer is already
3043          * allocated here, then this was called by module code.
3044          */
3045         if (global_trace.trace_buffer.buffer)
3046                 tracing_start_cmdline_record();
3047 }
3048
3049 void trace_printk_start_comm(void)
3050 {
3051         /* Start tracing comms if trace printk is set */
3052         if (!buffers_allocated)
3053                 return;
3054         tracing_start_cmdline_record();
3055 }
3056
3057 static void trace_printk_start_stop_comm(int enabled)
3058 {
3059         if (!buffers_allocated)
3060                 return;
3061
3062         if (enabled)
3063                 tracing_start_cmdline_record();
3064         else
3065                 tracing_stop_cmdline_record();
3066 }
3067
3068 /**
3069  * trace_vbprintk - write binary msg to tracing buffer
3070  *
3071  */
3072 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3073 {
3074         struct trace_event_call *call = &event_bprint;
3075         struct ring_buffer_event *event;
3076         struct ring_buffer *buffer;
3077         struct trace_array *tr = &global_trace;
3078         struct bprint_entry *entry;
3079         unsigned long flags;
3080         char *tbuffer;
3081         int len = 0, size, pc;
3082
3083         if (unlikely(tracing_selftest_running || tracing_disabled))
3084                 return 0;
3085
3086         /* Don't pollute graph traces with trace_vprintk internals */
3087         pause_graph_tracing();
3088
3089         pc = preempt_count();
3090         preempt_disable_notrace();
3091
3092         tbuffer = get_trace_buf();
3093         if (!tbuffer) {
3094                 len = 0;
3095                 goto out_nobuffer;
3096         }
3097
3098         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3099
3100         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3101                 goto out;
3102
3103         local_save_flags(flags);
3104         size = sizeof(*entry) + sizeof(u32) * len;
3105         buffer = tr->trace_buffer.buffer;
3106         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3107                                             flags, pc);
3108         if (!event)
3109                 goto out;
3110         entry = ring_buffer_event_data(event);
3111         entry->ip                       = ip;
3112         entry->fmt                      = fmt;
3113
3114         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3115         if (!call_filter_check_discard(call, entry, buffer, event)) {
3116                 __buffer_unlock_commit(buffer, event);
3117                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3118         }
3119
3120 out:
3121         put_trace_buf();
3122
3123 out_nobuffer:
3124         preempt_enable_notrace();
3125         unpause_graph_tracing();
3126
3127         return len;
3128 }
3129 EXPORT_SYMBOL_GPL(trace_vbprintk);
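
/*
 * Illustrative example: this is the backend for trace_printk() when it is
 * used with a constant format string and arguments, e.g.:
 *
 *	trace_printk("x=%d y=%d\n", x, y);
 *
 * Only the format pointer and the binary arguments are stored in the ring
 * buffer; the text is rendered later, when the trace is read.
 */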
3130
3131 __printf(3, 0)
3132 static int
3133 __trace_array_vprintk(struct ring_buffer *buffer,
3134                       unsigned long ip, const char *fmt, va_list args)
3135 {
3136         struct trace_event_call *call = &event_print;
3137         struct ring_buffer_event *event;
3138         int len = 0, size, pc;
3139         struct print_entry *entry;
3140         unsigned long flags;
3141         char *tbuffer;
3142
3143         if (tracing_disabled || tracing_selftest_running)
3144                 return 0;
3145
3146         /* Don't pollute graph traces with trace_vprintk internals */
3147         pause_graph_tracing();
3148
3149         pc = preempt_count();
3150         preempt_disable_notrace();
3151
3152
3153         tbuffer = get_trace_buf();
3154         if (!tbuffer) {
3155                 len = 0;
3156                 goto out_nobuffer;
3157         }
3158
3159         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3160
3161         local_save_flags(flags);
3162         size = sizeof(*entry) + len + 1;
3163         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3164                                             flags, pc);
3165         if (!event)
3166                 goto out;
3167         entry = ring_buffer_event_data(event);
3168         entry->ip = ip;
3169
3170         memcpy(&entry->buf, tbuffer, len + 1);
3171         if (!call_filter_check_discard(call, entry, buffer, event)) {
3172                 __buffer_unlock_commit(buffer, event);
3173                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3174         }
3175
3176 out:
3177         put_trace_buf();
3178
3179 out_nobuffer:
3180         preempt_enable_notrace();
3181         unpause_graph_tracing();
3182
3183         return len;
3184 }
3185
3186 __printf(3, 0)
3187 int trace_array_vprintk(struct trace_array *tr,
3188                         unsigned long ip, const char *fmt, va_list args)
3189 {
3190         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3191 }
3192
3193 __printf(3, 0)
3194 int trace_array_printk(struct trace_array *tr,
3195                        unsigned long ip, const char *fmt, ...)
3196 {
3197         int ret;
3198         va_list ap;
3199
3200         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3201                 return 0;
3202
3203         va_start(ap, fmt);
3204         ret = trace_array_vprintk(tr, ip, fmt, ap);
3205         va_end(ap);
3206         return ret;
3207 }
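
/*
 * Illustrative example: with a handle to a trace instance, a message can be
 * written into that instance's buffer rather than the global one ("tr" and
 * the message below are hypothetical):
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 */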
3208
3209 __printf(3, 4)
3210 int trace_array_printk_buf(struct ring_buffer *buffer,
3211                            unsigned long ip, const char *fmt, ...)
3212 {
3213         int ret;
3214         va_list ap;
3215
3216         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3217                 return 0;
3218
3219         va_start(ap, fmt);
3220         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3221         va_end(ap);
3222         return ret;
3223 }
3224
3225 __printf(2, 0)
3226 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3227 {
3228         return trace_array_vprintk(&global_trace, ip, fmt, args);
3229 }
3230 EXPORT_SYMBOL_GPL(trace_vprintk);
3231
3232 static void trace_iterator_increment(struct trace_iterator *iter)
3233 {
3234         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3235
3236         iter->idx++;
3237         if (buf_iter)
3238                 ring_buffer_read(buf_iter, NULL);
3239 }
3240
3241 static struct trace_entry *
3242 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3243                 unsigned long *lost_events)
3244 {
3245         struct ring_buffer_event *event;
3246         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3247
3248         if (buf_iter)
3249                 event = ring_buffer_iter_peek(buf_iter, ts);
3250         else
3251                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3252                                          lost_events);
3253
3254         if (event) {
3255                 iter->ent_size = ring_buffer_event_length(event);
3256                 return ring_buffer_event_data(event);
3257         }
3258         iter->ent_size = 0;
3259         return NULL;
3260 }
3261
3262 static struct trace_entry *
3263 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3264                   unsigned long *missing_events, u64 *ent_ts)
3265 {
3266         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3267         struct trace_entry *ent, *next = NULL;
3268         unsigned long lost_events = 0, next_lost = 0;
3269         int cpu_file = iter->cpu_file;
3270         u64 next_ts = 0, ts;
3271         int next_cpu = -1;
3272         int next_size = 0;
3273         int cpu;
3274
3275         /*
3276          * If we are in a per_cpu trace file, don't bother iterating over
3277          * all CPUs and instead peek at that CPU directly.
3278          */
3279         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3280                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3281                         return NULL;
3282                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3283                 if (ent_cpu)
3284                         *ent_cpu = cpu_file;
3285
3286                 return ent;
3287         }
3288
3289         for_each_tracing_cpu(cpu) {
3290
3291                 if (ring_buffer_empty_cpu(buffer, cpu))
3292                         continue;
3293
3294                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3295
3296                 /*
3297                  * Pick the entry with the smallest timestamp:
3298                  */
3299                 if (ent && (!next || ts < next_ts)) {
3300                         next = ent;
3301                         next_cpu = cpu;
3302                         next_ts = ts;
3303                         next_lost = lost_events;
3304                         next_size = iter->ent_size;
3305                 }
3306         }
3307
3308         iter->ent_size = next_size;
3309
3310         if (ent_cpu)
3311                 *ent_cpu = next_cpu;
3312
3313         if (ent_ts)
3314                 *ent_ts = next_ts;
3315
3316         if (missing_events)
3317                 *missing_events = next_lost;
3318
3319         return next;
3320 }
3321
3322 /* Find the next real entry, without updating the iterator itself */
3323 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3324                                           int *ent_cpu, u64 *ent_ts)
3325 {
3326         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3327 }
3328
3329 /* Find the next real entry, and increment the iterator to the next entry */
3330 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3331 {
3332         iter->ent = __find_next_entry(iter, &iter->cpu,
3333                                       &iter->lost_events, &iter->ts);
3334
3335         if (iter->ent)
3336                 trace_iterator_increment(iter);
3337
3338         return iter->ent ? iter : NULL;
3339 }
3340
3341 static void trace_consume(struct trace_iterator *iter)
3342 {
3343         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3344                             &iter->lost_events);
3345 }
3346
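/*
 * seq_file ->next callback for the "trace" file: advance the iterator
 * until it reaches position *pos. The iterator only moves forward; if
 * the requested position is behind the current index, NULL is returned
 * and the walk has to be restarted from s_start().
 */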
3347 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3348 {
3349         struct trace_iterator *iter = m->private;
3350         int i = (int)*pos;
3351         void *ent;
3352
3353         WARN_ON_ONCE(iter->leftover);
3354
3355         (*pos)++;
3356
3357         /* can't go backwards */
3358         if (iter->idx > i)
3359                 return NULL;
3360
3361         if (iter->idx < 0)
3362                 ent = trace_find_next_entry_inc(iter);
3363         else
3364                 ent = iter;
3365
3366         while (ent && iter->idx < i)
3367                 ent = trace_find_next_entry_inc(iter);
3368
3369         iter->pos = *pos;
3370
3371         return ent;
3372 }
3373
3374 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3375 {
3376         struct ring_buffer_event *event;
3377         struct ring_buffer_iter *buf_iter;
3378         unsigned long entries = 0;
3379         u64 ts;
3380
3381         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3382
3383         buf_iter = trace_buffer_iter(iter, cpu);
3384         if (!buf_iter)
3385                 return;
3386
3387         ring_buffer_iter_reset(buf_iter);
3388
3389         /*
3390          * With the max latency tracers, it is possible that a reset
3391          * never took place on a cpu. This is evident when an entry's
3392          * timestamp is before the start of the buffer.
3393          */
3394         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3395                 if (ts >= iter->trace_buffer->time_start)
3396                         break;
3397                 entries++;
3398                 ring_buffer_read(buf_iter, NULL);
3399         }
3400
3401         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3402 }
3403
3404 /*
3405  * The current tracer is copied to avoid taking a global lock
3406  * all around.
3407  */
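/*
 * s_start() is the seq_file ->start callback. If the requested position
 * differs from where the iterator stopped last time, the per-cpu ring
 * buffer iterators are reset and the trace is walked forward from the
 * beginning; otherwise either the leftover (overflowed) seq buffer is
 * reused or the walk resumes from the previous position.
 */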
3408 static void *s_start(struct seq_file *m, loff_t *pos)
3409 {
3410         struct trace_iterator *iter = m->private;
3411         struct trace_array *tr = iter->tr;
3412         int cpu_file = iter->cpu_file;
3413         void *p = NULL;
3414         loff_t l = 0;
3415         int cpu;
3416
3417         /*
3418          * Copy the tracer to avoid using a global lock all around.
3419          * iter->trace is a copy of current_trace; the name pointer can
3420          * be compared instead of using strcmp(), as iter->trace->name
3421          * will point to the same string as current_trace->name.
3422          */
3423         mutex_lock(&trace_types_lock);
3424         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3425                 *iter->trace = *tr->current_trace;
3426         mutex_unlock(&trace_types_lock);
3427
3428 #ifdef CONFIG_TRACER_MAX_TRACE
3429         if (iter->snapshot && iter->trace->use_max_tr)
3430                 return ERR_PTR(-EBUSY);
3431 #endif
3432
3433         if (!iter->snapshot)
3434                 atomic_inc(&trace_record_taskinfo_disabled);
3435
3436         if (*pos != iter->pos) {
3437                 iter->ent = NULL;
3438                 iter->cpu = 0;
3439                 iter->idx = -1;
3440
3441                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3442                         for_each_tracing_cpu(cpu)
3443                                 tracing_iter_reset(iter, cpu);
3444                 } else
3445                         tracing_iter_reset(iter, cpu_file);
3446
3447                 iter->leftover = 0;
3448                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3449                         ;
3450
3451         } else {
3452                 /*
3453                  * If we overflowed the seq_file before, then we want
3454                  * to just reuse the trace_seq buffer again.
3455                  */
3456                 if (iter->leftover)
3457                         p = iter;
3458                 else {
3459                         l = *pos - 1;
3460                         p = s_next(m, p, &l);
3461                 }
3462         }
3463
3464         trace_event_read_lock();
3465         trace_access_lock(cpu_file);
3466         return p;
3467 }
3468
3469 static void s_stop(struct seq_file *m, void *p)
3470 {
3471         struct trace_iterator *iter = m->private;
3472
3473 #ifdef CONFIG_TRACER_MAX_TRACE
3474         if (iter->snapshot && iter->trace->use_max_tr)
3475                 return;
3476 #endif
3477
3478         if (!iter->snapshot)
3479                 atomic_dec(&trace_record_taskinfo_disabled);
3480
3481         trace_access_unlock(iter->cpu_file);
3482         trace_event_read_unlock();
3483 }
3484
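/*
 * Sum up the per-cpu entry counts for the header statistics: @entries
 * is what is currently readable in the buffer, while @total also
 * accounts for events lost to ring buffer overruns. When a cpu has
 * skipped_entries (latency tracers), those are subtracted instead,
 * since the buffer then holds the complete trace.
 */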
3485 static void
3486 get_total_entries(struct trace_buffer *buf,
3487                   unsigned long *total, unsigned long *entries)
3488 {
3489         unsigned long count;
3490         int cpu;
3491
3492         *total = 0;
3493         *entries = 0;
3494
3495         for_each_tracing_cpu(cpu) {
3496                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3497                 /*
3498                  * If this buffer has skipped entries, then we hold all
3499                  * entries for the trace, and we need to ignore the
3500                  * ones recorded before the time stamp.
3501                  */
3502                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3503                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3504                         /* total is the same as the entries */
3505                         *total += count;
3506                 } else
3507                         *total += count +
3508                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3509                 *entries += count;
3510         }
3511 }
3512
3513 static void print_lat_help_header(struct seq_file *m)
3514 {
3515         seq_puts(m, "#                  _------=> CPU#            \n"
3516                     "#                 / _-----=> irqs-off        \n"
3517                     "#                | / _----=> need-resched    \n"
3518                     "#                || / _---=> hardirq/softirq \n"
3519                     "#                ||| / _--=> preempt-depth   \n"
3520                     "#                |||| /     delay            \n"
3521                     "#  cmd     pid   ||||| time  |   caller      \n"
3522                     "#     \\   /      |||||  \\    |   /         \n");
3523 }
3524
3525 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3526 {
3527         unsigned long total;
3528         unsigned long entries;
3529
3530         get_total_entries(buf, &total, &entries);
3531         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3532                    entries, total, num_online_cpus());
3533         seq_puts(m, "#\n");
3534 }
3535
3536 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3537                                    unsigned int flags)
3538 {
3539         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3540
3541         print_event_info(buf, m);
3542
3543         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3544         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3545 }
3546
3547 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3548                                        unsigned int flags)
3549 {
3550         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551         const char tgid_space[] = "          ";
3552         const char space[] = "  ";
3553
3554         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3555                    tgid ? tgid_space : space);
3556         seq_printf(m, "#                          %s / _----=> need-resched\n",
3557                    tgid ? tgid_space : space);
3558         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3559                    tgid ? tgid_space : space);
3560         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3561                    tgid ? tgid_space : space);
3562         seq_printf(m, "#                          %s||| /     delay\n",
3563                    tgid ? tgid_space : space);
3564         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3565                    tgid ? "   TGID   " : space);
3566         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3567                    tgid ? "     |    " : space);
3568 }
3569
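/*
 * Print the latency-format banner: tracer name, kernel release, the
 * measured latency with entry counts, the preemption model, the task
 * that triggered the max latency, and (if recorded) where the critical
 * section started and ended.
 */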
3570 void
3571 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3572 {
3573         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3574         struct trace_buffer *buf = iter->trace_buffer;
3575         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3576         struct tracer *type = iter->trace;
3577         unsigned long entries;
3578         unsigned long total;
3579         const char *name = "preemption";
3580
3581         name = type->name;
3582
3583         get_total_entries(buf, &total, &entries);
3584
3585         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3586                    name, UTS_RELEASE);
3587         seq_puts(m, "# -----------------------------------"
3588                  "---------------------------------\n");
3589         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3590                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3591                    nsecs_to_usecs(data->saved_latency),
3592                    entries,
3593                    total,
3594                    buf->cpu,
3595 #if defined(CONFIG_PREEMPT_NONE)
3596                    "server",
3597 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3598                    "desktop",
3599 #elif defined(CONFIG_PREEMPT)
3600                    "preempt",
3601 #else
3602                    "unknown",
3603 #endif
3604                    /* These are reserved for later use */
3605                    0, 0, 0, 0);
3606 #ifdef CONFIG_SMP
3607         seq_printf(m, " #P:%d)\n", num_online_cpus());
3608 #else
3609         seq_puts(m, ")\n");
3610 #endif
3611         seq_puts(m, "#    -----------------\n");
3612         seq_printf(m, "#    | task: %.16s-%d "
3613                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3614                    data->comm, data->pid,
3615                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3616                    data->policy, data->rt_priority);
3617         seq_puts(m, "#    -----------------\n");
3618
3619         if (data->critical_start) {
3620                 seq_puts(m, "#  => started at: ");
3621                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3622                 trace_print_seq(m, &iter->seq);
3623                 seq_puts(m, "\n#  => ended at:   ");
3624                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3625                 trace_print_seq(m, &iter->seq);
3626                 seq_puts(m, "\n#\n");
3627         }
3628
3629         seq_puts(m, "#\n");
3630 }
3631
3632 static void test_cpu_buff_start(struct trace_iterator *iter)
3633 {
3634         struct trace_seq *s = &iter->seq;
3635         struct trace_array *tr = iter->tr;
3636
3637         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3638                 return;
3639
3640         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3641                 return;
3642
3643         if (cpumask_available(iter->started) &&
3644             cpumask_test_cpu(iter->cpu, iter->started))
3645                 return;
3646
3647         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3648                 return;
3649
3650         if (cpumask_available(iter->started))
3651                 cpumask_set_cpu(iter->cpu, iter->started);
3652
3653         /* Don't print started cpu buffer for the first entry of the trace */
3654         if (iter->idx > 1)
3655                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3656                                 iter->cpu);
3657 }
3658
3659 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3660 {
3661         struct trace_array *tr = iter->tr;
3662         struct trace_seq *s = &iter->seq;
3663         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3664         struct trace_entry *entry;
3665         struct trace_event *event;
3666
3667         entry = iter->ent;
3668
3669         test_cpu_buff_start(iter);
3670
3671         event = ftrace_find_event(entry->type);
3672
3673         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3674                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3675                         trace_print_lat_context(iter);
3676                 else
3677                         trace_print_context(iter);
3678         }
3679
3680         if (trace_seq_has_overflowed(s))
3681                 return TRACE_TYPE_PARTIAL_LINE;
3682
3683         if (event)
3684                 return event->funcs->trace(iter, sym_flags, event);
3685
3686         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3687
3688         return trace_handle_return(s);
3689 }
3690
3691 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3692 {
3693         struct trace_array *tr = iter->tr;
3694         struct trace_seq *s = &iter->seq;
3695         struct trace_entry *entry;
3696         struct trace_event *event;
3697
3698         entry = iter->ent;
3699
3700         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3701                 trace_seq_printf(s, "%d %d %llu ",
3702                                  entry->pid, iter->cpu, iter->ts);
3703
3704         if (trace_seq_has_overflowed(s))
3705                 return TRACE_TYPE_PARTIAL_LINE;
3706
3707         event = ftrace_find_event(entry->type);
3708         if (event)
3709                 return event->funcs->raw(iter, 0, event);
3710
3711         trace_seq_printf(s, "%d ?\n", entry->type);
3712
3713         return trace_handle_return(s);
3714 }
3715
3716 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3717 {
3718         struct trace_array *tr = iter->tr;
3719         struct trace_seq *s = &iter->seq;
3720         unsigned char newline = '\n';
3721         struct trace_entry *entry;
3722         struct trace_event *event;
3723
3724         entry = iter->ent;
3725
3726         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3727                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3728                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3729                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3730                 if (trace_seq_has_overflowed(s))
3731                         return TRACE_TYPE_PARTIAL_LINE;
3732         }
3733
3734         event = ftrace_find_event(entry->type);
3735         if (event) {
3736                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3737                 if (ret != TRACE_TYPE_HANDLED)
3738                         return ret;
3739         }
3740
3741         SEQ_PUT_FIELD(s, newline);
3742
3743         return trace_handle_return(s);
3744 }
3745
3746 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3747 {
3748         struct trace_array *tr = iter->tr;
3749         struct trace_seq *s = &iter->seq;
3750         struct trace_entry *entry;
3751         struct trace_event *event;
3752
3753         entry = iter->ent;
3754
3755         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3756                 SEQ_PUT_FIELD(s, entry->pid);
3757                 SEQ_PUT_FIELD(s, iter->cpu);
3758                 SEQ_PUT_FIELD(s, iter->ts);
3759                 if (trace_seq_has_overflowed(s))
3760                         return TRACE_TYPE_PARTIAL_LINE;
3761         }
3762
3763         event = ftrace_find_event(entry->type);
3764         return event ? event->funcs->binary(iter, 0, event) :
3765                 TRACE_TYPE_HANDLED;
3766 }
3767
3768 int trace_empty(struct trace_iterator *iter)
3769 {
3770         struct ring_buffer_iter *buf_iter;
3771         int cpu;
3772
3773         /* If we are looking at one CPU buffer, only check that one */
3774         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3775                 cpu = iter->cpu_file;
3776                 buf_iter = trace_buffer_iter(iter, cpu);
3777                 if (buf_iter) {
3778                         if (!ring_buffer_iter_empty(buf_iter))
3779                                 return 0;
3780                 } else {
3781                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3782                                 return 0;
3783                 }
3784                 return 1;
3785         }
3786
3787         for_each_tracing_cpu(cpu) {
3788                 buf_iter = trace_buffer_iter(iter, cpu);
3789                 if (buf_iter) {
3790                         if (!ring_buffer_iter_empty(buf_iter))
3791                                 return 0;
3792                 } else {
3793                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3794                                 return 0;
3795                 }
3796         }
3797
3798         return 1;
3799 }
3800
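/*
 * Format one trace entry into iter->seq. If events were dropped on this
 * cpu, a note about the lost events is emitted first. The output format
 * is then chosen in order of precedence: a tracer-specific print_line()
 * handler, the "message only" printk formats, the bin/hex/raw options,
 * and finally the default human-readable format.
 */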
3801 /*  Called with trace_event_read_lock() held. */
3802 enum print_line_t print_trace_line(struct trace_iterator *iter)
3803 {
3804         struct trace_array *tr = iter->tr;
3805         unsigned long trace_flags = tr->trace_flags;
3806         enum print_line_t ret;
3807
3808         if (iter->lost_events) {
3809                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3810                                  iter->cpu, iter->lost_events);
3811                 if (trace_seq_has_overflowed(&iter->seq))
3812                         return TRACE_TYPE_PARTIAL_LINE;
3813         }
3814
3815         if (iter->trace && iter->trace->print_line) {
3816                 ret = iter->trace->print_line(iter);
3817                 if (ret != TRACE_TYPE_UNHANDLED)
3818                         return ret;
3819         }
3820
3821         if (iter->ent->type == TRACE_BPUTS &&
3822                         trace_flags & TRACE_ITER_PRINTK &&
3823                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3824                 return trace_print_bputs_msg_only(iter);
3825
3826         if (iter->ent->type == TRACE_BPRINT &&
3827                         trace_flags & TRACE_ITER_PRINTK &&
3828                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3829                 return trace_print_bprintk_msg_only(iter);
3830
3831         if (iter->ent->type == TRACE_PRINT &&
3832                         trace_flags & TRACE_ITER_PRINTK &&
3833                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834                 return trace_print_printk_msg_only(iter);
3835
3836         if (trace_flags & TRACE_ITER_BIN)
3837                 return print_bin_fmt(iter);
3838
3839         if (trace_flags & TRACE_ITER_HEX)
3840                 return print_hex_fmt(iter);
3841
3842         if (trace_flags & TRACE_ITER_RAW)
3843                 return print_raw_fmt(iter);
3844
3845         return print_trace_fmt(iter);
3846 }
3847
3848 void trace_latency_header(struct seq_file *m)
3849 {
3850         struct trace_iterator *iter = m->private;
3851         struct trace_array *tr = iter->tr;
3852
3853         /* print nothing if the buffers are empty */
3854         if (trace_empty(iter))
3855                 return;
3856
3857         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3858                 print_trace_header(m, iter);
3859
3860         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3861                 print_lat_help_header(m);
3862 }
3863
3864 void trace_default_header(struct seq_file *m)
3865 {
3866         struct trace_iterator *iter = m->private;
3867         struct trace_array *tr = iter->tr;
3868         unsigned long trace_flags = tr->trace_flags;
3869
3870         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3871                 return;
3872
3873         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3874                 /* print nothing if the buffers are empty */
3875                 if (trace_empty(iter))
3876                         return;
3877                 print_trace_header(m, iter);
3878                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3879                         print_lat_help_header(m);
3880         } else {
3881                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3882                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3883                                 print_func_help_header_irq(iter->trace_buffer,
3884                                                            m, trace_flags);
3885                         else
3886                                 print_func_help_header(iter->trace_buffer, m,
3887                                                        trace_flags);
3888                 }
3889         }
3890 }
3891
3892 static void test_ftrace_alive(struct seq_file *m)
3893 {
3894         if (!ftrace_is_dead())
3895                 return;
3896         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3897                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3898 }
3899
3900 #ifdef CONFIG_TRACER_MAX_TRACE
3901 static void show_snapshot_main_help(struct seq_file *m)
3902 {
3903         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3904                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3905                     "#                      Takes a snapshot of the main buffer.\n"
3906                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3907                     "#                      (Doesn't have to be '2'; works with any number that\n"
3908                     "#                       is not a '0' or '1')\n");
3909 }
3910
3911 static void show_snapshot_percpu_help(struct seq_file *m)
3912 {
3913         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3914 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3915         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3916                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3917 #else
3918         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3919                     "#                     Must use main snapshot file to allocate.\n");
3920 #endif
3921         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3922                     "#                      (Doesn't have to be '2'; works with any number that\n"
3923                     "#                       is not a '0' or '1')\n");
3924 }
3925
3926 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3927 {
3928         if (iter->tr->allocated_snapshot)
3929                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3930         else
3931                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3932
3933         seq_puts(m, "# Snapshot commands:\n");
3934         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3935                 show_snapshot_main_help(m);
3936         else
3937                 show_snapshot_percpu_help(m);
3938 }
3939 #else
3940 /* Should never be called */
3941 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3942 #endif
3943
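/*
 * seq_file ->show callback. With no current entry this prints the
 * headers (or the snapshot help text); if the previous line overflowed
 * the seq_file buffer, the leftover contents of iter->seq are flushed
 * first; otherwise one trace line is formatted and printed.
 * iter->leftover records whether trace_print_seq() managed to copy
 * everything out (0) or needs to be retried (-1).
 */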
3944 static int s_show(struct seq_file *m, void *v)
3945 {
3946         struct trace_iterator *iter = v;
3947         int ret;
3948
3949         if (iter->ent == NULL) {
3950                 if (iter->tr) {
3951                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3952                         seq_puts(m, "#\n");
3953                         test_ftrace_alive(m);
3954                 }
3955                 if (iter->snapshot && trace_empty(iter))
3956                         print_snapshot_help(m, iter);
3957                 else if (iter->trace && iter->trace->print_header)
3958                         iter->trace->print_header(m);
3959                 else
3960                         trace_default_header(m);
3961
3962         } else if (iter->leftover) {
3963                 /*
3964                  * If we filled the seq_file buffer earlier, we
3965                  * want to just show it now.
3966                  */
3967                 ret = trace_print_seq(m, &iter->seq);
3968
3969                 /* ret should this time be zero, but you never know */
3970                 iter->leftover = ret;
3971
3972         } else {
3973                 print_trace_line(iter);
3974                 ret = trace_print_seq(m, &iter->seq);
3975                 /*
3976                  * If we overflow the seq_file buffer, then it will
3977                  * ask us for this data again at start up.
3978                  * Use that instead.
3979                  *  ret is 0 if seq_file write succeeded.
3980                  *        -1 otherwise.
3981                  */
3982                 iter->leftover = ret;
3983         }
3984
3985         return 0;
3986 }
3987
3988 /*
3989  * Should be used after trace_array_get(); trace_types_lock
3990  * ensures that i_cdev was already initialized.
3991  */
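/*
 * The per_cpu/cpu<N> files store N + 1 in i_cdev when they are created
 * (see trace_create_cpu_file()), so for example the cpu0 file yields 0
 * here, while files without a cpu leave i_cdev NULL and get
 * RING_BUFFER_ALL_CPUS.
 */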
3992 static inline int tracing_get_cpu(struct inode *inode)
3993 {
3994         if (inode->i_cdev) /* See trace_create_cpu_file() */
3995                 return (long)inode->i_cdev - 1;
3996         return RING_BUFFER_ALL_CPUS;
3997 }
3998
3999 static const struct seq_operations tracer_seq_ops = {
4000         .start          = s_start,
4001         .next           = s_next,
4002         .stop           = s_stop,
4003         .show           = s_show,
4004 };
4005
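/*
 * Set up the iterator behind the "trace" (or "snapshot") seq_file:
 * a private copy of the current tracer is made, per-cpu ring buffer
 * iterators are allocated and synchronized, and unless a snapshot is
 * being opened, tracing on this array is stopped for the duration of
 * the read (it is restarted in tracing_release()).
 */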
4006 static struct trace_iterator *
4007 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4008 {
4009         struct trace_array *tr = inode->i_private;
4010         struct trace_iterator *iter;
4011         int cpu;
4012
4013         if (tracing_disabled)
4014                 return ERR_PTR(-ENODEV);
4015
4016         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4017         if (!iter)
4018                 return ERR_PTR(-ENOMEM);
4019
4020         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4021                                     GFP_KERNEL);
4022         if (!iter->buffer_iter)
4023                 goto release;
4024
4025         /*
4026          * We make a copy of the current tracer to avoid concurrent
4027          * changes to it while we are reading.
4028          */
4029         mutex_lock(&trace_types_lock);
4030         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4031         if (!iter->trace)
4032                 goto fail;
4033
4034         *iter->trace = *tr->current_trace;
4035
4036         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4037                 goto fail;
4038
4039         iter->tr = tr;
4040
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042         /* Currently only the top directory has a snapshot */
4043         if (tr->current_trace->print_max || snapshot)
4044                 iter->trace_buffer = &tr->max_buffer;
4045         else
4046 #endif
4047                 iter->trace_buffer = &tr->trace_buffer;
4048         iter->snapshot = snapshot;
4049         iter->pos = -1;
4050         iter->cpu_file = tracing_get_cpu(inode);
4051         mutex_init(&iter->mutex);
4052
4053         /* Notify the tracer early, before we stop tracing. */
4054         if (iter->trace && iter->trace->open)
4055                 iter->trace->open(iter);
4056
4057         /* Annotate start of buffers if we had overruns */
4058         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4059                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4060
4061         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4062         if (trace_clocks[tr->clock_id].in_ns)
4063                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4064
4065         /* stop the trace while dumping if we are not opening "snapshot" */
4066         if (!iter->snapshot)
4067                 tracing_stop_tr(tr);
4068
4069         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4070                 for_each_tracing_cpu(cpu) {
4071                         iter->buffer_iter[cpu] =
4072                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4073                 }
4074                 ring_buffer_read_prepare_sync();
4075                 for_each_tracing_cpu(cpu) {
4076                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4077                         tracing_iter_reset(iter, cpu);
4078                 }
4079         } else {
4080                 cpu = iter->cpu_file;
4081                 iter->buffer_iter[cpu] =
4082                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4083                 ring_buffer_read_prepare_sync();
4084                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4085                 tracing_iter_reset(iter, cpu);
4086         }
4087
4088         mutex_unlock(&trace_types_lock);
4089
4090         return iter;
4091
4092  fail:
4093         mutex_unlock(&trace_types_lock);
4094         kfree(iter->trace);
4095         kfree(iter->buffer_iter);
4096 release:
4097         seq_release_private(inode, file);
4098         return ERR_PTR(-ENOMEM);
4099 }
4100
4101 int tracing_open_generic(struct inode *inode, struct file *filp)
4102 {
4103         if (tracing_disabled)
4104                 return -ENODEV;
4105
4106         filp->private_data = inode->i_private;
4107         return 0;
4108 }
4109
4110 bool tracing_is_disabled(void)
4111 {
4112         return (tracing_disabled) ? true: false;
4113         return tracing_disabled ? true : false;
4114
4115 /*
4116  * Open and update trace_array ref count.
4117  * Must have the current trace_array passed to it.
4118  */
4119 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4120 {
4121         struct trace_array *tr = inode->i_private;
4122
4123         if (tracing_disabled)
4124                 return -ENODEV;
4125
4126         if (trace_array_get(tr) < 0)
4127                 return -ENODEV;
4128
4129         filp->private_data = inode->i_private;
4130
4131         return 0;
4132 }
4133
4134 static int tracing_release(struct inode *inode, struct file *file)
4135 {
4136         struct trace_array *tr = inode->i_private;
4137         struct seq_file *m = file->private_data;
4138         struct trace_iterator *iter;
4139         int cpu;
4140
4141         if (!(file->f_mode & FMODE_READ)) {
4142                 trace_array_put(tr);
4143                 return 0;
4144         }
4145
4146         /* Writes do not use seq_file */
4147         iter = m->private;
4148         mutex_lock(&trace_types_lock);
4149
4150         for_each_tracing_cpu(cpu) {
4151                 if (iter->buffer_iter[cpu])
4152                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4153         }
4154
4155         if (iter->trace && iter->trace->close)
4156                 iter->trace->close(iter);
4157
4158         if (!iter->snapshot)
4159                 /* reenable tracing if it was previously enabled */
4160                 tracing_start_tr(tr);
4161
4162         __trace_array_put(tr);
4163
4164         mutex_unlock(&trace_types_lock);
4165
4166         mutex_destroy(&iter->mutex);
4167         free_cpumask_var(iter->started);
4168         kfree(iter->trace);
4169         kfree(iter->buffer_iter);
4170         seq_release_private(inode, file);
4171
4172         return 0;
4173 }
4174
4175 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4176 {
4177         struct trace_array *tr = inode->i_private;
4178
4179         trace_array_put(tr);
4180         return 0;
4181 }
4182
4183 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4184 {
4185         struct trace_array *tr = inode->i_private;
4186
4187         trace_array_put(tr);
4188
4189         return single_release(inode, file);
4190 }
4191
4192 static int tracing_open(struct inode *inode, struct file *file)
4193 {
4194         struct trace_array *tr = inode->i_private;
4195         struct trace_iterator *iter;
4196         int ret = 0;
4197
4198         if (trace_array_get(tr) < 0)
4199                 return -ENODEV;
4200
4201         /* If this file was open for write, then erase contents */
4202         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4203                 int cpu = tracing_get_cpu(inode);
4204                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4205
4206 #ifdef CONFIG_TRACER_MAX_TRACE
4207                 if (tr->current_trace->print_max)
4208                         trace_buf = &tr->max_buffer;
4209 #endif
4210
4211                 if (cpu == RING_BUFFER_ALL_CPUS)
4212                         tracing_reset_online_cpus(trace_buf);
4213                 else
4214                         tracing_reset(trace_buf, cpu);
4215         }
4216
4217         if (file->f_mode & FMODE_READ) {
4218                 iter = __tracing_open(inode, file, false);
4219                 if (IS_ERR(iter))
4220                         ret = PTR_ERR(iter);
4221                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4222                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4223         }
4224
4225         if (ret < 0)
4226                 trace_array_put(tr);
4227
4228         return ret;
4229 }
4230
4231 /*
4232  * Some tracers are not suitable for instance buffers.
4233  * A tracer is always available for the global array (toplevel)
4234  * or if it explicitly states that it is.
4235  */
4236 static bool
4237 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4238 {
4239         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4240 }
4241
4242 /* Find the next tracer that this trace array may use */
4243 static struct tracer *
4244 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4245 {
4246         while (t && !trace_ok_for_array(t, tr))
4247                 t = t->next;
4248
4249         return t;
4250 }
4251
4252 static void *
4253 t_next(struct seq_file *m, void *v, loff_t *pos)
4254 {
4255         struct trace_array *tr = m->private;
4256         struct tracer *t = v;
4257
4258         (*pos)++;
4259
4260         if (t)
4261                 t = get_tracer_for_array(tr, t->next);
4262
4263         return t;
4264 }
4265
4266 static void *t_start(struct seq_file *m, loff_t *pos)
4267 {
4268         struct trace_array *tr = m->private;
4269         struct tracer *t;
4270         loff_t l = 0;
4271
4272         mutex_lock(&trace_types_lock);
4273
4274         t = get_tracer_for_array(tr, trace_types);
4275         for (; t && l < *pos; t = t_next(m, t, &l))
4276                         ;
4277
4278         return t;
4279 }
4280
4281 static void t_stop(struct seq_file *m, void *p)
4282 {
4283         mutex_unlock(&trace_types_lock);
4284 }
4285
4286 static int t_show(struct seq_file *m, void *v)
4287 {
4288         struct tracer *t = v;
4289
4290         if (!t)
4291                 return 0;
4292
4293         seq_puts(m, t->name);
4294         if (t->next)
4295                 seq_putc(m, ' ');
4296         else
4297                 seq_putc(m, '\n');
4298
4299         return 0;
4300 }
4301
4302 static const struct seq_operations show_traces_seq_ops = {
4303         .start          = t_start,
4304         .next           = t_next,
4305         .stop           = t_stop,
4306         .show           = t_show,
4307 };
4308
4309 static int show_traces_open(struct inode *inode, struct file *file)
4310 {
4311         struct trace_array *tr = inode->i_private;
4312         struct seq_file *m;
4313         int ret;
4314
4315         if (tracing_disabled)
4316                 return -ENODEV;
4317
4318         ret = seq_open(file, &show_traces_seq_ops);
4319         if (ret)
4320                 return ret;
4321
4322         m = file->private_data;
4323         m->private = tr;
4324
4325         return 0;
4326 }
4327
4328 static ssize_t
4329 tracing_write_stub(struct file *filp, const char __user *ubuf,
4330                    size_t count, loff_t *ppos)
4331 {
4332         return count;
4333 }
4334
4335 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4336 {
4337         int ret;
4338
4339         if (file->f_mode & FMODE_READ)
4340                 ret = seq_lseek(file, offset, whence);
4341         else
4342                 file->f_pos = ret = 0;
4343
4344         return ret;
4345 }
4346
4347 static const struct file_operations tracing_fops = {
4348         .open           = tracing_open,
4349         .read           = seq_read,
4350         .write          = tracing_write_stub,
4351         .llseek         = tracing_lseek,
4352         .release        = tracing_release,
4353 };
4354
4355 static const struct file_operations show_traces_fops = {
4356         .open           = show_traces_open,
4357         .read           = seq_read,
4358         .release        = seq_release,
4359         .llseek         = seq_lseek,
4360 };
4361
4362 static ssize_t
4363 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4364                      size_t count, loff_t *ppos)
4365 {
4366         struct trace_array *tr = file_inode(filp)->i_private;
4367         char *mask_str;
4368         int len;
4369
4370         len = snprintf(NULL, 0, "%*pb\n",
4371                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4372         mask_str = kmalloc(len, GFP_KERNEL);
4373         if (!mask_str)
4374                 return -ENOMEM;
4375
4376         len = snprintf(mask_str, len, "%*pb\n",
4377                        cpumask_pr_args(tr->tracing_cpumask));
4378         if (len >= count) {
4379                 count = -EINVAL;
4380                 goto out_err;
4381         }
4382         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4383
4384 out_err:
4385         kfree(mask_str);
4386
4387         return count;
4388 }
4389
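/*
 * Writing a cpumask (e.g. "echo 3 > tracing_cpumask" to trace only
 * CPUs 0 and 1) updates tr->tracing_cpumask. For every cpu whose bit
 * changes, the per-cpu "disabled" counter and the ring buffer record
 * enable state are adjusted under max_lock with interrupts disabled.
 */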
4390 static ssize_t
4391 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4392                       size_t count, loff_t *ppos)
4393 {
4394         struct trace_array *tr = file_inode(filp)->i_private;
4395         cpumask_var_t tracing_cpumask_new;
4396         int err, cpu;
4397
4398         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4399                 return -ENOMEM;
4400
4401         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4402         if (err)
4403                 goto err_unlock;
4404
4405         local_irq_disable();
4406         arch_spin_lock(&tr->max_lock);
4407         for_each_tracing_cpu(cpu) {
4408                 /*
4409                  * Increase/decrease the disabled counter if we are
4410                  * about to flip a bit in the cpumask:
4411                  */
4412                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4413                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4414                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4415                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4416                 }
4417                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4418                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4419                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4420                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4421                 }
4422         }
4423         arch_spin_unlock(&tr->max_lock);
4424         local_irq_enable();
4425
4426         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4427         free_cpumask_var(tracing_cpumask_new);
4428
4429         return count;
4430
4431 err_unlock:
4432         free_cpumask_var(tracing_cpumask_new);
4433
4434         return err;
4435 }
4436
4437 static const struct file_operations tracing_cpumask_fops = {
4438         .open           = tracing_open_generic_tr,
4439         .read           = tracing_cpumask_read,
4440         .write          = tracing_cpumask_write,
4441         .release        = tracing_release_generic_tr,
4442         .llseek         = generic_file_llseek,
4443 };
4444
4445 static int tracing_trace_options_show(struct seq_file *m, void *v)
4446 {
4447         struct tracer_opt *trace_opts;
4448         struct trace_array *tr = m->private;
4449         u32 tracer_flags;
4450         int i;
4451
4452         mutex_lock(&trace_types_lock);
4453         tracer_flags = tr->current_trace->flags->val;
4454         trace_opts = tr->current_trace->flags->opts;
4455
4456         for (i = 0; trace_options[i]; i++) {
4457                 if (tr->trace_flags & (1 << i))
4458                         seq_printf(m, "%s\n", trace_options[i]);
4459                 else
4460                         seq_printf(m, "no%s\n", trace_options[i]);
4461         }
4462
4463         for (i = 0; trace_opts[i].name; i++) {
4464                 if (tracer_flags & trace_opts[i].bit)
4465                         seq_printf(m, "%s\n", trace_opts[i].name);
4466                 else
4467                         seq_printf(m, "no%s\n", trace_opts[i].name);
4468         }
4469         mutex_unlock(&trace_types_lock);
4470
4471         return 0;
4472 }
4473
4474 static int __set_tracer_option(struct trace_array *tr,
4475                                struct tracer_flags *tracer_flags,
4476                                struct tracer_opt *opts, int neg)
4477 {
4478         struct tracer *trace = tracer_flags->trace;
4479         int ret;
4480
4481         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4482         if (ret)
4483                 return ret;
4484
4485         if (neg)
4486                 tracer_flags->val &= ~opts->bit;
4487         else
4488                 tracer_flags->val |= opts->bit;
4489         return 0;
4490 }
4491
4492 /* Try to assign a tracer specific option */
4493 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4494 {
4495         struct tracer *trace = tr->current_trace;
4496         struct tracer_flags *tracer_flags = trace->flags;
4497         struct tracer_opt *opts = NULL;
4498         int i;
4499
4500         for (i = 0; tracer_flags->opts[i].name; i++) {
4501                 opts = &tracer_flags->opts[i];
4502
4503                 if (strcmp(cmp, opts->name) == 0)
4504                         return __set_tracer_option(tr, trace->flags, opts, neg);
4505         }
4506
4507         return -EINVAL;
4508 }
4509
4510 /* Some tracers require overwrite to stay enabled */
4511 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4512 {
4513         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4514                 return -1;
4515
4516         return 0;
4517 }
4518
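/*
 * Set or clear one TRACE_ITER_* flag on @tr. The current tracer may
 * veto the change via ->flag_changed(), and some flags need extra work
 * here: toggling comm/tgid recording, following forks, switching the
 * ring buffer overwrite mode, or enabling/disabling trace_printk().
 */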
4519 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4520 {
4521         /* do nothing if flag is already set */
4522         if (!!(tr->trace_flags & mask) == !!enabled)
4523                 return 0;
4524
4525         /* Give the tracer a chance to approve the change */
4526         if (tr->current_trace->flag_changed)
4527                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4528                         return -EINVAL;
4529
4530         if (enabled)
4531                 tr->trace_flags |= mask;
4532         else
4533                 tr->trace_flags &= ~mask;
4534
4535         if (mask == TRACE_ITER_RECORD_CMD)
4536                 trace_event_enable_cmd_record(enabled);
4537
4538         if (mask == TRACE_ITER_RECORD_TGID) {
4539                 if (!tgid_map)
4540                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4541                                            sizeof(*tgid_map),
4542                                            GFP_KERNEL);
4543                 if (!tgid_map) {
4544                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4545                         return -ENOMEM;
4546                 }
4547
4548                 trace_event_enable_tgid_record(enabled);
4549         }
4550
4551         if (mask == TRACE_ITER_EVENT_FORK)
4552                 trace_event_follow_fork(tr, enabled);
4553
4554         if (mask == TRACE_ITER_FUNC_FORK)
4555                 ftrace_pid_follow_fork(tr, enabled);
4556
4557         if (mask == TRACE_ITER_OVERWRITE) {
4558                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4559 #ifdef CONFIG_TRACER_MAX_TRACE
4560                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4561 #endif
4562         }
4563
4564         if (mask == TRACE_ITER_PRINTK) {
4565                 trace_printk_start_stop_comm(enabled);
4566                 trace_printk_control(enabled);
4567         }
4568
4569         return 0;
4570 }
4571
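/*
 * Apply one option string to @tr. A "no" prefix clears the flag, so
 * for example "print-parent" sets the print-parent flag while
 * "noprint-parent" clears it. Names that do not match a core trace
 * option are handed to the current tracer's private options.
 */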
4572 static int trace_set_options(struct trace_array *tr, char *option)
4573 {
4574         char *cmp;
4575         int neg = 0;
4576         int ret;
4577         size_t orig_len = strlen(option);
4578         int len;
4579
4580         cmp = strstrip(option);
4581
4582         len = str_has_prefix(cmp, "no");
4583         if (len)
4584                 neg = 1;
4585
4586         cmp += len;
4587
4588         mutex_lock(&trace_types_lock);
4589
4590         ret = match_string(trace_options, -1, cmp);
4591         /* If no option could be set, test the specific tracer options */
4592         if (ret < 0)
4593                 ret = set_tracer_option(tr, cmp, neg);
4594         else
4595                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4596
4597         mutex_unlock(&trace_types_lock);
4598
4599         /*
4600          * If the first trailing whitespace is replaced with '\0' by strstrip,
4601          * turn it back into a space.
4602          */
4603         if (orig_len > strlen(option))
4604                 option[strlen(option)] = ' ';
4605
4606         return ret;
4607 }
4608
4609 static void __init apply_trace_boot_options(void)
4610 {
4611         char *buf = trace_boot_options_buf;
4612         char *option;
4613
4614         while (true) {
4615                 option = strsep(&buf, ",");
4616
4617                 if (!option)
4618                         break;
4619
4620                 if (*option)
4621                         trace_set_options(&global_trace, option);
4622
4623                 /* Put back the comma to allow this to be called again */
4624                 if (buf)
4625                         *(buf - 1) = ',';
4626         }
4627 }
4628
4629 static ssize_t
4630 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4631                         size_t cnt, loff_t *ppos)
4632 {
4633         struct seq_file *m = filp->private_data;
4634         struct trace_array *tr = m->private;
4635         char buf[64];
4636         int ret;
4637
4638         if (cnt >= sizeof(buf))
4639                 return -EINVAL;
4640
4641         if (copy_from_user(buf, ubuf, cnt))
4642                 return -EFAULT;
4643
4644         buf[cnt] = 0;
4645
4646         ret = trace_set_options(tr, buf);
4647         if (ret < 0)
4648                 return ret;
4649
4650         *ppos += cnt;
4651
4652         return cnt;
4653 }
4654
4655 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4656 {
4657         struct trace_array *tr = inode->i_private;
4658         int ret;
4659
4660         if (tracing_disabled)
4661                 return -ENODEV;
4662
4663         if (trace_array_get(tr) < 0)
4664                 return -ENODEV;
4665
4666         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4667         if (ret < 0)
4668                 trace_array_put(tr);
4669
4670         return ret;
4671 }
4672
4673 static const struct file_operations tracing_iter_fops = {
4674         .open           = tracing_trace_options_open,
4675         .read           = seq_read,
4676         .llseek         = seq_lseek,
4677         .release        = tracing_single_release_tr,
4678         .write          = tracing_trace_options_write,
4679 };
4680
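/*
 * Contents of the tracefs "README" file: a quick reference for the
 * control files, options and event trigger syntax. Entries guarded by
 * #ifdef only show up when the corresponding feature is built in.
 */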
4681 static const char readme_msg[] =
4682         "tracing mini-HOWTO:\n\n"
4683         "# echo 0 > tracing_on : quick way to disable tracing\n"
4684         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4685         " Important files:\n"
4686         "  trace\t\t\t- The static contents of the buffer\n"
4687         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4688         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4689         "  current_tracer\t- function and latency tracers\n"
4690         "  available_tracers\t- list of configured tracers for current_tracer\n"
4691         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4692         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4693         "  trace_clock\t\t- change the clock used to order events\n"
4694         "       local:   Per cpu clock but may not be synced across CPUs\n"
4695         "      global:   Synced across CPUs but slows tracing down.\n"
4696         "     counter:   Not a clock, but just an increment\n"
4697         "      uptime:   Jiffy counter from time of boot\n"
4698         "        perf:   Same clock that perf events use\n"
4699 #ifdef CONFIG_X86_64
4700         "     x86-tsc:   TSC cycle counter\n"
4701 #endif
4702         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4703         "       delta:   Delta difference against a buffer-wide timestamp\n"
4704         "    absolute:   Absolute (standalone) timestamp\n"
4705         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4706         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4707         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4708         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4709         "\t\t\t  Remove sub-buffer with rmdir\n"
4710         "  trace_options\t\t- Set format or modify how tracing happens\n"
4711         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4712         "\t\t\t  option name\n"
4713         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4714 #ifdef CONFIG_DYNAMIC_FTRACE
4715         "\n  available_filter_functions - list of functions that can be filtered on\n"
4716         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4717         "\t\t\t  functions\n"
4718         "\t     accepts: func_full_name or glob-matching-pattern\n"
4719         "\t     modules: Can select a group via module\n"
4720         "\t      Format: :mod:<module-name>\n"
4721         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4722         "\t    triggers: a command to perform when function is hit\n"
4723         "\t      Format: <function>:<trigger>[:count]\n"
4724         "\t     trigger: traceon, traceoff\n"
4725         "\t\t      enable_event:<system>:<event>\n"
4726         "\t\t      disable_event:<system>:<event>\n"
4727 #ifdef CONFIG_STACKTRACE
4728         "\t\t      stacktrace\n"
4729 #endif
4730 #ifdef CONFIG_TRACER_SNAPSHOT
4731         "\t\t      snapshot\n"
4732 #endif
4733         "\t\t      dump\n"
4734         "\t\t      cpudump\n"
4735         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4736         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4737         "\t     The first one will disable tracing every time do_fault is hit\n"
4738         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4739         "\t       The first time do_trap is hit and it disables tracing, the\n"
4740         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4741         "\t       the counter will not decrement. It only decrements when the\n"
4742         "\t       trigger did work\n"
4743         "\t     To remove trigger without count:\n"
4744         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4745         "\t     To remove trigger with a count:\n"
4746         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4747         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4748         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4749         "\t    modules: Can select a group via module command :mod:\n"
4750         "\t    Does not accept triggers\n"
4751 #endif /* CONFIG_DYNAMIC_FTRACE */
4752 #ifdef CONFIG_FUNCTION_TRACER
4753         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4754         "\t\t    (function)\n"
4755 #endif
4756 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4757         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4758         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4759         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4760 #endif
4761 #ifdef CONFIG_TRACER_SNAPSHOT
4762         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4763         "\t\t\t  snapshot buffer. Read the contents for more\n"
4764         "\t\t\t  information\n"
4765 #endif
4766 #ifdef CONFIG_STACK_TRACER
4767         "  stack_trace\t\t- Shows the max stack trace when active\n"
4768         "  stack_max_size\t- Shows current max stack size that was traced\n"
4769         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4770         "\t\t\t  new trace)\n"
4771 #ifdef CONFIG_DYNAMIC_FTRACE
4772         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4773         "\t\t\t  traces\n"
4774 #endif
4775 #endif /* CONFIG_STACK_TRACER */
4776 #ifdef CONFIG_DYNAMIC_EVENTS
4777         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4778         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4779 #endif
4780 #ifdef CONFIG_KPROBE_EVENTS
4781         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4782         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4783 #endif
4784 #ifdef CONFIG_UPROBE_EVENTS
4785         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4786         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4787 #endif
4788 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4789         "\t  accepts: event-definitions (one definition per line)\n"
4790         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4791         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4792 #ifdef CONFIG_HIST_TRIGGERS
4793         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4794 #endif
4795         "\t           -:[<group>/]<event>\n"
4796 #ifdef CONFIG_KPROBE_EVENTS
4797         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4798   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4799 #endif
4800 #ifdef CONFIG_UPROBE_EVENTS
4801   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4802 #endif
4803         "\t     args: <name>=fetcharg[:type]\n"
4804         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4805 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4806         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4807 #else
4808         "\t           $stack<index>, $stack, $retval, $comm\n"
4809 #endif
4810         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4811         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4812         "\t           <type>\\[<array-size>\\]\n"
4813 #ifdef CONFIG_HIST_TRIGGERS
4814         "\t    field: <stype> <name>;\n"
4815         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4816         "\t           [unsigned] char/int/long\n"
4817 #endif
4818 #endif
4819         "  events/\t\t- Directory containing all trace event subsystems:\n"
4820         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4821         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4822         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4823         "\t\t\t  events\n"
4824         "      filter\t\t- If set, only events passing filter are traced\n"
4825         "  events/<system>/<event>/\t- Directory containing control files for\n"
4826         "\t\t\t  <event>:\n"
4827         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4828         "      filter\t\t- If set, only events passing filter are traced\n"
4829         "      trigger\t\t- If set, a command to perform when event is hit\n"
4830         "\t    Format: <trigger>[:count][if <filter>]\n"
4831         "\t   trigger: traceon, traceoff\n"
4832         "\t            enable_event:<system>:<event>\n"
4833         "\t            disable_event:<system>:<event>\n"
4834 #ifdef CONFIG_HIST_TRIGGERS
4835         "\t            enable_hist:<system>:<event>\n"
4836         "\t            disable_hist:<system>:<event>\n"
4837 #endif
4838 #ifdef CONFIG_STACKTRACE
4839         "\t\t    stacktrace\n"
4840 #endif
4841 #ifdef CONFIG_TRACER_SNAPSHOT
4842         "\t\t    snapshot\n"
4843 #endif
4844 #ifdef CONFIG_HIST_TRIGGERS
4845         "\t\t    hist (see below)\n"
4846 #endif
4847         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4848         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4849         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4850         "\t                  events/block/block_unplug/trigger\n"
4851         "\t   The first disables tracing every time block_unplug is hit.\n"
4852         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4853         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4854         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4855         "\t   Like function triggers, the counter is only decremented if it\n"
4856         "\t    enabled or disabled tracing.\n"
4857         "\t   To remove a trigger without a count:\n"
4858         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4859         "\t   To remove a trigger with a count:\n"
4860         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4861         "\t   Filters can be ignored when removing a trigger.\n"
4862 #ifdef CONFIG_HIST_TRIGGERS
4863         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4864         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4865         "\t            [:values=<field1[,field2,...]>]\n"
4866         "\t            [:sort=<field1[,field2,...]>]\n"
4867         "\t            [:size=#entries]\n"
4868         "\t            [:pause][:continue][:clear]\n"
4869         "\t            [:name=histname1]\n"
4870         "\t            [:<handler>.<action>]\n"
4871         "\t            [if <filter>]\n\n"
4872         "\t    When a matching event is hit, an entry is added to a hash\n"
4873         "\t    table using the key(s) and value(s) named, and the value of a\n"
4874         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4875         "\t    correspond to fields in the event's format description.  Keys\n"
4876         "\t    can be any field, or the special string 'stacktrace'.\n"
4877         "\t    Compound keys consisting of up to two fields can be specified\n"
4878         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4879         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4880         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4881         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4882         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4883         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4884         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4885         "\t    its histogram data will be shared with other triggers of the\n"
4886         "\t    same name, and trigger hits will update this common data.\n\n"
4887         "\t    Reading the 'hist' file for the event will dump the hash\n"
4888         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4889         "\t    triggers attached to an event, there will be a table for each\n"
4890         "\t    trigger in the output.  The table displayed for a named\n"
4891         "\t    trigger will be the same as any other instance having the\n"
4892         "\t    same name.  The default format used to display a given field\n"
4893         "\t    can be modified by appending any of the following modifiers\n"
4894         "\t    to the field name, as applicable:\n\n"
4895         "\t            .hex        display a number as a hex value\n"
4896         "\t            .sym        display an address as a symbol\n"
4897         "\t            .sym-offset display an address as a symbol and offset\n"
4898         "\t            .execname   display a common_pid as a program name\n"
4899         "\t            .syscall    display a syscall id as a syscall name\n"
4900         "\t            .log2       display log2 value rather than raw number\n"
4901         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4902         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4903         "\t    trigger or to start a hist trigger but not log any events\n"
4904         "\t    until told to do so.  'continue' can be used to start or\n"
4905         "\t    restart a paused hist trigger.\n\n"
4906         "\t    The 'clear' parameter will clear the contents of a running\n"
4907         "\t    hist trigger and leave its current paused/active state\n"
4908         "\t    unchanged.\n\n"
4909         "\t    The enable_hist and disable_hist triggers can be used to\n"
4910         "\t    have one event conditionally start and stop another event's\n"
4911         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4912         "\t    the enable_event and disable_event triggers.\n\n"
4913         "\t    Hist trigger handlers and actions are executed whenever a\n"
4914         "\t    histogram entry is added or updated.  They take the form:\n\n"
4915         "\t        <handler>.<action>\n\n"
4916         "\t    The available handlers are:\n\n"
4917         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4918         "\t        onmax(var)               - invoke if var exceeds current max\n\n"
4919         "\t    The available actions are:\n\n"
4920         "\t        <synthetic_event>(param list)        - generate synthetic event\n"
4921         "\t        save(field,...)                      - save current event fields\n"
4922 #endif
4923 ;
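
/*
 * Illustrative userspace sketch of the trigger syntax documented in the
 * help text above (a sketch kept out of the build by "#if 0"; it assumes
 * tracefs is mounted at /sys/kernel/tracing and reuses the example
 * function and event names from the help text).
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *str)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, str, strlen(str)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        /* Function trigger: stop tracing the first 3 times do_trap is hit */
        write_str("/sys/kernel/tracing/set_ftrace_filter",
                  "do_trap:traceoff:3");

        /* Event trigger: aggregate block_unplug hits into a histogram */
        write_str("/sys/kernel/tracing/events/block/block_unplug/trigger",
                  "hist:keys=nr_rq:sort=hitcount");
        return 0;
}
#endif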
4924
4925 static ssize_t
4926 tracing_readme_read(struct file *filp, char __user *ubuf,
4927                        size_t cnt, loff_t *ppos)
4928 {
4929         return simple_read_from_buffer(ubuf, cnt, ppos,
4930                                         readme_msg, strlen(readme_msg));
4931 }
4932
4933 static const struct file_operations tracing_readme_fops = {
4934         .open           = tracing_open_generic,
4935         .read           = tracing_readme_read,
4936         .llseek         = generic_file_llseek,
4937 };
4938
4939 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4940 {
4941         int *ptr = v;
4942
4943         if (*pos || m->count)
4944                 ptr++;
4945
4946         (*pos)++;
4947
4948         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4949                 if (trace_find_tgid(*ptr))
4950                         return ptr;
4951         }
4952
4953         return NULL;
4954 }
4955
4956 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4957 {
4958         void *v;
4959         loff_t l = 0;
4960
4961         if (!tgid_map)
4962                 return NULL;
4963
4964         v = &tgid_map[0];
4965         while (l <= *pos) {
4966                 v = saved_tgids_next(m, v, &l);
4967                 if (!v)
4968                         return NULL;
4969         }
4970
4971         return v;
4972 }
4973
4974 static void saved_tgids_stop(struct seq_file *m, void *v)
4975 {
4976 }
4977
4978 static int saved_tgids_show(struct seq_file *m, void *v)
4979 {
4980         int pid = (int *)v - tgid_map;
4981
4982         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4983         return 0;
4984 }
4985
4986 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4987         .start          = saved_tgids_start,
4988         .stop           = saved_tgids_stop,
4989         .next           = saved_tgids_next,
4990         .show           = saved_tgids_show,
4991 };
4992
4993 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4994 {
4995         if (tracing_disabled)
4996                 return -ENODEV;
4997
4998         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4999 }
5000
5001
5002 static const struct file_operations tracing_saved_tgids_fops = {
5003         .open           = tracing_saved_tgids_open,
5004         .read           = seq_read,
5005         .llseek         = seq_lseek,
5006         .release        = seq_release,
5007 };
5008
5009 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5010 {
5011         unsigned int *ptr = v;
5012
5013         if (*pos || m->count)
5014                 ptr++;
5015
5016         (*pos)++;
5017
5018         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5019              ptr++) {
5020                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5021                         continue;
5022
5023                 return ptr;
5024         }
5025
5026         return NULL;
5027 }
5028
5029 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5030 {
5031         void *v;
5032         loff_t l = 0;
5033
5034         preempt_disable();
5035         arch_spin_lock(&trace_cmdline_lock);
5036
5037         v = &savedcmd->map_cmdline_to_pid[0];
5038         while (l <= *pos) {
5039                 v = saved_cmdlines_next(m, v, &l);
5040                 if (!v)
5041                         return NULL;
5042         }
5043
5044         return v;
5045 }
5046
5047 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5048 {
5049         arch_spin_unlock(&trace_cmdline_lock);
5050         preempt_enable();
5051 }
5052
5053 static int saved_cmdlines_show(struct seq_file *m, void *v)
5054 {
5055         char buf[TASK_COMM_LEN];
5056         unsigned int *pid = v;
5057
5058         __trace_find_cmdline(*pid, buf);
5059         seq_printf(m, "%d %s\n", *pid, buf);
5060         return 0;
5061 }
5062
5063 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5064         .start          = saved_cmdlines_start,
5065         .next           = saved_cmdlines_next,
5066         .stop           = saved_cmdlines_stop,
5067         .show           = saved_cmdlines_show,
5068 };
5069
5070 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5071 {
5072         if (tracing_disabled)
5073                 return -ENODEV;
5074
5075         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5076 }
5077
5078 static const struct file_operations tracing_saved_cmdlines_fops = {
5079         .open           = tracing_saved_cmdlines_open,
5080         .read           = seq_read,
5081         .llseek         = seq_lseek,
5082         .release        = seq_release,
5083 };
5084
5085 static ssize_t
5086 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5087                                  size_t cnt, loff_t *ppos)
5088 {
5089         char buf[64];
5090         int r;
5091
5092         arch_spin_lock(&trace_cmdline_lock);
5093         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5094         arch_spin_unlock(&trace_cmdline_lock);
5095
5096         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5097 }
5098
5099 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5100 {
5101         kfree(s->saved_cmdlines);
5102         kfree(s->map_cmdline_to_pid);
5103         kfree(s);
5104 }
5105
5106 static int tracing_resize_saved_cmdlines(unsigned int val)
5107 {
5108         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5109
5110         s = kmalloc(sizeof(*s), GFP_KERNEL);
5111         if (!s)
5112                 return -ENOMEM;
5113
5114         if (allocate_cmdlines_buffer(val, s) < 0) {
5115                 kfree(s);
5116                 return -ENOMEM;
5117         }
5118
5119         arch_spin_lock(&trace_cmdline_lock);
5120         savedcmd_temp = savedcmd;
5121         savedcmd = s;
5122         arch_spin_unlock(&trace_cmdline_lock);
5123         free_saved_cmdlines_buffer(savedcmd_temp);
5124
5125         return 0;
5126 }
5127
5128 static ssize_t
5129 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5130                                   size_t cnt, loff_t *ppos)
5131 {
5132         unsigned long val;
5133         int ret;
5134
5135         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5136         if (ret)
5137                 return ret;
5138
5139         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5140         if (!val || val > PID_MAX_DEFAULT)
5141                 return -EINVAL;
5142
5143         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5144         if (ret < 0)
5145                 return ret;
5146
5147         *ppos += cnt;
5148
5149         return cnt;
5150 }
5151
5152 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5153         .open           = tracing_open_generic,
5154         .read           = tracing_saved_cmdlines_size_read,
5155         .write          = tracing_saved_cmdlines_size_write,
5156 };
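
/*
 * Illustrative usage of the handler above (assuming it backs the
 * saved_cmdlines_size file and tracefs is mounted at /sys/kernel/tracing):
 * the value written is a plain decimal number of saved-cmdline slots, e.g.
 *
 *      echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * and must be between 1 and PID_MAX_DEFAULT, as checked in the write
 * handler.
 */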
5157
5158 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5159 static union trace_eval_map_item *
5160 update_eval_map(union trace_eval_map_item *ptr)
5161 {
5162         if (!ptr->map.eval_string) {
5163                 if (ptr->tail.next) {
5164                         ptr = ptr->tail.next;
5165                         /* Set ptr to the next real item (skip head) */
5166                         ptr++;
5167                 } else
5168                         return NULL;
5169         }
5170         return ptr;
5171 }
5172
5173 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5174 {
5175         union trace_eval_map_item *ptr = v;
5176
5177         /*
5178          * Paranoid! If ptr points to end, we don't want to increment past it.
5179          * This really should never happen.
5180          */
5181         ptr = update_eval_map(ptr);
5182         if (WARN_ON_ONCE(!ptr))
5183                 return NULL;
5184
5185         ptr++;
5186
5187         (*pos)++;
5188
5189         ptr = update_eval_map(ptr);
5190
5191         return ptr;
5192 }
5193
5194 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5195 {
5196         union trace_eval_map_item *v;
5197         loff_t l = 0;
5198
5199         mutex_lock(&trace_eval_mutex);
5200
5201         v = trace_eval_maps;
5202         if (v)
5203                 v++;
5204
5205         while (v && l < *pos) {
5206                 v = eval_map_next(m, v, &l);
5207         }
5208
5209         return v;
5210 }
5211
5212 static void eval_map_stop(struct seq_file *m, void *v)
5213 {
5214         mutex_unlock(&trace_eval_mutex);
5215 }
5216
5217 static int eval_map_show(struct seq_file *m, void *v)
5218 {
5219         union trace_eval_map_item *ptr = v;
5220
5221         seq_printf(m, "%s %ld (%s)\n",
5222                    ptr->map.eval_string, ptr->map.eval_value,
5223                    ptr->map.system);
5224
5225         return 0;
5226 }
5227
5228 static const struct seq_operations tracing_eval_map_seq_ops = {
5229         .start          = eval_map_start,
5230         .next           = eval_map_next,
5231         .stop           = eval_map_stop,
5232         .show           = eval_map_show,
5233 };
5234
5235 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5236 {
5237         if (tracing_disabled)
5238                 return -ENODEV;
5239
5240         return seq_open(filp, &tracing_eval_map_seq_ops);
5241 }
5242
5243 static const struct file_operations tracing_eval_map_fops = {
5244         .open           = tracing_eval_map_open,
5245         .read           = seq_read,
5246         .llseek         = seq_lseek,
5247         .release        = seq_release,
5248 };
5249
5250 static inline union trace_eval_map_item *
5251 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5252 {
5253         /* Return tail of array given the head */
5254         return ptr + ptr->head.length + 1;
5255 }
5256
5257 static void
5258 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5259                            int len)
5260 {
5261         struct trace_eval_map **stop;
5262         struct trace_eval_map **map;
5263         union trace_eval_map_item *map_array;
5264         union trace_eval_map_item *ptr;
5265
5266         stop = start + len;
5267
5268         /*
5269          * The trace_eval_maps contains the map plus a head and tail item,
5270          * where the head holds the module and length of array, and the
5271          * tail holds a pointer to the next list.
5272          */
5273         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5274         if (!map_array) {
5275                 pr_warn("Unable to allocate trace eval mapping\n");
5276                 return;
5277         }
5278
5279         mutex_lock(&trace_eval_mutex);
5280
5281         if (!trace_eval_maps)
5282                 trace_eval_maps = map_array;
5283         else {
5284                 ptr = trace_eval_maps;
5285                 for (;;) {
5286                         ptr = trace_eval_jmp_to_tail(ptr);
5287                         if (!ptr->tail.next)
5288                                 break;
5289                         ptr = ptr->tail.next;
5290
5291                 }
5292                 ptr->tail.next = map_array;
5293         }
5294         map_array->head.mod = mod;
5295         map_array->head.length = len;
5296         map_array++;
5297
5298         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5299                 map_array->map = **map;
5300                 map_array++;
5301         }
5302         memset(map_array, 0, sizeof(*map_array));
5303
5304         mutex_unlock(&trace_eval_mutex);
5305 }
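
/*
 * Illustrative layout of the block built above, sketched for len == 2:
 *
 *      map_array[0]    head  (mod, length = 2)
 *      map_array[1]    map   = *start[0]
 *      map_array[2]    map   = *start[1]
 *      map_array[3]    tail  (zeroed; tail.next links the next block)
 *
 * trace_eval_jmp_to_tail(head) == head + length + 1, so the walk in the
 * loop above always lands on the tail slot of each per-module block.
 */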
5306
5307 static void trace_create_eval_file(struct dentry *d_tracer)
5308 {
5309         trace_create_file("eval_map", 0444, d_tracer,
5310                           NULL, &tracing_eval_map_fops);
5311 }
5312
5313 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5314 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5315 static inline void trace_insert_eval_map_file(struct module *mod,
5316                               struct trace_eval_map **start, int len) { }
5317 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5318
5319 static void trace_insert_eval_map(struct module *mod,
5320                                   struct trace_eval_map **start, int len)
5321 {
5322         struct trace_eval_map **map;
5323
5324         if (len <= 0)
5325                 return;
5326
5327         map = start;
5328
5329         trace_event_eval_update(map, len);
5330
5331         trace_insert_eval_map_file(mod, start, len);
5332 }
5333
5334 static ssize_t
5335 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5336                        size_t cnt, loff_t *ppos)
5337 {
5338         struct trace_array *tr = filp->private_data;
5339         char buf[MAX_TRACER_SIZE+2];
5340         int r;
5341
5342         mutex_lock(&trace_types_lock);
5343         r = sprintf(buf, "%s\n", tr->current_trace->name);
5344         mutex_unlock(&trace_types_lock);
5345
5346         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5347 }
5348
5349 int tracer_init(struct tracer *t, struct trace_array *tr)
5350 {
5351         tracing_reset_online_cpus(&tr->trace_buffer);
5352         return t->init(tr);
5353 }
5354
5355 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5356 {
5357         int cpu;
5358
5359         for_each_tracing_cpu(cpu)
5360                 per_cpu_ptr(buf->data, cpu)->entries = val;
5361 }
5362
5363 #ifdef CONFIG_TRACER_MAX_TRACE
5364 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5365 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5366                                         struct trace_buffer *size_buf, int cpu_id)
5367 {
5368         int cpu, ret = 0;
5369
5370         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5371                 for_each_tracing_cpu(cpu) {
5372                         ret = ring_buffer_resize(trace_buf->buffer,
5373                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5374                         if (ret < 0)
5375                                 break;
5376                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5377                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5378                 }
5379         } else {
5380                 ret = ring_buffer_resize(trace_buf->buffer,
5381                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5382                 if (ret == 0)
5383                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5384                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5385         }
5386
5387         return ret;
5388 }
5389 #endif /* CONFIG_TRACER_MAX_TRACE */
5390
5391 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5392                                         unsigned long size, int cpu)
5393 {
5394         int ret;
5395
5396         /*
5397          * If kernel or user changes the size of the ring buffer
5398          * we use the size that was given, and we can forget about
5399          * expanding it later.
5400          */
5401         ring_buffer_expanded = true;
5402
5403         /* May be called before buffers are initialized */
5404         if (!tr->trace_buffer.buffer)
5405                 return 0;
5406
5407         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5408         if (ret < 0)
5409                 return ret;
5410
5411 #ifdef CONFIG_TRACER_MAX_TRACE
5412         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5413             !tr->current_trace->use_max_tr)
5414                 goto out;
5415
5416         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5417         if (ret < 0) {
5418                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5419                                                      &tr->trace_buffer, cpu);
5420                 if (r < 0) {
5421                         /*
5422                          * AARGH! We are left with different
5423                          * size max buffer!!!!
5424                          * The max buffer is our "snapshot" buffer.
5425                          * When a tracer needs a snapshot (one of the
5426                          * latency tracers), it swaps the max buffer
5427                          * with the saved snapshot. We succeeded in
5428                          * updating the size of the main buffer, but failed to
5429                          * update the size of the max buffer. But when we tried
5430                          * to reset the main buffer to the original size, we
5431                          * failed there too. This is very unlikely to
5432                          * happen, but if it does, warn and kill all
5433                          * tracing.
5434                          */
5435                         WARN_ON(1);
5436                         tracing_disabled = 1;
5437                 }
5438                 return ret;
5439         }
5440
5441         if (cpu == RING_BUFFER_ALL_CPUS)
5442                 set_buffer_entries(&tr->max_buffer, size);
5443         else
5444                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5445
5446  out:
5447 #endif /* CONFIG_TRACER_MAX_TRACE */
5448
5449         if (cpu == RING_BUFFER_ALL_CPUS)
5450                 set_buffer_entries(&tr->trace_buffer, size);
5451         else
5452                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5453
5454         return ret;
5455 }
5456
5457 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5458                                           unsigned long size, int cpu_id)
5459 {
5460         int ret = size;
5461
5462         mutex_lock(&trace_types_lock);
5463
5464         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5465                 /* make sure this cpu is enabled in the mask */
5466                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5467                         ret = -EINVAL;
5468                         goto out;
5469                 }
5470         }
5471
5472         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5473         if (ret < 0)
5474                 ret = -ENOMEM;
5475
5476 out:
5477         mutex_unlock(&trace_types_lock);
5478
5479         return ret;
5480 }
5481
5482
5483 /**
5484  * tracing_update_buffers - used by tracing facility to expand ring buffers
5485  *
5486  * To save memory when tracing is never used on a system that has it
5487  * configured in, the ring buffers are set to a minimum size. But once
5488  * a user starts to use the tracing facility, they need to grow
5489  * to their default size.
5490  *
5491  * This function is to be called when a tracer is about to be used.
5492  */
5493 int tracing_update_buffers(void)
5494 {
5495         int ret = 0;
5496
5497         mutex_lock(&trace_types_lock);
5498         if (!ring_buffer_expanded)
5499                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5500                                                 RING_BUFFER_ALL_CPUS);
5501         mutex_unlock(&trace_types_lock);
5502
5503         return ret;
5504 }
5505
5506 struct trace_option_dentry;
5507
5508 static void
5509 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5510
5511 /*
5512  * Used to clear out the tracer before deletion of an instance.
5513  * Must have trace_types_lock held.
5514  */
5515 static void tracing_set_nop(struct trace_array *tr)
5516 {
5517         if (tr->current_trace == &nop_trace)
5518                 return;
5519
5520         tr->current_trace->enabled--;
5521
5522         if (tr->current_trace->reset)
5523                 tr->current_trace->reset(tr);
5524
5525         tr->current_trace = &nop_trace;
5526 }
5527
5528 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5529 {
5530         /* Only enable if the directory has been created already. */
5531         if (!tr->dir)
5532                 return;
5533
5534         create_trace_option_files(tr, t);
5535 }
5536
5537 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5538 {
5539         struct tracer *t;
5540 #ifdef CONFIG_TRACER_MAX_TRACE
5541         bool had_max_tr;
5542 #endif
5543         int ret = 0;
5544
5545         mutex_lock(&trace_types_lock);
5546
5547         if (!ring_buffer_expanded) {
5548                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5549                                                 RING_BUFFER_ALL_CPUS);
5550                 if (ret < 0)
5551                         goto out;
5552                 ret = 0;
5553         }
5554
5555         for (t = trace_types; t; t = t->next) {
5556                 if (strcmp(t->name, buf) == 0)
5557                         break;
5558         }
5559         if (!t) {
5560                 ret = -EINVAL;
5561                 goto out;
5562         }
5563         if (t == tr->current_trace)
5564                 goto out;
5565
5566 #ifdef CONFIG_TRACER_SNAPSHOT
5567         if (t->use_max_tr) {
5568                 arch_spin_lock(&tr->max_lock);
5569                 if (tr->cond_snapshot)
5570                         ret = -EBUSY;
5571                 arch_spin_unlock(&tr->max_lock);
5572                 if (ret)
5573                         goto out;
5574         }
5575 #endif
5576         /* Some tracers won't work on kernel command line */
5577         if (system_state < SYSTEM_RUNNING && t->noboot) {
5578                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5579                         t->name);
5580                 goto out;
5581         }
5582
5583         /* Some tracers are only allowed for the top level buffer */
5584         if (!trace_ok_for_array(t, tr)) {
5585                 ret = -EINVAL;
5586                 goto out;
5587         }
5588
5589         /* If trace pipe files are being read, we can't change the tracer */
5590         if (tr->current_trace->ref) {
5591                 ret = -EBUSY;
5592                 goto out;
5593         }
5594
5595         trace_branch_disable();
5596
5597         tr->current_trace->enabled--;
5598
5599         if (tr->current_trace->reset)
5600                 tr->current_trace->reset(tr);
5601
5602         /* Current trace needs to be nop_trace before synchronize_rcu */
5603         tr->current_trace = &nop_trace;
5604
5605 #ifdef CONFIG_TRACER_MAX_TRACE
5606         had_max_tr = tr->allocated_snapshot;
5607
5608         if (had_max_tr && !t->use_max_tr) {
5609                 /*
5610                  * We need to make sure that the update_max_tr sees that
5611                  * current_trace changed to nop_trace to keep it from
5612                  * swapping the buffers after we resize it.
5613                  * The update_max_tr is called with interrupts disabled,
5614                  * so a synchronize_rcu() is sufficient.
5615                  */
5616                 synchronize_rcu();
5617                 free_snapshot(tr);
5618         }
5619 #endif
5620
5621 #ifdef CONFIG_TRACER_MAX_TRACE
5622         if (t->use_max_tr && !had_max_tr) {
5623                 ret = tracing_alloc_snapshot_instance(tr);
5624                 if (ret < 0)
5625                         goto out;
5626         }
5627 #endif
5628
5629         if (t->init) {
5630                 ret = tracer_init(t, tr);
5631                 if (ret)
5632                         goto out;
5633         }
5634
5635         tr->current_trace = t;
5636         tr->current_trace->enabled++;
5637         trace_branch_enable(tr);
5638  out:
5639         mutex_unlock(&trace_types_lock);
5640
5641         return ret;
5642 }
5643
5644 static ssize_t
5645 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5646                         size_t cnt, loff_t *ppos)
5647 {
5648         struct trace_array *tr = filp->private_data;
5649         char buf[MAX_TRACER_SIZE+1];
5650         int i;
5651         size_t ret;
5652         int err;
5653
5654         ret = cnt;
5655
5656         if (cnt > MAX_TRACER_SIZE)
5657                 cnt = MAX_TRACER_SIZE;
5658
5659         if (copy_from_user(buf, ubuf, cnt))
5660                 return -EFAULT;
5661
5662         buf[cnt] = 0;
5663
5664         /* strip trailing whitespace. */
5665         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5666                 buf[i] = 0;
5667
5668         err = tracing_set_tracer(tr, buf);
5669         if (err)
5670                 return err;
5671
5672         *ppos += ret;
5673
5674         return ret;
5675 }
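
/*
 * Illustrative usage (assuming this handler backs the current_tracer file
 * and tracefs is mounted at /sys/kernel/tracing):
 *
 *      echo function_graph > current_tracer
 *
 * switches the tracer, while a name not found in trace_types is rejected
 * with -EINVAL by tracing_set_tracer() above.
 */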
5676
5677 static ssize_t
5678 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5679                    size_t cnt, loff_t *ppos)
5680 {
5681         char buf[64];
5682         int r;
5683
5684         r = snprintf(buf, sizeof(buf), "%ld\n",
5685                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5686         if (r > sizeof(buf))
5687                 r = sizeof(buf);
5688         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5689 }
5690
5691 static ssize_t
5692 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5693                     size_t cnt, loff_t *ppos)
5694 {
5695         unsigned long val;
5696         int ret;
5697
5698         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5699         if (ret)
5700                 return ret;
5701
5702         *ptr = val * 1000;
5703
5704         return cnt;
5705 }
5706
5707 static ssize_t
5708 tracing_thresh_read(struct file *filp, char __user *ubuf,
5709                     size_t cnt, loff_t *ppos)
5710 {
5711         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5712 }
5713
5714 static ssize_t
5715 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5716                      size_t cnt, loff_t *ppos)
5717 {
5718         struct trace_array *tr = filp->private_data;
5719         int ret;
5720
5721         mutex_lock(&trace_types_lock);
5722         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5723         if (ret < 0)
5724                 goto out;
5725
5726         if (tr->current_trace->update_thresh) {
5727                 ret = tr->current_trace->update_thresh(tr);
5728                 if (ret < 0)
5729                         goto out;
5730         }
5731
5732         ret = cnt;
5733 out:
5734         mutex_unlock(&trace_types_lock);
5735
5736         return ret;
5737 }
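
/*
 * Illustrative usage: tracing_thresh is read and written in microseconds
 * but stored in nanoseconds (tracing_nsecs_write() multiplies by 1000),
 * so, assuming the usual tracefs layout,
 *
 *      echo 100 > tracing_thresh
 *
 * sets a 100 usec threshold for the latency tracers that honor it.
 */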
5738
5739 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5740
5741 static ssize_t
5742 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5743                      size_t cnt, loff_t *ppos)
5744 {
5745         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5746 }
5747
5748 static ssize_t
5749 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5750                       size_t cnt, loff_t *ppos)
5751 {
5752         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5753 }
5754
5755 #endif
5756
5757 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5758 {
5759         struct trace_array *tr = inode->i_private;
5760         struct trace_iterator *iter;
5761         int ret = 0;
5762
5763         if (tracing_disabled)
5764                 return -ENODEV;
5765
5766         if (trace_array_get(tr) < 0)
5767                 return -ENODEV;
5768
5769         mutex_lock(&trace_types_lock);
5770
5771         /* create a buffer to store the information to pass to userspace */
5772         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5773         if (!iter) {
5774                 ret = -ENOMEM;
5775                 __trace_array_put(tr);
5776                 goto out;
5777         }
5778
5779         trace_seq_init(&iter->seq);
5780         iter->trace = tr->current_trace;
5781
5782         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5783                 ret = -ENOMEM;
5784                 goto fail;
5785         }
5786
5787         /* trace pipe does not show start of buffer */
5788         cpumask_setall(iter->started);
5789
5790         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5791                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5792
5793         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5794         if (trace_clocks[tr->clock_id].in_ns)
5795                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5796
5797         iter->tr = tr;
5798         iter->trace_buffer = &tr->trace_buffer;
5799         iter->cpu_file = tracing_get_cpu(inode);
5800         mutex_init(&iter->mutex);
5801         filp->private_data = iter;
5802
5803         if (iter->trace->pipe_open)
5804                 iter->trace->pipe_open(iter);
5805
5806         nonseekable_open(inode, filp);
5807
5808         tr->current_trace->ref++;
5809 out:
5810         mutex_unlock(&trace_types_lock);
5811         return ret;
5812
5813 fail:
5814         kfree(iter);
5815         __trace_array_put(tr);
5816         mutex_unlock(&trace_types_lock);
5817         return ret;
5818 }
5819
5820 static int tracing_release_pipe(struct inode *inode, struct file *file)
5821 {
5822         struct trace_iterator *iter = file->private_data;
5823         struct trace_array *tr = inode->i_private;
5824
5825         mutex_lock(&trace_types_lock);
5826
5827         tr->current_trace->ref--;
5828
5829         if (iter->trace->pipe_close)
5830                 iter->trace->pipe_close(iter);
5831
5832         mutex_unlock(&trace_types_lock);
5833
5834         free_cpumask_var(iter->started);
5835         mutex_destroy(&iter->mutex);
5836         kfree(iter);
5837
5838         trace_array_put(tr);
5839
5840         return 0;
5841 }
5842
5843 static __poll_t
5844 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5845 {
5846         struct trace_array *tr = iter->tr;
5847
5848         /* Iterators are static; they should be either filled or empty */
5849         if (trace_buffer_iter(iter, iter->cpu_file))
5850                 return EPOLLIN | EPOLLRDNORM;
5851
5852         if (tr->trace_flags & TRACE_ITER_BLOCK)
5853                 /*
5854                  * Always select as readable when in blocking mode
5855                  */
5856                 return EPOLLIN | EPOLLRDNORM;
5857         else
5858                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5859                                              filp, poll_table);
5860 }
5861
5862 static __poll_t
5863 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5864 {
5865         struct trace_iterator *iter = filp->private_data;
5866
5867         return trace_poll(iter, filp, poll_table);
5868 }
5869
5870 /* Must be called with iter->mutex held. */
5871 static int tracing_wait_pipe(struct file *filp)
5872 {
5873         struct trace_iterator *iter = filp->private_data;
5874         int ret;
5875
5876         while (trace_empty(iter)) {
5877
5878                 if ((filp->f_flags & O_NONBLOCK)) {
5879                         return -EAGAIN;
5880                 }
5881
5882                 /*
5883                  * We block until we read something and tracing is disabled.
5884                  * We still block if tracing is disabled, but we have never
5885                  * read anything. This allows a user to cat this file, and
5886                  * then enable tracing. But after we have read something,
5887                  * we give an EOF when tracing is again disabled.
5888                  *
5889                  * iter->pos will be 0 if we haven't read anything.
5890                  */
5891                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5892                         break;
5893
5894                 mutex_unlock(&iter->mutex);
5895
5896                 ret = wait_on_pipe(iter, 0);
5897
5898                 mutex_lock(&iter->mutex);
5899
5900                 if (ret)
5901                         return ret;
5902         }
5903
5904         return 1;
5905 }
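
/*
 * Illustrative userspace sketch of the blocking semantics described in the
 * comment above (kept out of the build by "#if 0"; assumes tracefs is
 * mounted at /sys/kernel/tracing).
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

        if (fd < 0)
                return 1;

        /*
         * read() blocks while the buffer is empty.  Once something has
         * been read and tracing is turned off, it returns 0 (EOF).
         * With O_NONBLOCK it would return -1/EAGAIN instead of blocking.
         */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);

        close(fd);
        return 0;
}
#endif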
5906
5907 /*
5908  * Consumer reader.
5909  */
5910 static ssize_t
5911 tracing_read_pipe(struct file *filp, char __user *ubuf,
5912                   size_t cnt, loff_t *ppos)
5913 {
5914         struct trace_iterator *iter = filp->private_data;
5915         ssize_t sret;
5916
5917         /*
5918          * Avoid more than one consumer on a single file descriptor.
5919          * This is just a matter of trace coherency; the ring buffer itself
5920          * is protected.
5921          */
5922         mutex_lock(&iter->mutex);
5923
5924         /* return any leftover data */
5925         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5926         if (sret != -EBUSY)
5927                 goto out;
5928
5929         trace_seq_init(&iter->seq);
5930
5931         if (iter->trace->read) {
5932                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5933                 if (sret)
5934                         goto out;
5935         }
5936
5937 waitagain:
5938         sret = tracing_wait_pipe(filp);
5939         if (sret <= 0)
5940                 goto out;
5941
5942         /* stop when tracing is finished */
5943         if (trace_empty(iter)) {
5944                 sret = 0;
5945                 goto out;
5946         }
5947
5948         if (cnt >= PAGE_SIZE)
5949                 cnt = PAGE_SIZE - 1;
5950
5951         /* reset all but tr, trace, and overruns */
5952         memset(&iter->seq, 0,
5953                sizeof(struct trace_iterator) -
5954                offsetof(struct trace_iterator, seq));
5955         cpumask_clear(iter->started);
5956         iter->pos = -1;
5957
5958         trace_event_read_lock();
5959         trace_access_lock(iter->cpu_file);
5960         while (trace_find_next_entry_inc(iter) != NULL) {
5961                 enum print_line_t ret;
5962                 int save_len = iter->seq.seq.len;
5963
5964                 ret = print_trace_line(iter);
5965                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5966                         /* don't print partial lines */
5967                         iter->seq.seq.len = save_len;
5968                         break;
5969                 }
5970                 if (ret != TRACE_TYPE_NO_CONSUME)
5971                         trace_consume(iter);
5972
5973                 if (trace_seq_used(&iter->seq) >= cnt)
5974                         break;
5975
5976                 /*
5977                  * Setting the full flag means we reached the trace_seq buffer
5978                  * size and should have left via the partial-line condition above.
5979                  * One of the trace_seq_* functions is not being used properly.
5980                  */
5981                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5982                           iter->ent->type);
5983         }
5984         trace_access_unlock(iter->cpu_file);
5985         trace_event_read_unlock();
5986
5987         /* Now copy what we have to the user */
5988         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5989         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5990                 trace_seq_init(&iter->seq);
5991
5992         /*
5993          * If there was nothing to send to user, in spite of consuming trace
5994          * entries, go back to wait for more entries.
5995          */
5996         if (sret == -EBUSY)
5997                 goto waitagain;
5998
5999 out:
6000         mutex_unlock(&iter->mutex);
6001
6002         return sret;
6003 }
6004
6005 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6006                                      unsigned int idx)
6007 {
6008         __free_page(spd->pages[idx]);
6009 }
6010
6011 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6012         .can_merge              = 0,
6013         .confirm                = generic_pipe_buf_confirm,
6014         .release                = generic_pipe_buf_release,
6015         .steal                  = generic_pipe_buf_steal,
6016         .get                    = generic_pipe_buf_get,
6017 };
6018
6019 static size_t
6020 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6021 {
6022         size_t count;
6023         int save_len;
6024         int ret;
6025
6026         /* Seq buffer is page-sized, exactly what we need. */
6027         for (;;) {
6028                 save_len = iter->seq.seq.len;
6029                 ret = print_trace_line(iter);
6030
6031                 if (trace_seq_has_overflowed(&iter->seq)) {
6032                         iter->seq.seq.len = save_len;
6033                         break;
6034                 }
6035
6036                 /*
6037                  * This should not be hit, because it should only
6038                  * be set if the iter->seq overflowed. But check it
6039                  * anyway to be safe.
6040                  */
6041                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6042                         iter->seq.seq.len = save_len;
6043                         break;
6044                 }
6045
6046                 count = trace_seq_used(&iter->seq) - save_len;
6047                 if (rem < count) {
6048                         rem = 0;
6049                         iter->seq.seq.len = save_len;
6050                         break;
6051                 }
6052
6053                 if (ret != TRACE_TYPE_NO_CONSUME)
6054                         trace_consume(iter);
6055                 rem -= count;
6056                 if (!trace_find_next_entry_inc(iter))   {
6057                         rem = 0;
6058                         iter->ent = NULL;
6059                         break;
6060                 }
6061         }
6062
6063         return rem;
6064 }
6065
6066 static ssize_t tracing_splice_read_pipe(struct file *filp,
6067                                         loff_t *ppos,
6068                                         struct pipe_inode_info *pipe,
6069                                         size_t len,
6070                                         unsigned int flags)
6071 {
6072         struct page *pages_def[PIPE_DEF_BUFFERS];
6073         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6074         struct trace_iterator *iter = filp->private_data;
6075         struct splice_pipe_desc spd = {
6076                 .pages          = pages_def,
6077                 .partial        = partial_def,
6078                 .nr_pages       = 0, /* This gets updated below. */
6079                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6080                 .ops            = &tracing_pipe_buf_ops,
6081                 .spd_release    = tracing_spd_release_pipe,
6082         };
6083         ssize_t ret;
6084         size_t rem;
6085         unsigned int i;
6086
6087         if (splice_grow_spd(pipe, &spd))
6088                 return -ENOMEM;
6089
6090         mutex_lock(&iter->mutex);
6091
6092         if (iter->trace->splice_read) {
6093                 ret = iter->trace->splice_read(iter, filp,
6094                                                ppos, pipe, len, flags);
6095                 if (ret)
6096                         goto out_err;
6097         }
6098
6099         ret = tracing_wait_pipe(filp);
6100         if (ret <= 0)
6101                 goto out_err;
6102
6103         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6104                 ret = -EFAULT;
6105                 goto out_err;
6106         }
6107
6108         trace_event_read_lock();
6109         trace_access_lock(iter->cpu_file);
6110
6111         /* Fill as many pages as possible. */
6112         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6113                 spd.pages[i] = alloc_page(GFP_KERNEL);
6114                 if (!spd.pages[i])
6115                         break;
6116
6117                 rem = tracing_fill_pipe_page(rem, iter);
6118
6119                 /* Copy the data into the page, so we can start over. */
6120                 ret = trace_seq_to_buffer(&iter->seq,
6121                                           page_address(spd.pages[i]),
6122                                           trace_seq_used(&iter->seq));
6123                 if (ret < 0) {
6124                         __free_page(spd.pages[i]);
6125                         break;
6126                 }
6127                 spd.partial[i].offset = 0;
6128                 spd.partial[i].len = trace_seq_used(&iter->seq);
6129
6130                 trace_seq_init(&iter->seq);
6131         }
6132
6133         trace_access_unlock(iter->cpu_file);
6134         trace_event_read_unlock();
6135         mutex_unlock(&iter->mutex);
6136
6137         spd.nr_pages = i;
6138
6139         if (i)
6140                 ret = splice_to_pipe(pipe, &spd);
6141         else
6142                 ret = 0;
6143 out:
6144         splice_shrink_spd(&spd);
6145         return ret;
6146
6147 out_err:
6148         mutex_unlock(&iter->mutex);
6149         goto out;
6150 }
6151
6152 static ssize_t
6153 tracing_entries_read(struct file *filp, char __user *ubuf,
6154                      size_t cnt, loff_t *ppos)
6155 {
6156         struct inode *inode = file_inode(filp);
6157         struct trace_array *tr = inode->i_private;
6158         int cpu = tracing_get_cpu(inode);
6159         char buf[64];
6160         int r = 0;
6161         ssize_t ret;
6162
6163         mutex_lock(&trace_types_lock);
6164
6165         if (cpu == RING_BUFFER_ALL_CPUS) {
6166                 int cpu, buf_size_same;
6167                 unsigned long size;
6168
6169                 size = 0;
6170                 buf_size_same = 1;
6171                 /* check if all cpu sizes are the same */
6172                 for_each_tracing_cpu(cpu) {
6173                         /* fill in the size from first enabled cpu */
6174                         if (size == 0)
6175                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6176                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6177                                 buf_size_same = 0;
6178                                 break;
6179                         }
6180                 }
6181
6182                 if (buf_size_same) {
6183                         if (!ring_buffer_expanded)
6184                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6185                                             size >> 10,
6186                                             trace_buf_size >> 10);
6187                         else
6188                                 r = sprintf(buf, "%lu\n", size >> 10);
6189                 } else
6190                         r = sprintf(buf, "X\n");
6191         } else
6192                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6193
6194         mutex_unlock(&trace_types_lock);
6195
6196         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6197         return ret;
6198 }
6199
6200 static ssize_t
6201 tracing_entries_write(struct file *filp, const char __user *ubuf,
6202                       size_t cnt, loff_t *ppos)
6203 {
6204         struct inode *inode = file_inode(filp);
6205         struct trace_array *tr = inode->i_private;
6206         unsigned long val;
6207         int ret;
6208
6209         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6210         if (ret)
6211                 return ret;
6212
6213         /* must have at least 1 entry */
6214         if (!val)
6215                 return -EINVAL;
6216
6217         /* value is in KB */
6218         val <<= 10;
6219         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6220         if (ret < 0)
6221                 return ret;
6222
6223         *ppos += cnt;
6224
6225         return cnt;
6226 }
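
/*
 * Illustrative usage (assuming this handler backs the buffer_size_kb files
 * under tracefs): the value written is in KB per CPU, so
 *
 *      echo 4096 > buffer_size_kb
 *
 * resizes every per-CPU buffer to 4 MB, while writing the same file under
 * per_cpu/cpu<N>/ resizes only that CPU's buffer.
 */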
6227
6228 static ssize_t
6229 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6230                                 size_t cnt, loff_t *ppos)
6231 {
6232         struct trace_array *tr = filp->private_data;
6233         char buf[64];
6234         int r, cpu;
6235         unsigned long size = 0, expanded_size = 0;
6236
6237         mutex_lock(&trace_types_lock);
6238         for_each_tracing_cpu(cpu) {
6239                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6240                 if (!ring_buffer_expanded)
6241                         expanded_size += trace_buf_size >> 10;
6242         }
6243         if (ring_buffer_expanded)
6244                 r = sprintf(buf, "%lu\n", size);
6245         else
6246                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6247         mutex_unlock(&trace_types_lock);
6248
6249         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6250 }
6251
6252 static ssize_t
6253 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6254                           size_t cnt, loff_t *ppos)
6255 {
6256         /*
6257          * There is no need to read what the user has written; this function
6258          * exists just to make sure that "echo" does not return an error.
6259          */
6260
6261         *ppos += cnt;
6262
6263         return cnt;
6264 }
6265
6266 static int
6267 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6268 {
6269         struct trace_array *tr = inode->i_private;
6270
6271         /* disable tracing ? */
6272         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6273                 tracer_tracing_off(tr);
6274         /* resize the ring buffer to 0 */
6275         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6276
6277         trace_array_put(tr);
6278
6279         return 0;
6280 }
6281
6282 static ssize_t
6283 tracing_mark_write(struct file *filp, const char __user *ubuf,
6284                                         size_t cnt, loff_t *fpos)
6285 {
6286         struct trace_array *tr = filp->private_data;
6287         struct ring_buffer_event *event;
6288         enum event_trigger_type tt = ETT_NONE;
6289         struct ring_buffer *buffer;
6290         struct print_entry *entry;
6291         unsigned long irq_flags;
6292         const char faulted[] = "<faulted>";
6293         ssize_t written;
6294         int size;
6295         int len;
6296
6297 /* Used in tracing_mark_raw_write() as well */
6298 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6299
6300         if (tracing_disabled)
6301                 return -EINVAL;
6302
6303         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6304                 return -EINVAL;
6305
6306         if (cnt > TRACE_BUF_SIZE)
6307                 cnt = TRACE_BUF_SIZE;
6308
6309         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6310
6311         local_save_flags(irq_flags);
6312         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6313
6314         /* If less than "<faulted>", then make sure we can still add that */
6315         if (cnt < FAULTED_SIZE)
6316                 size += FAULTED_SIZE - cnt;
6317
6318         buffer = tr->trace_buffer.buffer;
6319         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6320                                             irq_flags, preempt_count());
6321         if (unlikely(!event))
6322                 /* Ring buffer disabled, return as if not open for write */
6323                 return -EBADF;
6324
6325         entry = ring_buffer_event_data(event);
6326         entry->ip = _THIS_IP_;
6327
6328         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6329         if (len) {
6330                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6331                 cnt = FAULTED_SIZE;
6332                 written = -EFAULT;
6333         } else
6334                 written = cnt;
6335         len = cnt;
6336
6337         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6338                 /* do not add \n before testing triggers, but add \0 */
6339                 entry->buf[cnt] = '\0';
6340                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6341         }
6342
6343         if (entry->buf[cnt - 1] != '\n') {
6344                 entry->buf[cnt] = '\n';
6345                 entry->buf[cnt + 1] = '\0';
6346         } else
6347                 entry->buf[cnt] = '\0';
6348
6349         __buffer_unlock_commit(buffer, event);
6350
6351         if (tt)
6352                 event_triggers_post_call(tr->trace_marker_file, tt);
6353
6354         if (written > 0)
6355                 *fpos += written;
6356
6357         return written;
6358 }
6359
6360 /* Limit it for now to 3K (including tag) */
6361 #define RAW_DATA_MAX_SIZE (1024*3)
6362
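/*
 * Handle writes to the trace_marker_raw file.  The payload must begin
 * with an 'unsigned int' tag id, is limited to RAW_DATA_MAX_SIZE, and
 * is recorded verbatim in the ring buffer as a TRACE_RAW_DATA event.
 */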
6363 static ssize_t
6364 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6365                                         size_t cnt, loff_t *fpos)
6366 {
6367         struct trace_array *tr = filp->private_data;
6368         struct ring_buffer_event *event;
6369         struct ring_buffer *buffer;
6370         struct raw_data_entry *entry;
6371         const char faulted[] = "<faulted>";
6372         unsigned long irq_flags;
6373         ssize_t written;
6374         int size;
6375         int len;
6376
6377 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6378
6379         if (tracing_disabled)
6380                 return -EINVAL;
6381
6382         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6383                 return -EINVAL;
6384
6385         /* The marker must at least have a tag id */
6386         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6387                 return -EINVAL;
6388
6389         if (cnt > TRACE_BUF_SIZE)
6390                 cnt = TRACE_BUF_SIZE;
6391
6392         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6393
6394         local_save_flags(irq_flags);
6395         size = sizeof(*entry) + cnt;
6396         if (cnt < FAULT_SIZE_ID)
6397                 size += FAULT_SIZE_ID - cnt;
6398
6399         buffer = tr->trace_buffer.buffer;
6400         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6401                                             irq_flags, preempt_count());
6402         if (!event)
6403                 /* Ring buffer disabled, return as if not open for write */
6404                 return -EBADF;
6405
6406         entry = ring_buffer_event_data(event);
6407
6408         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6409         if (len) {
6410                 entry->id = -1;
6411                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6412                 written = -EFAULT;
6413         } else
6414                 written = cnt;
6415
6416         __buffer_unlock_commit(buffer, event);
6417
6418         if (written > 0)
6419                 *fpos += written;
6420
6421         return written;
6422 }
6423
6424 static int tracing_clock_show(struct seq_file *m, void *v)
6425 {
6426         struct trace_array *tr = m->private;
6427         int i;
6428
6429         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6430                 seq_printf(m,
6431                         "%s%s%s%s", i ? " " : "",
6432                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6433                         i == tr->clock_id ? "]" : "");
6434         seq_putc(m, '\n');
6435
6436         return 0;
6437 }
6438
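/*
 * Switch the trace clock to the one named by @clockstr.  Both the main
 * buffer and, if configured, the max/snapshot buffer are reset, since
 * timestamps taken with different clocks cannot be compared.
 */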
6439 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6440 {
6441         int i;
6442
6443         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6444                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6445                         break;
6446         }
6447         if (i == ARRAY_SIZE(trace_clocks))
6448                 return -EINVAL;
6449
6450         mutex_lock(&trace_types_lock);
6451
6452         tr->clock_id = i;
6453
6454         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6455
6456         /*
6457          * New clock may not be consistent with the previous clock.
6458          * Reset the buffer so that it doesn't have incomparable timestamps.
6459          */
6460         tracing_reset_online_cpus(&tr->trace_buffer);
6461
6462 #ifdef CONFIG_TRACER_MAX_TRACE
6463         if (tr->max_buffer.buffer)
6464                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6465         tracing_reset_online_cpus(&tr->max_buffer);
6466 #endif
6467
6468         mutex_unlock(&trace_types_lock);
6469
6470         return 0;
6471 }
6472
6473 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6474                                    size_t cnt, loff_t *fpos)
6475 {
6476         struct seq_file *m = filp->private_data;
6477         struct trace_array *tr = m->private;
6478         char buf[64];
6479         const char *clockstr;
6480         int ret;
6481
6482         if (cnt >= sizeof(buf))
6483                 return -EINVAL;
6484
6485         if (copy_from_user(buf, ubuf, cnt))
6486                 return -EFAULT;
6487
6488         buf[cnt] = 0;
6489
6490         clockstr = strstrip(buf);
6491
6492         ret = tracing_set_clock(tr, clockstr);
6493         if (ret)
6494                 return ret;
6495
6496         *fpos += cnt;
6497
6498         return cnt;
6499 }
6500
6501 static int tracing_clock_open(struct inode *inode, struct file *file)
6502 {
6503         struct trace_array *tr = inode->i_private;
6504         int ret;
6505
6506         if (tracing_disabled)
6507                 return -ENODEV;
6508
6509         if (trace_array_get(tr))
6510                 return -ENODEV;
6511
6512         ret = single_open(file, tracing_clock_show, inode->i_private);
6513         if (ret < 0)
6514                 trace_array_put(tr);
6515
6516         return ret;
6517 }
6518
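/*
 * Show the available timestamp modes with the active one in brackets:
 * "delta [absolute]" when absolute timestamps are enabled, otherwise
 * "[delta] absolute".
 */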
6519 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6520 {
6521         struct trace_array *tr = m->private;
6522
6523         mutex_lock(&trace_types_lock);
6524
6525         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6526                 seq_puts(m, "delta [absolute]\n");
6527         else
6528                 seq_puts(m, "[delta] absolute\n");
6529
6530         mutex_unlock(&trace_types_lock);
6531
6532         return 0;
6533 }
6534
6535 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6536 {
6537         struct trace_array *tr = inode->i_private;
6538         int ret;
6539
6540         if (tracing_disabled)
6541                 return -ENODEV;
6542
6543         if (trace_array_get(tr))
6544                 return -ENODEV;
6545
6546         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6547         if (ret < 0)
6548                 trace_array_put(tr);
6549
6550         return ret;
6551 }
6552
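/*
 * Enable or disable absolute timestamps for @tr.  Enabling is reference
 * counted, so absolute timestamps stay on until every user that
 * requested them has dropped its reference.
 */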
6553 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6554 {
6555         int ret = 0;
6556
6557         mutex_lock(&trace_types_lock);
6558
6559         if (abs && tr->time_stamp_abs_ref++)
6560                 goto out;
6561
6562         if (!abs) {
6563                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6564                         ret = -EINVAL;
6565                         goto out;
6566                 }
6567
6568                 if (--tr->time_stamp_abs_ref)
6569                         goto out;
6570         }
6571
6572         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6573
6574 #ifdef CONFIG_TRACER_MAX_TRACE
6575         if (tr->max_buffer.buffer)
6576                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6577 #endif
6578  out:
6579         mutex_unlock(&trace_types_lock);
6580
6581         return ret;
6582 }
6583
6584 struct ftrace_buffer_info {
6585         struct trace_iterator   iter;
6586         void                    *spare;
6587         unsigned int            spare_cpu;
6588         unsigned int            read;
6589 };
6590
6591 #ifdef CONFIG_TRACER_SNAPSHOT
6592 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6593 {
6594         struct trace_array *tr = inode->i_private;
6595         struct trace_iterator *iter;
6596         struct seq_file *m;
6597         int ret = 0;
6598
6599         if (trace_array_get(tr) < 0)
6600                 return -ENODEV;
6601
6602         if (file->f_mode & FMODE_READ) {
6603                 iter = __tracing_open(inode, file, true);
6604                 if (IS_ERR(iter))
6605                         ret = PTR_ERR(iter);
6606         } else {
6607                 /* Writes still need the seq_file to hold the private data */
6608                 ret = -ENOMEM;
6609                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6610                 if (!m)
6611                         goto out;
6612                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6613                 if (!iter) {
6614                         kfree(m);
6615                         goto out;
6616                 }
6617                 ret = 0;
6618
6619                 iter->tr = tr;
6620                 iter->trace_buffer = &tr->max_buffer;
6621                 iter->cpu_file = tracing_get_cpu(inode);
6622                 m->private = iter;
6623                 file->private_data = m;
6624         }
6625 out:
6626         if (ret < 0)
6627                 trace_array_put(tr);
6628
6629         return ret;
6630 }
6631
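/*
 * Writes to the snapshot file control the snapshot (max) buffer:
 *   0     - free the snapshot buffer (not allowed on the per_cpu files)
 *   1     - allocate the snapshot buffer if needed and take a snapshot
 *   other - clear the contents of the snapshot buffer
 * The write fails with -EBUSY while the current tracer uses the max
 * buffer itself or a conditional snapshot is in use.
 */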
6632 static ssize_t
6633 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6634                        loff_t *ppos)
6635 {
6636         struct seq_file *m = filp->private_data;
6637         struct trace_iterator *iter = m->private;
6638         struct trace_array *tr = iter->tr;
6639         unsigned long val;
6640         int ret;
6641
6642         ret = tracing_update_buffers();
6643         if (ret < 0)
6644                 return ret;
6645
6646         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6647         if (ret)
6648                 return ret;
6649
6650         mutex_lock(&trace_types_lock);
6651
6652         if (tr->current_trace->use_max_tr) {
6653                 ret = -EBUSY;
6654                 goto out;
6655         }
6656
6657         arch_spin_lock(&tr->max_lock);
6658         if (tr->cond_snapshot)
6659                 ret = -EBUSY;
6660         arch_spin_unlock(&tr->max_lock);
6661         if (ret)
6662                 goto out;
6663
6664         switch (val) {
6665         case 0:
6666                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6667                         ret = -EINVAL;
6668                         break;
6669                 }
6670                 if (tr->allocated_snapshot)
6671                         free_snapshot(tr);
6672                 break;
6673         case 1:
6674 /* Only allow per-cpu swap if the ring buffer supports it */
6675 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6676                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6677                         ret = -EINVAL;
6678                         break;
6679                 }
6680 #endif
6681                 if (!tr->allocated_snapshot) {
6682                         ret = tracing_alloc_snapshot_instance(tr);
6683                         if (ret < 0)
6684                                 break;
6685                 }
6686                 local_irq_disable();
6687                 /* Now, we're going to swap */
6688                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6689                         update_max_tr(tr, current, smp_processor_id(), NULL);
6690                 else
6691                         update_max_tr_single(tr, current, iter->cpu_file);
6692                 local_irq_enable();
6693                 break;
6694         default:
6695                 if (tr->allocated_snapshot) {
6696                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6697                                 tracing_reset_online_cpus(&tr->max_buffer);
6698                         else
6699                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6700                 }
6701                 break;
6702         }
6703
6704         if (ret >= 0) {
6705                 *ppos += cnt;
6706                 ret = cnt;
6707         }
6708 out:
6709         mutex_unlock(&trace_types_lock);
6710         return ret;
6711 }
6712
6713 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6714 {
6715         struct seq_file *m = file->private_data;
6716         int ret;
6717
6718         ret = tracing_release(inode, file);
6719
6720         if (file->f_mode & FMODE_READ)
6721                 return ret;
6722
6723         /* If write only, the seq_file is just a stub */
6724         if (m)
6725                 kfree(m->private);
6726         kfree(m);
6727
6728         return 0;
6729 }
6730
6731 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6732 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6733                                     size_t count, loff_t *ppos);
6734 static int tracing_buffers_release(struct inode *inode, struct file *file);
6735 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6736                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6737
6738 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6739 {
6740         struct ftrace_buffer_info *info;
6741         int ret;
6742
6743         ret = tracing_buffers_open(inode, filp);
6744         if (ret < 0)
6745                 return ret;
6746
6747         info = filp->private_data;
6748
6749         if (info->iter.trace->use_max_tr) {
6750                 tracing_buffers_release(inode, filp);
6751                 return -EBUSY;
6752         }
6753
6754         info->iter.snapshot = true;
6755         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6756
6757         return ret;
6758 }
6759
6760 #endif /* CONFIG_TRACER_SNAPSHOT */
6761
6762
6763 static const struct file_operations tracing_thresh_fops = {
6764         .open           = tracing_open_generic,
6765         .read           = tracing_thresh_read,
6766         .write          = tracing_thresh_write,
6767         .llseek         = generic_file_llseek,
6768 };
6769
6770 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6771 static const struct file_operations tracing_max_lat_fops = {
6772         .open           = tracing_open_generic,
6773         .read           = tracing_max_lat_read,
6774         .write          = tracing_max_lat_write,
6775         .llseek         = generic_file_llseek,
6776 };
6777 #endif
6778
6779 static const struct file_operations set_tracer_fops = {
6780         .open           = tracing_open_generic,
6781         .read           = tracing_set_trace_read,
6782         .write          = tracing_set_trace_write,
6783         .llseek         = generic_file_llseek,
6784 };
6785
6786 static const struct file_operations tracing_pipe_fops = {
6787         .open           = tracing_open_pipe,
6788         .poll           = tracing_poll_pipe,
6789         .read           = tracing_read_pipe,
6790         .splice_read    = tracing_splice_read_pipe,
6791         .release        = tracing_release_pipe,
6792         .llseek         = no_llseek,
6793 };
6794
6795 static const struct file_operations tracing_entries_fops = {
6796         .open           = tracing_open_generic_tr,
6797         .read           = tracing_entries_read,
6798         .write          = tracing_entries_write,
6799         .llseek         = generic_file_llseek,
6800         .release        = tracing_release_generic_tr,
6801 };
6802
6803 static const struct file_operations tracing_total_entries_fops = {
6804         .open           = tracing_open_generic_tr,
6805         .read           = tracing_total_entries_read,
6806         .llseek         = generic_file_llseek,
6807         .release        = tracing_release_generic_tr,
6808 };
6809
6810 static const struct file_operations tracing_free_buffer_fops = {
6811         .open           = tracing_open_generic_tr,
6812         .write          = tracing_free_buffer_write,
6813         .release        = tracing_free_buffer_release,
6814 };
6815
6816 static const struct file_operations tracing_mark_fops = {
6817         .open           = tracing_open_generic_tr,
6818         .write          = tracing_mark_write,
6819         .llseek         = generic_file_llseek,
6820         .release        = tracing_release_generic_tr,
6821 };
6822
6823 static const struct file_operations tracing_mark_raw_fops = {
6824         .open           = tracing_open_generic_tr,
6825         .write          = tracing_mark_raw_write,
6826         .llseek         = generic_file_llseek,
6827         .release        = tracing_release_generic_tr,
6828 };
6829
6830 static const struct file_operations trace_clock_fops = {
6831         .open           = tracing_clock_open,
6832         .read           = seq_read,
6833         .llseek         = seq_lseek,
6834         .release        = tracing_single_release_tr,
6835         .write          = tracing_clock_write,
6836 };
6837
6838 static const struct file_operations trace_time_stamp_mode_fops = {
6839         .open           = tracing_time_stamp_mode_open,
6840         .read           = seq_read,
6841         .llseek         = seq_lseek,
6842         .release        = tracing_single_release_tr,
6843 };
6844
6845 #ifdef CONFIG_TRACER_SNAPSHOT
6846 static const struct file_operations snapshot_fops = {
6847         .open           = tracing_snapshot_open,
6848         .read           = seq_read,
6849         .write          = tracing_snapshot_write,
6850         .llseek         = tracing_lseek,
6851         .release        = tracing_snapshot_release,
6852 };
6853
6854 static const struct file_operations snapshot_raw_fops = {
6855         .open           = snapshot_raw_open,
6856         .read           = tracing_buffers_read,
6857         .release        = tracing_buffers_release,
6858         .splice_read    = tracing_buffers_splice_read,
6859         .llseek         = no_llseek,
6860 };
6861
6862 #endif /* CONFIG_TRACER_SNAPSHOT */
6863
6864 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6865 {
6866         struct trace_array *tr = inode->i_private;
6867         struct ftrace_buffer_info *info;
6868         int ret;
6869
6870         if (tracing_disabled)
6871                 return -ENODEV;
6872
6873         if (trace_array_get(tr) < 0)
6874                 return -ENODEV;
6875
6876         info = kzalloc(sizeof(*info), GFP_KERNEL);
6877         if (!info) {
6878                 trace_array_put(tr);
6879                 return -ENOMEM;
6880         }
6881
6882         mutex_lock(&trace_types_lock);
6883
6884         info->iter.tr           = tr;
6885         info->iter.cpu_file     = tracing_get_cpu(inode);
6886         info->iter.trace        = tr->current_trace;
6887         info->iter.trace_buffer = &tr->trace_buffer;
6888         info->spare             = NULL;
6889         /* Force a read of the ring buffer on the first read */
6890         info->read              = (unsigned int)-1;
6891
6892         filp->private_data = info;
6893
6894         tr->current_trace->ref++;
6895
6896         mutex_unlock(&trace_types_lock);
6897
6898         ret = nonseekable_open(inode, filp);
6899         if (ret < 0)
6900                 trace_array_put(tr);
6901
6902         return ret;
6903 }
6904
6905 static __poll_t
6906 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6907 {
6908         struct ftrace_buffer_info *info = filp->private_data;
6909         struct trace_iterator *iter = &info->iter;
6910
6911         return trace_poll(iter, filp, poll_table);
6912 }
6913
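/*
 * Read raw ring buffer pages (the trace_pipe_raw files).  A spare page
 * is allocated on first use, filled with ring_buffer_read_page() and
 * then copied out to user space.
 */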
6914 static ssize_t
6915 tracing_buffers_read(struct file *filp, char __user *ubuf,
6916                      size_t count, loff_t *ppos)
6917 {
6918         struct ftrace_buffer_info *info = filp->private_data;
6919         struct trace_iterator *iter = &info->iter;
6920         ssize_t ret = 0;
6921         ssize_t size;
6922
6923         if (!count)
6924                 return 0;
6925
6926 #ifdef CONFIG_TRACER_MAX_TRACE
6927         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6928                 return -EBUSY;
6929 #endif
6930
6931         if (!info->spare) {
6932                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6933                                                           iter->cpu_file);
6934                 if (IS_ERR(info->spare)) {
6935                         ret = PTR_ERR(info->spare);
6936                         info->spare = NULL;
6937                 } else {
6938                         info->spare_cpu = iter->cpu_file;
6939                 }
6940         }
6941         if (!info->spare)
6942                 return ret;
6943
6944         /* Do we have previous read data to read? */
6945         if (info->read < PAGE_SIZE)
6946                 goto read;
6947
6948  again:
6949         trace_access_lock(iter->cpu_file);
6950         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6951                                     &info->spare,
6952                                     count,
6953                                     iter->cpu_file, 0);
6954         trace_access_unlock(iter->cpu_file);
6955
6956         if (ret < 0) {
6957                 if (trace_empty(iter)) {
6958                         if ((filp->f_flags & O_NONBLOCK))
6959                                 return -EAGAIN;
6960
6961                         ret = wait_on_pipe(iter, 0);
6962                         if (ret)
6963                                 return ret;
6964
6965                         goto again;
6966                 }
6967                 return 0;
6968         }
6969
6970         info->read = 0;
6971  read:
6972         size = PAGE_SIZE - info->read;
6973         if (size > count)
6974                 size = count;
6975
6976         ret = copy_to_user(ubuf, info->spare + info->read, size);
6977         if (ret == size)
6978                 return -EFAULT;
6979
6980         size -= ret;
6981
6982         *ppos += size;
6983         info->read += size;
6984
6985         return size;
6986 }
6987
6988 static int tracing_buffers_release(struct inode *inode, struct file *file)
6989 {
6990         struct ftrace_buffer_info *info = file->private_data;
6991         struct trace_iterator *iter = &info->iter;
6992
6993         mutex_lock(&trace_types_lock);
6994
6995         iter->tr->current_trace->ref--;
6996
6997         __trace_array_put(iter->tr);
6998
6999         if (info->spare)
7000                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7001                                            info->spare_cpu, info->spare);
7002         kfree(info);
7003
7004         mutex_unlock(&trace_types_lock);
7005
7006         return 0;
7007 }
7008
7009 struct buffer_ref {
7010         struct ring_buffer      *buffer;
7011         void                    *page;
7012         int                     cpu;
7013         int                     ref;
7014 };
7015
7016 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7017                                     struct pipe_buffer *buf)
7018 {
7019         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7020
7021         if (--ref->ref)
7022                 return;
7023
7024         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7025         kfree(ref);
7026         buf->private = 0;
7027 }
7028
7029 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7030                                 struct pipe_buffer *buf)
7031 {
7032         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7033
7034         ref->ref++;
7035 }
7036
7037 /* Pipe buffer operations for a buffer. */
7038 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7039         .can_merge              = 0,
7040         .confirm                = generic_pipe_buf_confirm,
7041         .release                = buffer_pipe_buf_release,
7042         .steal                  = generic_pipe_buf_steal,
7043         .get                    = buffer_pipe_buf_get,
7044 };
7045
7046 /*
7047  * Callback from splice_to_pipe(): release the pages held in the spd
7048  * in case we errored out while filling the pipe.
7049  */
7050 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7051 {
7052         struct buffer_ref *ref =
7053                 (struct buffer_ref *)spd->partial[i].private;
7054
7055         if (--ref->ref)
7056                 return;
7057
7058         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7059         kfree(ref);
7060         spd->partial[i].private = 0;
7061 }
7062
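/*
 * Splice complete ring buffer pages into a pipe without copying.  Each
 * page is wrapped in a buffer_ref so that it can be handed back to the
 * ring buffer once the pipe side releases it.
 */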
7063 static ssize_t
7064 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7065                             struct pipe_inode_info *pipe, size_t len,
7066                             unsigned int flags)
7067 {
7068         struct ftrace_buffer_info *info = file->private_data;
7069         struct trace_iterator *iter = &info->iter;
7070         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7071         struct page *pages_def[PIPE_DEF_BUFFERS];
7072         struct splice_pipe_desc spd = {
7073                 .pages          = pages_def,
7074                 .partial        = partial_def,
7075                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7076                 .ops            = &buffer_pipe_buf_ops,
7077                 .spd_release    = buffer_spd_release,
7078         };
7079         struct buffer_ref *ref;
7080         int entries, i;
7081         ssize_t ret = 0;
7082
7083 #ifdef CONFIG_TRACER_MAX_TRACE
7084         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7085                 return -EBUSY;
7086 #endif
7087
7088         if (*ppos & (PAGE_SIZE - 1))
7089                 return -EINVAL;
7090
7091         if (len & (PAGE_SIZE - 1)) {
7092                 if (len < PAGE_SIZE)
7093                         return -EINVAL;
7094                 len &= PAGE_MASK;
7095         }
7096
7097         if (splice_grow_spd(pipe, &spd))
7098                 return -ENOMEM;
7099
7100  again:
7101         trace_access_lock(iter->cpu_file);
7102         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7103
7104         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7105                 struct page *page;
7106                 int r;
7107
7108                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7109                 if (!ref) {
7110                         ret = -ENOMEM;
7111                         break;
7112                 }
7113
7114                 ref->ref = 1;
7115                 ref->buffer = iter->trace_buffer->buffer;
7116                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7117                 if (IS_ERR(ref->page)) {
7118                         ret = PTR_ERR(ref->page);
7119                         ref->page = NULL;
7120                         kfree(ref);
7121                         break;
7122                 }
7123                 ref->cpu = iter->cpu_file;
7124
7125                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7126                                           len, iter->cpu_file, 1);
7127                 if (r < 0) {
7128                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7129                                                    ref->page);
7130                         kfree(ref);
7131                         break;
7132                 }
7133
7134                 page = virt_to_page(ref->page);
7135
7136                 spd.pages[i] = page;
7137                 spd.partial[i].len = PAGE_SIZE;
7138                 spd.partial[i].offset = 0;
7139                 spd.partial[i].private = (unsigned long)ref;
7140                 spd.nr_pages++;
7141                 *ppos += PAGE_SIZE;
7142
7143                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7144         }
7145
7146         trace_access_unlock(iter->cpu_file);
7147         spd.nr_pages = i;
7148
7149         /* did we read anything? */
7150         if (!spd.nr_pages) {
7151                 if (ret)
7152                         goto out;
7153
7154                 ret = -EAGAIN;
7155                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7156                         goto out;
7157
7158                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7159                 if (ret)
7160                         goto out;
7161
7162                 goto again;
7163         }
7164
7165         ret = splice_to_pipe(pipe, &spd);
7166 out:
7167         splice_shrink_spd(&spd);
7168
7169         return ret;
7170 }
7171
7172 static const struct file_operations tracing_buffers_fops = {
7173         .open           = tracing_buffers_open,
7174         .read           = tracing_buffers_read,
7175         .poll           = tracing_buffers_poll,
7176         .release        = tracing_buffers_release,
7177         .splice_read    = tracing_buffers_splice_read,
7178         .llseek         = no_llseek,
7179 };
7180
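/*
 * Report per-cpu ring buffer statistics (entries, overruns, bytes,
 * timestamps, dropped and read events) as plain text for the per_cpu
 * stats files.
 */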
7181 static ssize_t
7182 tracing_stats_read(struct file *filp, char __user *ubuf,
7183                    size_t count, loff_t *ppos)
7184 {
7185         struct inode *inode = file_inode(filp);
7186         struct trace_array *tr = inode->i_private;
7187         struct trace_buffer *trace_buf = &tr->trace_buffer;
7188         int cpu = tracing_get_cpu(inode);
7189         struct trace_seq *s;
7190         unsigned long cnt;
7191         unsigned long long t;
7192         unsigned long usec_rem;
7193
7194         s = kmalloc(sizeof(*s), GFP_KERNEL);
7195         if (!s)
7196                 return -ENOMEM;
7197
7198         trace_seq_init(s);
7199
7200         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7201         trace_seq_printf(s, "entries: %ld\n", cnt);
7202
7203         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7204         trace_seq_printf(s, "overrun: %ld\n", cnt);
7205
7206         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7207         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7208
7209         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7210         trace_seq_printf(s, "bytes: %ld\n", cnt);
7211
7212         if (trace_clocks[tr->clock_id].in_ns) {
7213                 /* local or global for trace_clock */
7214                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7215                 usec_rem = do_div(t, USEC_PER_SEC);
7216                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7217                                                                 t, usec_rem);
7218
7219                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7220                 usec_rem = do_div(t, USEC_PER_SEC);
7221                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7222         } else {
7223                 /* counter or tsc mode for trace_clock */
7224                 trace_seq_printf(s, "oldest event ts: %llu\n",
7225                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7226
7227                 trace_seq_printf(s, "now ts: %llu\n",
7228                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7229         }
7230
7231         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7232         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7233
7234         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7235         trace_seq_printf(s, "read events: %ld\n", cnt);
7236
7237         count = simple_read_from_buffer(ubuf, count, ppos,
7238                                         s->buffer, trace_seq_used(s));
7239
7240         kfree(s);
7241
7242         return count;
7243 }
7244
7245 static const struct file_operations tracing_stats_fops = {
7246         .open           = tracing_open_generic_tr,
7247         .read           = tracing_stats_read,
7248         .llseek         = generic_file_llseek,
7249         .release        = tracing_release_generic_tr,
7250 };
7251
7252 #ifdef CONFIG_DYNAMIC_FTRACE
7253
7254 static ssize_t
7255 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7256                   size_t cnt, loff_t *ppos)
7257 {
7258         unsigned long *p = filp->private_data;
7259         char buf[64]; /* Not too big for a shallow stack */
7260         int r;
7261
7262         r = scnprintf(buf, 63, "%ld", *p);
7263         buf[r++] = '\n';
7264
7265         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7266 }
7267
7268 static const struct file_operations tracing_dyn_info_fops = {
7269         .open           = tracing_open_generic,
7270         .read           = tracing_read_dyn_info,
7271         .llseek         = generic_file_llseek,
7272 };
7273 #endif /* CONFIG_DYNAMIC_FTRACE */
7274
7275 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7276 static void
7277 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7278                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7279                 void *data)
7280 {
7281         tracing_snapshot_instance(tr);
7282 }
7283
7284 static void
7285 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7286                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7287                       void *data)
7288 {
7289         struct ftrace_func_mapper *mapper = data;
7290         long *count = NULL;
7291
7292         if (mapper)
7293                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7294
7295         if (count) {
7296
7297                 if (*count <= 0)
7298                         return;
7299
7300                 (*count)--;
7301         }
7302
7303         tracing_snapshot_instance(tr);
7304 }
7305
7306 static int
7307 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7308                       struct ftrace_probe_ops *ops, void *data)
7309 {
7310         struct ftrace_func_mapper *mapper = data;
7311         long *count = NULL;
7312
7313         seq_printf(m, "%ps:", (void *)ip);
7314
7315         seq_puts(m, "snapshot");
7316
7317         if (mapper)
7318                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7319
7320         if (count)
7321                 seq_printf(m, ":count=%ld\n", *count);
7322         else
7323                 seq_puts(m, ":unlimited\n");
7324
7325         return 0;
7326 }
7327
7328 static int
7329 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7330                      unsigned long ip, void *init_data, void **data)
7331 {
7332         struct ftrace_func_mapper *mapper = *data;
7333
7334         if (!mapper) {
7335                 mapper = allocate_ftrace_func_mapper();
7336                 if (!mapper)
7337                         return -ENOMEM;
7338                 *data = mapper;
7339         }
7340
7341         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7342 }
7343
7344 static void
7345 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7346                      unsigned long ip, void *data)
7347 {
7348         struct ftrace_func_mapper *mapper = data;
7349
7350         if (!ip) {
7351                 if (!mapper)
7352                         return;
7353                 free_ftrace_func_mapper(mapper, NULL);
7354                 return;
7355         }
7356
7357         ftrace_func_mapper_remove_ip(mapper, ip);
7358 }
7359
7360 static struct ftrace_probe_ops snapshot_probe_ops = {
7361         .func                   = ftrace_snapshot,
7362         .print                  = ftrace_snapshot_print,
7363 };
7364
7365 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7366         .func                   = ftrace_count_snapshot,
7367         .print                  = ftrace_snapshot_print,
7368         .init                   = ftrace_snapshot_init,
7369         .free                   = ftrace_snapshot_free,
7370 };
7371
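/*
 * Handler for the "snapshot" command in set_ftrace_filter:
 *   echo '<function>:snapshot[:count]' > set_ftrace_filter
 * takes a snapshot every time <function> is hit, at most <count> times
 * when a count is given.
 */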
7372 static int
7373 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7374                                char *glob, char *cmd, char *param, int enable)
7375 {
7376         struct ftrace_probe_ops *ops;
7377         void *count = (void *)-1;
7378         char *number;
7379         int ret;
7380
7381         if (!tr)
7382                 return -ENODEV;
7383
7384         /* hash funcs only work with set_ftrace_filter */
7385         if (!enable)
7386                 return -EINVAL;
7387
7388         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7389
7390         if (glob[0] == '!')
7391                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7392
7393         if (!param)
7394                 goto out_reg;
7395
7396         number = strsep(&param, ":");
7397
7398         if (!strlen(number))
7399                 goto out_reg;
7400
7401         /*
7402          * We use the callback data field (which is a pointer)
7403          * as our counter.
7404          */
7405         ret = kstrtoul(number, 0, (unsigned long *)&count);
7406         if (ret)
7407                 return ret;
7408
7409  out_reg:
7410         ret = tracing_alloc_snapshot_instance(tr);
7411         if (ret < 0)
7412                 goto out;
7413
7414         ret = register_ftrace_function_probe(glob, tr, ops, count);
7415
7416  out:
7417         return ret < 0 ? ret : 0;
7418 }
7419
7420 static struct ftrace_func_command ftrace_snapshot_cmd = {
7421         .name                   = "snapshot",
7422         .func                   = ftrace_trace_snapshot_callback,
7423 };
7424
7425 static __init int register_snapshot_cmd(void)
7426 {
7427         return register_ftrace_command(&ftrace_snapshot_cmd);
7428 }
7429 #else
7430 static inline __init int register_snapshot_cmd(void) { return 0; }
7431 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7432
7433 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7434 {
7435         if (WARN_ON(!tr->dir))
7436                 return ERR_PTR(-ENODEV);
7437
7438         /* Top directory uses NULL as the parent */
7439         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7440                 return NULL;
7441
7442         /* All sub buffers have a descriptor */
7443         return tr->dir;
7444 }
7445
7446 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7447 {
7448         struct dentry *d_tracer;
7449
7450         if (tr->percpu_dir)
7451                 return tr->percpu_dir;
7452
7453         d_tracer = tracing_get_dentry(tr);
7454         if (IS_ERR(d_tracer))
7455                 return NULL;
7456
7457         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7458
7459         WARN_ONCE(!tr->percpu_dir,
7460                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7461
7462         return tr->percpu_dir;
7463 }
7464
7465 static struct dentry *
7466 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7467                       void *data, long cpu, const struct file_operations *fops)
7468 {
7469         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7470
7471         if (ret) /* See tracing_get_cpu() */
7472                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7473         return ret;
7474 }
7475
7476 static void
7477 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7478 {
7479         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7480         struct dentry *d_cpu;
7481         char cpu_dir[30]; /* 30 characters should be more than enough */
7482
7483         if (!d_percpu)
7484                 return;
7485
7486         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7487         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7488         if (!d_cpu) {
7489                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7490                 return;
7491         }
7492
7493         /* per cpu trace_pipe */
7494         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7495                                 tr, cpu, &tracing_pipe_fops);
7496
7497         /* per cpu trace */
7498         trace_create_cpu_file("trace", 0644, d_cpu,
7499                                 tr, cpu, &tracing_fops);
7500
7501         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7502                                 tr, cpu, &tracing_buffers_fops);
7503
7504         trace_create_cpu_file("stats", 0444, d_cpu,
7505                                 tr, cpu, &tracing_stats_fops);
7506
7507         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7508                                 tr, cpu, &tracing_entries_fops);
7509
7510 #ifdef CONFIG_TRACER_SNAPSHOT
7511         trace_create_cpu_file("snapshot", 0644, d_cpu,
7512                                 tr, cpu, &snapshot_fops);
7513
7514         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7515                                 tr, cpu, &snapshot_raw_fops);
7516 #endif
7517 }
7518
7519 #ifdef CONFIG_FTRACE_SELFTEST
7520 /* Let selftest have access to static functions in this file */
7521 #include "trace_selftest.c"
7522 #endif
7523
7524 static ssize_t
7525 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7526                         loff_t *ppos)
7527 {
7528         struct trace_option_dentry *topt = filp->private_data;
7529         char *buf;
7530
7531         if (topt->flags->val & topt->opt->bit)
7532                 buf = "1\n";
7533         else
7534                 buf = "0\n";
7535
7536         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7537 }
7538
7539 static ssize_t
7540 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7541                          loff_t *ppos)
7542 {
7543         struct trace_option_dentry *topt = filp->private_data;
7544         unsigned long val;
7545         int ret;
7546
7547         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7548         if (ret)
7549                 return ret;
7550
7551         if (val != 0 && val != 1)
7552                 return -EINVAL;
7553
7554         if (!!(topt->flags->val & topt->opt->bit) != val) {
7555                 mutex_lock(&trace_types_lock);
7556                 ret = __set_tracer_option(topt->tr, topt->flags,
7557                                           topt->opt, !val);
7558                 mutex_unlock(&trace_types_lock);
7559                 if (ret)
7560                         return ret;
7561         }
7562
7563         *ppos += cnt;
7564
7565         return cnt;
7566 }
7567
7568
7569 static const struct file_operations trace_options_fops = {
7570         .open = tracing_open_generic,
7571         .read = trace_options_read,
7572         .write = trace_options_write,
7573         .llseek = generic_file_llseek,
7574 };
7575
7576 /*
7577  * In order to pass in both the trace_array descriptor and the index of
7578  * the flag that the trace option file represents, the trace_array
7579  * has a character array of trace_flags_index[], which holds the index
7580  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7581  * The address of this character array is passed to the flag option file
7582  * read/write callbacks.
7583  *
7584  * In order to extract both the index and the trace_array descriptor,
7585  * get_tr_index() uses the following algorithm.
7586  *
7587  *   idx = *ptr;
7588  *
7589  * This works because the pointer holds the address of an element of the
7590  * index array, and each element's value is its own index (index[1] == 1).
7591  *
7592  * Then to get the trace_array descriptor, by subtracting that index
7593  * from the ptr, we get to the start of the index itself.
7594  *
7595  *   ptr - idx == &index[0]
7596  *
7597  * Then a simple container_of() from that pointer gets us to the
7598  * trace_array descriptor.
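 *
 * For example, the file for flag bit 5 is handed a pointer to
 * trace_flags_index[5], which holds the value 5.  Subtracting 5 from
 * that pointer yields &trace_flags_index[0], and container_of() on
 * that address gives the enclosing trace_array.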
7599  */
7600 static void get_tr_index(void *data, struct trace_array **ptr,
7601                          unsigned int *pindex)
7602 {
7603         *pindex = *(unsigned char *)data;
7604
7605         *ptr = container_of(data - *pindex, struct trace_array,
7606                             trace_flags_index);
7607 }
7608
7609 static ssize_t
7610 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7611                         loff_t *ppos)
7612 {
7613         void *tr_index = filp->private_data;
7614         struct trace_array *tr;
7615         unsigned int index;
7616         char *buf;
7617
7618         get_tr_index(tr_index, &tr, &index);
7619
7620         if (tr->trace_flags & (1 << index))
7621                 buf = "1\n";
7622         else
7623                 buf = "0\n";
7624
7625         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7626 }
7627
7628 static ssize_t
7629 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7630                          loff_t *ppos)
7631 {
7632         void *tr_index = filp->private_data;
7633         struct trace_array *tr;
7634         unsigned int index;
7635         unsigned long val;
7636         int ret;
7637
7638         get_tr_index(tr_index, &tr, &index);
7639
7640         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7641         if (ret)
7642                 return ret;
7643
7644         if (val != 0 && val != 1)
7645                 return -EINVAL;
7646
7647         mutex_lock(&trace_types_lock);
7648         ret = set_tracer_flag(tr, 1 << index, val);
7649         mutex_unlock(&trace_types_lock);
7650
7651         if (ret < 0)
7652                 return ret;
7653
7654         *ppos += cnt;
7655
7656         return cnt;
7657 }
7658
7659 static const struct file_operations trace_options_core_fops = {
7660         .open = tracing_open_generic,
7661         .read = trace_options_core_read,
7662         .write = trace_options_core_write,
7663         .llseek = generic_file_llseek,
7664 };
7665
7666 struct dentry *trace_create_file(const char *name,
7667                                  umode_t mode,
7668                                  struct dentry *parent,
7669                                  void *data,
7670                                  const struct file_operations *fops)
7671 {
7672         struct dentry *ret;
7673
7674         ret = tracefs_create_file(name, mode, parent, data, fops);
7675         if (!ret)
7676                 pr_warn("Could not create tracefs '%s' entry\n", name);
7677
7678         return ret;
7679 }
7680
7681
7682 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7683 {
7684         struct dentry *d_tracer;
7685
7686         if (tr->options)
7687                 return tr->options;
7688
7689         d_tracer = tracing_get_dentry(tr);
7690         if (IS_ERR(d_tracer))
7691                 return NULL;
7692
7693         tr->options = tracefs_create_dir("options", d_tracer);
7694         if (!tr->options) {
7695                 pr_warn("Could not create tracefs directory 'options'\n");
7696                 return NULL;
7697         }
7698
7699         return tr->options;
7700 }
7701
7702 static void
7703 create_trace_option_file(struct trace_array *tr,
7704                          struct trace_option_dentry *topt,
7705                          struct tracer_flags *flags,
7706                          struct tracer_opt *opt)
7707 {
7708         struct dentry *t_options;
7709
7710         t_options = trace_options_init_dentry(tr);
7711         if (!t_options)
7712                 return;
7713
7714         topt->flags = flags;
7715         topt->opt = opt;
7716         topt->tr = tr;
7717
7718         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7719                                     &trace_options_fops);
7720
7721 }
7722
7723 static void
7724 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7725 {
7726         struct trace_option_dentry *topts;
7727         struct trace_options *tr_topts;
7728         struct tracer_flags *flags;
7729         struct tracer_opt *opts;
7730         int cnt;
7731         int i;
7732
7733         if (!tracer)
7734                 return;
7735
7736         flags = tracer->flags;
7737
7738         if (!flags || !flags->opts)
7739                 return;
7740
7741         /*
7742          * If this is an instance, only create flags for tracers
7743          * the instance may have.
7744          */
7745         if (!trace_ok_for_array(tracer, tr))
7746                 return;
7747
7748         for (i = 0; i < tr->nr_topts; i++) {
7749                 /* Make sure there are no duplicate flags. */
7750                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7751                         return;
7752         }
7753
7754         opts = flags->opts;
7755
7756         for (cnt = 0; opts[cnt].name; cnt++)
7757                 ;
7758
7759         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7760         if (!topts)
7761                 return;
7762
7763         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7764                             GFP_KERNEL);
7765         if (!tr_topts) {
7766                 kfree(topts);
7767                 return;
7768         }
7769
7770         tr->topts = tr_topts;
7771         tr->topts[tr->nr_topts].tracer = tracer;
7772         tr->topts[tr->nr_topts].topts = topts;
7773         tr->nr_topts++;
7774
7775         for (cnt = 0; opts[cnt].name; cnt++) {
7776                 create_trace_option_file(tr, &topts[cnt], flags,
7777                                          &opts[cnt]);
7778                 WARN_ONCE(topts[cnt].entry == NULL,
7779                           "Failed to create trace option: %s",
7780                           opts[cnt].name);
7781         }
7782 }
7783
7784 static struct dentry *
7785 create_trace_option_core_file(struct trace_array *tr,
7786                               const char *option, long index)
7787 {
7788         struct dentry *t_options;
7789
7790         t_options = trace_options_init_dentry(tr);
7791         if (!t_options)
7792                 return NULL;
7793
7794         return trace_create_file(option, 0644, t_options,
7795                                  (void *)&tr->trace_flags_index[index],
7796                                  &trace_options_core_fops);
7797 }
7798
7799 static void create_trace_options_dir(struct trace_array *tr)
7800 {
7801         struct dentry *t_options;
7802         bool top_level = tr == &global_trace;
7803         int i;
7804
7805         t_options = trace_options_init_dentry(tr);
7806         if (!t_options)
7807                 return;
7808
7809         for (i = 0; trace_options[i]; i++) {
7810                 if (top_level ||
7811                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7812                         create_trace_option_core_file(tr, trace_options[i], i);
7813         }
7814 }
7815
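/*
 * The rb_simple handlers back the tracing_on control: reading reports
 * whether the ring buffer is recording, and writing 0 or 1 stops or
 * starts it, also calling the current tracer's stop/start callbacks.
 */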
7816 static ssize_t
7817 rb_simple_read(struct file *filp, char __user *ubuf,
7818                size_t cnt, loff_t *ppos)
7819 {
7820         struct trace_array *tr = filp->private_data;
7821         char buf[64];
7822         int r;
7823
7824         r = tracer_tracing_is_on(tr);
7825         r = sprintf(buf, "%d\n", r);
7826
7827         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7828 }
7829
7830 static ssize_t
7831 rb_simple_write(struct file *filp, const char __user *ubuf,
7832                 size_t cnt, loff_t *ppos)
7833 {
7834         struct trace_array *tr = filp->private_data;
7835         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7836         unsigned long val;
7837         int ret;
7838
7839         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7840         if (ret)
7841                 return ret;
7842
7843         if (buffer) {
7844                 mutex_lock(&trace_types_lock);
7845                 if (!!val == tracer_tracing_is_on(tr)) {
7846                         val = 0; /* do nothing */
7847                 } else if (val) {
7848                         tracer_tracing_on(tr);
7849                         if (tr->current_trace->start)
7850                                 tr->current_trace->start(tr);
7851                 } else {
7852                         tracer_tracing_off(tr);
7853                         if (tr->current_trace->stop)
7854                                 tr->current_trace->stop(tr);
7855                 }
7856                 mutex_unlock(&trace_types_lock);
7857         }
7858
7859         (*ppos)++;
7860
7861         return cnt;
7862 }
7863
7864 static const struct file_operations rb_simple_fops = {
7865         .open           = tracing_open_generic_tr,
7866         .read           = rb_simple_read,
7867         .write          = rb_simple_write,
7868         .release        = tracing_release_generic_tr,
7869         .llseek         = default_llseek,
7870 };
7871
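/*
 * The buffer_percent file sets how full the ring buffer must be before
 * waiting readers are woken up.  Values above 100 are rejected and a
 * write of 0 is treated as 1.
 */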
7872 static ssize_t
7873 buffer_percent_read(struct file *filp, char __user *ubuf,
7874                     size_t cnt, loff_t *ppos)
7875 {
7876         struct trace_array *tr = filp->private_data;
7877         char buf[64];
7878         int r;
7879
7880         r = tr->buffer_percent;
7881         r = sprintf(buf, "%d\n", r);
7882
7883         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7884 }
7885
7886 static ssize_t
7887 buffer_percent_write(struct file *filp, const char __user *ubuf,
7888                      size_t cnt, loff_t *ppos)
7889 {
7890         struct trace_array *tr = filp->private_data;
7891         unsigned long val;
7892         int ret;
7893
7894         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7895         if (ret)
7896                 return ret;
7897
7898         if (val > 100)
7899                 return -EINVAL;
7900
7901         if (!val)
7902                 val = 1;
7903
7904         tr->buffer_percent = val;
7905
7906         (*ppos)++;
7907
7908         return cnt;
7909 }
7910
7911 static const struct file_operations buffer_percent_fops = {
7912         .open           = tracing_open_generic_tr,
7913         .read           = buffer_percent_read,
7914         .write          = buffer_percent_write,
7915         .release        = tracing_release_generic_tr,
7916         .llseek         = default_llseek,
7917 };
7918
7919 struct dentry *trace_instance_dir;
7920
7921 static void
7922 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7923
7924 static int
7925 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7926 {
7927         enum ring_buffer_flags rb_flags;
7928
7929         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7930
7931         buf->tr = tr;
7932
7933         buf->buffer = ring_buffer_alloc(size, rb_flags);
7934         if (!buf->buffer)
7935                 return -ENOMEM;
7936
7937         buf->data = alloc_percpu(struct trace_array_cpu);
7938         if (!buf->data) {
7939                 ring_buffer_free(buf->buffer);
7940                 buf->buffer = NULL;
7941                 return -ENOMEM;
7942         }
7943
7944         /* Allocate the first page for all buffers */
7945         set_buffer_entries(&tr->trace_buffer,
7946                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7947
7948         return 0;
7949 }
7950
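/*
 * Allocate the main trace buffer and, when CONFIG_TRACER_MAX_TRACE is
 * enabled, the max/snapshot buffer.  The snapshot buffer stays at a
 * minimal size unless a snapshot was requested on the kernel command
 * line.
 */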
7951 static int allocate_trace_buffers(struct trace_array *tr, int size)
7952 {
7953         int ret;
7954
7955         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7956         if (ret)
7957                 return ret;
7958
7959 #ifdef CONFIG_TRACER_MAX_TRACE
7960         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7961                                     allocate_snapshot ? size : 1);
7962         if (WARN_ON(ret)) {
7963                 ring_buffer_free(tr->trace_buffer.buffer);
7964                 tr->trace_buffer.buffer = NULL;
7965                 free_percpu(tr->trace_buffer.data);
7966                 tr->trace_buffer.data = NULL;
7967                 return -ENOMEM;
7968         }
7969         tr->allocated_snapshot = allocate_snapshot;
7970
7971         /*
7972          * Only the top level trace array gets its snapshot allocated
7973          * from the kernel command line.
7974          */
7975         allocate_snapshot = false;
7976 #endif
7977         return 0;
7978 }
7979
7980 static void free_trace_buffer(struct trace_buffer *buf)
7981 {
7982         if (buf->buffer) {
7983                 ring_buffer_free(buf->buffer);
7984                 buf->buffer = NULL;
7985                 free_percpu(buf->data);
7986                 buf->data = NULL;
7987         }
7988 }
7989
7990 static void free_trace_buffers(struct trace_array *tr)
7991 {
7992         if (!tr)
7993                 return;
7994
7995         free_trace_buffer(&tr->trace_buffer);
7996
7997 #ifdef CONFIG_TRACER_MAX_TRACE
7998         free_trace_buffer(&tr->max_buffer);
7999 #endif
8000 }
8001
8002 static void init_trace_flags_index(struct trace_array *tr)
8003 {
8004         int i;
8005
8006         /* Used by the trace options files */
8007         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8008                 tr->trace_flags_index[i] = i;
8009 }
8010
8011 static void __update_tracer_options(struct trace_array *tr)
8012 {
8013         struct tracer *t;
8014
8015         for (t = trace_types; t; t = t->next)
8016                 add_tracer_options(tr, t);
8017 }
8018
8019 static void update_tracer_options(struct trace_array *tr)
8020 {
8021         mutex_lock(&trace_types_lock);
8022         __update_tracer_options(tr);
8023         mutex_unlock(&trace_types_lock);
8024 }
8025
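/*
 * Called when a new directory is created under the tracefs "instances"
 * directory. Sets up a fresh trace_array with its own buffers, option
 * flags and control files, and links it into ftrace_trace_arrays.
 *
 * Illustrative usage from a shell (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 */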
8026 static int instance_mkdir(const char *name)
8027 {
8028         struct trace_array *tr;
8029         int ret;
8030
8031         mutex_lock(&event_mutex);
8032         mutex_lock(&trace_types_lock);
8033
8034         ret = -EEXIST;
8035         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8036                 if (tr->name && strcmp(tr->name, name) == 0)
8037                         goto out_unlock;
8038         }
8039
8040         ret = -ENOMEM;
8041         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8042         if (!tr)
8043                 goto out_unlock;
8044
8045         tr->name = kstrdup(name, GFP_KERNEL);
8046         if (!tr->name)
8047                 goto out_free_tr;
8048
8049         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8050                 goto out_free_tr;
8051
8052         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8053
8054         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8055
8056         raw_spin_lock_init(&tr->start_lock);
8057
8058         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8059
8060         tr->current_trace = &nop_trace;
8061
8062         INIT_LIST_HEAD(&tr->systems);
8063         INIT_LIST_HEAD(&tr->events);
8064         INIT_LIST_HEAD(&tr->hist_vars);
8065
8066         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8067                 goto out_free_tr;
8068
8069         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8070         if (!tr->dir)
8071                 goto out_free_tr;
8072
8073         ret = event_trace_add_tracer(tr->dir, tr);
8074         if (ret) {
8075                 tracefs_remove_recursive(tr->dir);
8076                 goto out_free_tr;
8077         }
8078
8079         ftrace_init_trace_array(tr);
8080
8081         init_tracer_tracefs(tr, tr->dir);
8082         init_trace_flags_index(tr);
8083         __update_tracer_options(tr);
8084
8085         list_add(&tr->list, &ftrace_trace_arrays);
8086
8087         mutex_unlock(&trace_types_lock);
8088         mutex_unlock(&event_mutex);
8089
8090         return 0;
8091
8092  out_free_tr:
8093         free_trace_buffers(tr);
8094         free_cpumask_var(tr->tracing_cpumask);
8095         kfree(tr->name);
8096         kfree(tr);
8097
8098  out_unlock:
8099         mutex_unlock(&trace_types_lock);
8100         mutex_unlock(&event_mutex);
8101
8102         return ret;
8103
8104 }
8105
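/*
 * Called when an instance directory is removed, e.g.:
 *
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * Fails with -EBUSY if the instance (or its current tracer) is still
 * referenced; otherwise tears down the instance's files, buffers and
 * options and frees the trace_array.
 */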
8106 static int instance_rmdir(const char *name)
8107 {
8108         struct trace_array *tr;
8109         int found = 0;
8110         int ret;
8111         int i;
8112
8113         mutex_lock(&event_mutex);
8114         mutex_lock(&trace_types_lock);
8115
8116         ret = -ENODEV;
8117         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8118                 if (tr->name && strcmp(tr->name, name) == 0) {
8119                         found = 1;
8120                         break;
8121                 }
8122         }
8123         if (!found)
8124                 goto out_unlock;
8125
8126         ret = -EBUSY;
8127         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8128                 goto out_unlock;
8129
8130         list_del(&tr->list);
8131
8132         /* Disable all the flags that were enabled coming in */
8133         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8134                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8135                         set_tracer_flag(tr, 1 << i, 0);
8136         }
8137
8138         tracing_set_nop(tr);
8139         clear_ftrace_function_probes(tr);
8140         event_trace_del_tracer(tr);
8141         ftrace_clear_pids(tr);
8142         ftrace_destroy_function_files(tr);
8143         tracefs_remove_recursive(tr->dir);
8144         free_trace_buffers(tr);
8145
8146         for (i = 0; i < tr->nr_topts; i++) {
8147                 kfree(tr->topts[i].topts);
8148         }
8149         kfree(tr->topts);
8150
8151         free_cpumask_var(tr->tracing_cpumask);
8152         kfree(tr->name);
8153         kfree(tr);
8154
8155         ret = 0;
8156
8157  out_unlock:
8158         mutex_unlock(&trace_types_lock);
8159         mutex_unlock(&event_mutex);
8160
8161         return ret;
8162 }
8163
8164 static __init void create_trace_instances(struct dentry *d_tracer)
8165 {
8166         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8167                                                          instance_mkdir,
8168                                                          instance_rmdir);
8169         if (WARN_ON(!trace_instance_dir))
8170                 return;
8171 }
8172
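/*
 * Create the per-instance control files (trace, trace_pipe,
 * buffer_size_kb, tracing_on, ...) under the given tracefs directory.
 * Used both for the top level tracing directory and for each instance.
 */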
8173 static void
8174 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8175 {
8176         struct trace_event_file *file;
8177         int cpu;
8178
8179         trace_create_file("available_tracers", 0444, d_tracer,
8180                         tr, &show_traces_fops);
8181
8182         trace_create_file("current_tracer", 0644, d_tracer,
8183                         tr, &set_tracer_fops);
8184
8185         trace_create_file("tracing_cpumask", 0644, d_tracer,
8186                           tr, &tracing_cpumask_fops);
8187
8188         trace_create_file("trace_options", 0644, d_tracer,
8189                           tr, &tracing_iter_fops);
8190
8191         trace_create_file("trace", 0644, d_tracer,
8192                           tr, &tracing_fops);
8193
8194         trace_create_file("trace_pipe", 0444, d_tracer,
8195                           tr, &tracing_pipe_fops);
8196
8197         trace_create_file("buffer_size_kb", 0644, d_tracer,
8198                           tr, &tracing_entries_fops);
8199
8200         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8201                           tr, &tracing_total_entries_fops);
8202
8203         trace_create_file("free_buffer", 0200, d_tracer,
8204                           tr, &tracing_free_buffer_fops);
8205
8206         trace_create_file("trace_marker", 0220, d_tracer,
8207                           tr, &tracing_mark_fops);
8208
8209         file = __find_event_file(tr, "ftrace", "print");
8210         if (file && file->dir)
8211                 trace_create_file("trigger", 0644, file->dir, file,
8212                                   &event_trigger_fops);
8213         tr->trace_marker_file = file;
8214
8215         trace_create_file("trace_marker_raw", 0220, d_tracer,
8216                           tr, &tracing_mark_raw_fops);
8217
8218         trace_create_file("trace_clock", 0644, d_tracer, tr,
8219                           &trace_clock_fops);
8220
8221         trace_create_file("tracing_on", 0644, d_tracer,
8222                           tr, &rb_simple_fops);
8223
8224         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8225                           &trace_time_stamp_mode_fops);
8226
8227         tr->buffer_percent = 50;
8228
8229         trace_create_file("buffer_percent", 0444, d_tracer,
8230                         tr, &buffer_percent_fops);
8231
8232         create_trace_options_dir(tr);
8233
8234 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8235         trace_create_file("tracing_max_latency", 0644, d_tracer,
8236                         &tr->max_latency, &tracing_max_lat_fops);
8237 #endif
8238
8239         if (ftrace_create_function_files(tr, d_tracer))
8240                 WARN(1, "Could not allocate function filter files");
8241
8242 #ifdef CONFIG_TRACER_SNAPSHOT
8243         trace_create_file("snapshot", 0644, d_tracer,
8244                           tr, &snapshot_fops);
8245 #endif
8246
8247         for_each_tracing_cpu(cpu)
8248                 tracing_init_tracefs_percpu(tr, cpu);
8249
8250         ftrace_init_tracefs(tr, d_tracer);
8251 }
8252
8253 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8254 {
8255         struct vfsmount *mnt;
8256         struct file_system_type *type;
8257
8258         /*
8259          * To maintain backward compatibility for tools that mount
8260          * debugfs to get to the tracing facility, tracefs is automatically
8261          * mounted to the debugfs/tracing directory.
8262          */
8263         type = get_fs_type("tracefs");
8264         if (!type)
8265                 return NULL;
8266         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8267         put_filesystem(type);
8268         if (IS_ERR(mnt))
8269                 return NULL;
8270         mntget(mnt);
8271
8272         return mnt;
8273 }
8274
8275 /**
8276  * tracing_init_dentry - initialize top level trace array
8277  *
8278  * This is called when creating files or directories in the tracing
8279  * directory. It is called via fs_initcall() by any of the boot up code
8280  * and expects to return the dentry of the top level tracing directory.
8281  */
8282 struct dentry *tracing_init_dentry(void)
8283 {
8284         struct trace_array *tr = &global_trace;
8285
8286         /* The top level trace array uses NULL as parent */
8287         if (tr->dir)
8288                 return NULL;
8289
8290         if (WARN_ON(!tracefs_initialized()) ||
8291                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8292                  WARN_ON(!debugfs_initialized())))
8293                 return ERR_PTR(-ENODEV);
8294
8295         /*
8296          * As there may still be users that expect the tracing
8297          * files to exist in debugfs/tracing, we must automount
8298          * the tracefs file system there, so older tools still
8299          * work with the newer kernel.
8300          */
8301         tr->dir = debugfs_create_automount("tracing", NULL,
8302                                            trace_automount, NULL);
8303         if (!tr->dir) {
8304                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8305                 return ERR_PTR(-ENOMEM);
8306         }
8307
8308         return NULL;
8309 }
8310
8311 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8312 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8313
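/*
 * Register the eval maps that are built into the kernel image. They
 * live between the __start/__stop_ftrace_eval_maps linker symbols
 * declared above.
 */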
8314 static void __init trace_eval_init(void)
8315 {
8316         int len;
8317
8318         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8319         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8320 }
8321
8322 #ifdef CONFIG_MODULES
8323 static void trace_module_add_evals(struct module *mod)
8324 {
8325         if (!mod->num_trace_evals)
8326                 return;
8327
8328         /*
8329          * Modules with bad taint do not have events created; do
8330          * not bother with their eval maps either.
8331          */
8332         if (trace_module_has_bad_taint(mod))
8333                 return;
8334
8335         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8336 }
8337
8338 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8339 static void trace_module_remove_evals(struct module *mod)
8340 {
8341         union trace_eval_map_item *map;
8342         union trace_eval_map_item **last = &trace_eval_maps;
8343
8344         if (!mod->num_trace_evals)
8345                 return;
8346
8347         mutex_lock(&trace_eval_mutex);
8348
8349         map = trace_eval_maps;
8350
8351         while (map) {
8352                 if (map->head.mod == mod)
8353                         break;
8354                 map = trace_eval_jmp_to_tail(map);
8355                 last = &map->tail.next;
8356                 map = map->tail.next;
8357         }
8358         if (!map)
8359                 goto out;
8360
8361         *last = trace_eval_jmp_to_tail(map)->tail.next;
8362         kfree(map);
8363  out:
8364         mutex_unlock(&trace_eval_mutex);
8365 }
8366 #else
8367 static inline void trace_module_remove_evals(struct module *mod) { }
8368 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8369
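/*
 * Module notifier: add a module's eval maps when it is loaded and
 * remove them again when it is unloaded.
 */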
8370 static int trace_module_notify(struct notifier_block *self,
8371                                unsigned long val, void *data)
8372 {
8373         struct module *mod = data;
8374
8375         switch (val) {
8376         case MODULE_STATE_COMING:
8377                 trace_module_add_evals(mod);
8378                 break;
8379         case MODULE_STATE_GOING:
8380                 trace_module_remove_evals(mod);
8381                 break;
8382         }
8383
8384         return 0;
8385 }
8386
8387 static struct notifier_block trace_module_nb = {
8388         .notifier_call = trace_module_notify,
8389         .priority = 0,
8390 };
8391 #endif /* CONFIG_MODULES */
8392
8393 static __init int tracer_init_tracefs(void)
8394 {
8395         struct dentry *d_tracer;
8396
8397         trace_access_lock_init();
8398
8399         d_tracer = tracing_init_dentry();
8400         if (IS_ERR(d_tracer))
8401                 return 0;
8402
8403         event_trace_init();
8404
8405         init_tracer_tracefs(&global_trace, d_tracer);
8406         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8407
8408         trace_create_file("tracing_thresh", 0644, d_tracer,
8409                         &global_trace, &tracing_thresh_fops);
8410
8411         trace_create_file("README", 0444, d_tracer,
8412                         NULL, &tracing_readme_fops);
8413
8414         trace_create_file("saved_cmdlines", 0444, d_tracer,
8415                         NULL, &tracing_saved_cmdlines_fops);
8416
8417         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8418                           NULL, &tracing_saved_cmdlines_size_fops);
8419
8420         trace_create_file("saved_tgids", 0444, d_tracer,
8421                         NULL, &tracing_saved_tgids_fops);
8422
8423         trace_eval_init();
8424
8425         trace_create_eval_file(d_tracer);
8426
8427 #ifdef CONFIG_MODULES
8428         register_module_notifier(&trace_module_nb);
8429 #endif
8430
8431 #ifdef CONFIG_DYNAMIC_FTRACE
8432         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8433                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8434 #endif
8435
8436         create_trace_instances(d_tracer);
8437
8438         update_tracer_options(&global_trace);
8439
8440         return 0;
8441 }
8442
8443 static int trace_panic_handler(struct notifier_block *this,
8444                                unsigned long event, void *unused)
8445 {
8446         if (ftrace_dump_on_oops)
8447                 ftrace_dump(ftrace_dump_on_oops);
8448         return NOTIFY_OK;
8449 }
8450
8451 static struct notifier_block trace_panic_notifier = {
8452         .notifier_call  = trace_panic_handler,
8453         .next           = NULL,
8454         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8455 };
8456
8457 static int trace_die_handler(struct notifier_block *self,
8458                              unsigned long val,
8459                              void *data)
8460 {
8461         switch (val) {
8462         case DIE_OOPS:
8463                 if (ftrace_dump_on_oops)
8464                         ftrace_dump(ftrace_dump_on_oops);
8465                 break;
8466         default:
8467                 break;
8468         }
8469         return NOTIFY_OK;
8470 }
8471
8472 static struct notifier_block trace_die_notifier = {
8473         .notifier_call = trace_die_handler,
8474         .priority = 200
8475 };
8476
8477 /*
8478  * printk is limited to a max of 1024 characters; we really don't need it that big.
8479  * Nothing should be printing 1000 characters anyway.
8480  */
8481 #define TRACE_MAX_PRINT         1000
8482
8483 /*
8484  * Define here KERN_TRACE so that we have one place to modify
8485  * it if we decide to change what log level the ftrace dump
8486  * should be at.
8487  */
8488 #define KERN_TRACE              KERN_EMERG
8489
8490 void
8491 trace_printk_seq(struct trace_seq *s)
8492 {
8493         /* Probably should print a warning here. */
8494         if (s->seq.len >= TRACE_MAX_PRINT)
8495                 s->seq.len = TRACE_MAX_PRINT;
8496
8497         /*
8498          * More paranoid code. Although the buffer size is set to
8499          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8500          * an extra layer of protection.
8501          */
8502         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8503                 s->seq.len = s->seq.size - 1;
8504
8505         /* should be zero terminated, but we are paranoid. */
8506         s->buffer[s->seq.len] = 0;
8507
8508         printk(KERN_TRACE "%s", s->buffer);
8509
8510         trace_seq_init(s);
8511 }
8512
8513 void trace_init_global_iter(struct trace_iterator *iter)
8514 {
8515         iter->tr = &global_trace;
8516         iter->trace = iter->tr->current_trace;
8517         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8518         iter->trace_buffer = &global_trace.trace_buffer;
8519
8520         if (iter->trace && iter->trace->open)
8521                 iter->trace->open(iter);
8522
8523         /* Annotate start of buffers if we had overruns */
8524         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8525                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8526
8527         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8528         if (trace_clocks[iter->tr->clock_id].in_ns)
8529                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8530 }
8531
8532 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8533 {
8534         /* use static because iter can be a bit big for the stack */
8535         static struct trace_iterator iter;
8536         static atomic_t dump_running;
8537         struct trace_array *tr = &global_trace;
8538         unsigned int old_userobj;
8539         unsigned long flags;
8540         int cnt = 0, cpu;
8541
8542         /* Only allow one dump user at a time. */
8543         if (atomic_inc_return(&dump_running) != 1) {
8544                 atomic_dec(&dump_running);
8545                 return;
8546         }
8547
8548         /*
8549          * Always turn off tracing when we dump.
8550          * We don't need to show trace output of what happens
8551          * between multiple crashes.
8552          *
8553          * If the user does a sysrq-z, then they can re-enable
8554          * tracing with echo 1 > tracing_on.
8555          */
8556         tracing_off();
8557
8558         local_irq_save(flags);
8559         printk_nmi_direct_enter();
8560
8561         /* Simulate the iterator */
8562         trace_init_global_iter(&iter);
8563
8564         for_each_tracing_cpu(cpu) {
8565                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8566         }
8567
8568         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8569
8570         /* don't look at user memory in panic mode */
8571         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8572
8573         switch (oops_dump_mode) {
8574         case DUMP_ALL:
8575                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8576                 break;
8577         case DUMP_ORIG:
8578                 iter.cpu_file = raw_smp_processor_id();
8579                 break;
8580         case DUMP_NONE:
8581                 goto out_enable;
8582         default:
8583                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8584                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8585         }
8586
8587         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8588
8589         /* Did function tracer already get disabled? */
8590         if (ftrace_is_dead()) {
8591                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8592                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8593         }
8594
8595         /*
8596          * We need to stop all tracing on all CPUs to read
8597          * the next buffer. This is a bit expensive, but it is
8598          * not done often. We read everything we can,
8599          * and then release the locks again.
8600          */
8601
8602         while (!trace_empty(&iter)) {
8603
8604                 if (!cnt)
8605                         printk(KERN_TRACE "---------------------------------\n");
8606
8607                 cnt++;
8608
8609                 /* reset all but tr, trace, and overruns */
8610                 memset(&iter.seq, 0,
8611                        sizeof(struct trace_iterator) -
8612                        offsetof(struct trace_iterator, seq));
8613                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8614                 iter.pos = -1;
8615
8616                 if (trace_find_next_entry_inc(&iter) != NULL) {
8617                         int ret;
8618
8619                         ret = print_trace_line(&iter);
8620                         if (ret != TRACE_TYPE_NO_CONSUME)
8621                                 trace_consume(&iter);
8622                 }
8623                 touch_nmi_watchdog();
8624
8625                 trace_printk_seq(&iter.seq);
8626         }
8627
8628         if (!cnt)
8629                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8630         else
8631                 printk(KERN_TRACE "---------------------------------\n");
8632
8633  out_enable:
8634         tr->trace_flags |= old_userobj;
8635
8636         for_each_tracing_cpu(cpu) {
8637                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8638         }
8639         atomic_dec(&dump_running);
8640         printk_nmi_direct_exit();
8641         local_irq_restore(flags);
8642 }
8643 EXPORT_SYMBOL_GPL(ftrace_dump);
8644
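/*
 * Split @buf into an argv array and pass it to @createfn. Returns
 * @createfn's result (0 if @buf holds no words), or -ENOMEM if the
 * split itself fails.
 */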
8645 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8646 {
8647         char **argv;
8648         int argc, ret;
8649
8650         argc = 0;
8651         ret = 0;
8652         argv = argv_split(GFP_KERNEL, buf, &argc);
8653         if (!argv)
8654                 return -ENOMEM;
8655
8656         if (argc)
8657                 ret = createfn(argc, argv);
8658
8659         argv_free(argv);
8660
8661         return ret;
8662 }
8663
8664 #define WRITE_BUFSIZE  4096
8665
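/*
 * Copy user input in chunks of up to WRITE_BUFSIZE, split it on
 * newlines, strip '#' comments, and run each resulting line through
 * trace_run_command() with @createfn. On success the number of bytes
 * processed is returned.
 */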
8666 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8667                                 size_t count, loff_t *ppos,
8668                                 int (*createfn)(int, char **))
8669 {
8670         char *kbuf, *buf, *tmp;
8671         int ret = 0;
8672         size_t done = 0;
8673         size_t size;
8674
8675         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8676         if (!kbuf)
8677                 return -ENOMEM;
8678
8679         while (done < count) {
8680                 size = count - done;
8681
8682                 if (size >= WRITE_BUFSIZE)
8683                         size = WRITE_BUFSIZE - 1;
8684
8685                 if (copy_from_user(kbuf, buffer + done, size)) {
8686                         ret = -EFAULT;
8687                         goto out;
8688                 }
8689                 kbuf[size] = '\0';
8690                 buf = kbuf;
8691                 do {
8692                         tmp = strchr(buf, '\n');
8693                         if (tmp) {
8694                                 *tmp = '\0';
8695                                 size = tmp - buf + 1;
8696                         } else {
8697                                 size = strlen(buf);
8698                                 if (done + size < count) {
8699                                         if (buf != kbuf)
8700                                                 break;
8701                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8702                                         pr_warn("Line length is too long: Should be less than %d\n",
8703                                                 WRITE_BUFSIZE - 2);
8704                                         ret = -EINVAL;
8705                                         goto out;
8706                                 }
8707                         }
8708                         done += size;
8709
8710                         /* Remove comments */
8711                         tmp = strchr(buf, '#');
8712
8713                         if (tmp)
8714                                 *tmp = '\0';
8715
8716                         ret = trace_run_command(buf, createfn);
8717                         if (ret)
8718                                 goto out;
8719                         buf += size;
8720
8721                 } while (done < count);
8722         }
8723         ret = done;
8724
8725 out:
8726         kfree(kbuf);
8727
8728         return ret;
8729 }
8730
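/*
 * Main boot-time setup: allocate the cpumasks and ring buffers for the
 * top level trace array, register the nop tracer and the panic/die
 * notifiers, and finally clear tracing_disabled. Called from
 * early_trace_init().
 */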
8731 __init static int tracer_alloc_buffers(void)
8732 {
8733         int ring_buf_size;
8734         int ret = -ENOMEM;
8735
8736         /*
8737          * Make sure we don't accidentally add more trace options
8738          * than we have bits for.
8739          */
8740         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8741
8742         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8743                 goto out;
8744
8745         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8746                 goto out_free_buffer_mask;
8747
8748         /* Only allocate trace_printk buffers if a trace_printk exists */
8749         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8750                 /* Must be called before global_trace.buffer is allocated */
8751                 trace_printk_init_buffers();
8752
8753         /* To save memory, keep the ring buffer size to its minimum */
8754         if (ring_buffer_expanded)
8755                 ring_buf_size = trace_buf_size;
8756         else
8757                 ring_buf_size = 1;
8758
8759         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8760         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8761
8762         raw_spin_lock_init(&global_trace.start_lock);
8763
8764         /*
8765          * The prepare callback allocates some memory for the ring buffer. We
8766          * don't free the buffer if the CPU goes down. If we were to free
8767          * the buffer, then the user would lose any trace that was in the
8768          * buffer. The memory will be removed once the "instance" is removed.
8769          */
8770         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8771                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8772                                       NULL);
8773         if (ret < 0)
8774                 goto out_free_cpumask;
8775         /* Used for event triggers */
8776         ret = -ENOMEM;
8777         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8778         if (!temp_buffer)
8779                 goto out_rm_hp_state;
8780
8781         if (trace_create_savedcmd() < 0)
8782                 goto out_free_temp_buffer;
8783
8784         /* TODO: make the number of buffers hot pluggable with CPUS */
8785         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8786                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8787                 WARN_ON(1);
8788                 goto out_free_savedcmd;
8789         }
8790
8791         if (global_trace.buffer_disabled)
8792                 tracing_off();
8793
8794         if (trace_boot_clock) {
8795                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8796                 if (ret < 0)
8797                         pr_warn("Trace clock %s not defined, going back to default\n",
8798                                 trace_boot_clock);
8799         }
8800
8801         /*
8802          * register_tracer() might reference current_trace, so it
8803          * needs to be set before we register anything. This is
8804          * just a bootstrap of current_trace anyway.
8805          */
8806         global_trace.current_trace = &nop_trace;
8807
8808         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8809
8810         ftrace_init_global_array_ops(&global_trace);
8811
8812         init_trace_flags_index(&global_trace);
8813
8814         register_tracer(&nop_trace);
8815
8816         /* Function tracing may start here (via kernel command line) */
8817         init_function_trace();
8818
8819         /* All seems OK, enable tracing */
8820         tracing_disabled = 0;
8821
8822         atomic_notifier_chain_register(&panic_notifier_list,
8823                                        &trace_panic_notifier);
8824
8825         register_die_notifier(&trace_die_notifier);
8826
8827         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8828
8829         INIT_LIST_HEAD(&global_trace.systems);
8830         INIT_LIST_HEAD(&global_trace.events);
8831         INIT_LIST_HEAD(&global_trace.hist_vars);
8832         list_add(&global_trace.list, &ftrace_trace_arrays);
8833
8834         apply_trace_boot_options();
8835
8836         register_snapshot_cmd();
8837
8838         return 0;
8839
8840 out_free_savedcmd:
8841         free_saved_cmdlines_buffer(savedcmd);
8842 out_free_temp_buffer:
8843         ring_buffer_free(temp_buffer);
8844 out_rm_hp_state:
8845         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8846 out_free_cpumask:
8847         free_cpumask_var(global_trace.tracing_cpumask);
8848 out_free_buffer_mask:
8849         free_cpumask_var(tracing_buffer_mask);
8850 out:
8851         return ret;
8852 }
8853
8854 void __init early_trace_init(void)
8855 {
8856         if (tracepoint_printk) {
8857                 tracepoint_print_iter =
8858                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8859                 if (WARN_ON(!tracepoint_print_iter))
8860                         tracepoint_printk = 0;
8861                 else
8862                         static_key_enable(&tracepoint_printk_key.key);
8863         }
8864         tracer_alloc_buffers();
8865 }
8866
8867 void __init trace_init(void)
8868 {
8869         trace_event_init();
8870 }
8871
8872 __init static int clear_boot_tracer(void)
8873 {
8874         /*
8875          * The buffer holding the default bootup tracer name lives in
8876          * an init section. This function is called at late_initcall
8877          * time; if the boot tracer was never registered, clear the
8878          * pointer out here, to prevent a later registration from
8879          * accessing the buffer after it has been freed.
8880          */
8881         if (!default_bootup_tracer)
8882                 return 0;
8883
8884         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8885                default_bootup_tracer);
8886         default_bootup_tracer = NULL;
8887
8888         return 0;
8889 }
8890
8891 fs_initcall(tracer_init_tracefs);
8892 late_initcall_sync(clear_boot_tracer);
8893
8894 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8895 __init static int tracing_set_default_clock(void)
8896 {
8897         /* sched_clock_stable() is determined in late_initcall */
8898         if (!trace_boot_clock && !sched_clock_stable()) {
8899                 printk(KERN_WARNING
8900                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8901                        "If you want to keep using the local clock, then add:\n"
8902                        "  \"trace_clock=local\"\n"
8903                        "on the kernel command line\n");
8904                 tracing_set_clock(&global_trace, "global");
8905         }
8906
8907         return 0;
8908 }
8909 late_initcall_sync(tracing_set_default_clock);
8910 #endif