1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although some concurrent
61  * insertions into the ring-buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * than "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
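/*
 * Illustrative layout of one saved array, as described by the comment
 * above (a sketch, not a definition):
 *
 *   [ head: length, mod ][ map ][ map ] ... [ map ][ tail: next, end ]
 *
 * head.length holds the number of map entries, and tail.next points to
 * the next saved array, if any.
 */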
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
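/*
 * Convert nanoseconds to microseconds, rounding to the nearest
 * microsecond (the +500 below rounds before dividing by 1000).
 */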
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * When adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork, and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which happens on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
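/*
 * A minimal sketch (illustrative only; the wrapper names are hypothetical)
 * of how trace_pid_start/next/show can be wired into seq_file operations,
 * assuming wrappers that fetch the pid_list from the seq_file private data:
 *
 *   static const struct seq_operations example_pid_sops = {
 *           .start = example_pid_seq_start,  // calls trace_pid_start()
 *           .next  = example_pid_seq_next,   // calls trace_pid_next()
 *           .stop  = example_pid_seq_stop,
 *           .show  = trace_pid_show,
 *   };
 */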
472
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always create a new array when the user adds new pids; the
494          * write is an all-or-nothing operation. If the operation fails,
495          * the current list is left unmodified, otherwise the new array
496          * replaces it.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522
523         while (cnt > 0) {
524
525                 pos = 0;
526
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
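/*
 * Callers typically use this from a tracefs write handler: for example
 * (a hedged illustration, exact paths depend on the config), writing
 * "123 456" to a pid filter file such as set_event_pid adds pids 123 and
 * 456, while writing an empty string clears the list (nr_pids == 0 above).
 */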
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled", which can be checked from fast paths
592  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much appreciated not to have
614  * to wait for all that output. In any case, this can be configured
615  * at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a linked list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level
633  * protection. The validity of the events (returned by ring_buffer_peek()
634  * etc.) is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page holding the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) The page holding the consumed events may become a page for
642  *      splice_read, and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Second, block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
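/*
 * Typical usage of the helpers above (a sketch, matching the readers in
 * this file): pair each lock with an unlock around consuming reads of a
 * cpu buffer, e.g.
 *
 *   trace_access_lock(cpu);
 *   ... read/consume events for @cpu (or RING_BUFFER_ALL_CPUS) ...
 *   trace_access_unlock(cpu);
 */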
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
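/*
 * The writers below follow the usual reserve/fill/commit pattern:
 * __trace_buffer_lock_reserve() reserves an event, ring_buffer_event_data()
 * returns the payload to fill in, and __buffer_unlock_commit() publishes it
 * (see __trace_puts() and __trace_bputs() for concrete examples).
 */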
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
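/*
 * Note: users normally reach this (or __trace_bputs() for string constants)
 * via the trace_puts() helper macro rather than calling __trace_puts()
 * directly; the macro supplies the caller's instruction pointer and the
 * string length.
 */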
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id(), cond_data);
924         local_irq_restore(flags);
925 }
926
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929         tracing_snapshot_instance_cond(tr, NULL);
930 }
931
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot either with
940  * tracing_snapshot_alloc(), or manually with:
941  *   echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, this will stop tracing,
944  * basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948         struct trace_array *tr = &global_trace;
949
950         tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:         The tracing instance to snapshot
957  * @cond_data:  The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969         tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:         The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989         void *cond_data = NULL;
990
991         arch_spin_lock(&tr->max_lock);
992
993         if (tr->cond_snapshot)
994                 cond_data = tr->cond_snapshot->cond_data;
995
996         arch_spin_unlock(&tr->max_lock);
997
998         return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003                                         struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008         int ret;
1009
1010         if (!tr->allocated_snapshot) {
1011
1012                 /* allocate spare buffer */
1013                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015                 if (ret < 0)
1016                         return ret;
1017
1018                 tr->allocated_snapshot = true;
1019         }
1020
1021         return 0;
1022 }
1023
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026         /*
1027          * We don't free the ring buffer; instead, we resize it, because
1028          * the max_tr ring buffer has some state (e.g. ring->clock) and
1029          * we want to preserve it.
1030          */
1031         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032         set_buffer_entries(&tr->max_buffer, 1);
1033         tracing_reset_online_cpus(&tr->max_buffer);
1034         tr->allocated_snapshot = false;
1035 }
1036
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049         struct trace_array *tr = &global_trace;
1050         int ret;
1051
1052         ret = tracing_alloc_snapshot_instance(tr);
1053         WARN_ON(ret < 0);
1054
1055         return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072         int ret;
1073
1074         ret = tracing_alloc_snapshot();
1075         if (ret < 0)
1076                 return;
1077
1078         tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:         The tracing instance
1085  * @cond_data:  User data to associate with the snapshot
1086  * @update:     Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096                                  cond_update_fn_t update)
1097 {
1098         struct cond_snapshot *cond_snapshot;
1099         int ret = 0;
1100
1101         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102         if (!cond_snapshot)
1103                 return -ENOMEM;
1104
1105         cond_snapshot->cond_data = cond_data;
1106         cond_snapshot->update = update;
1107
1108         mutex_lock(&trace_types_lock);
1109
1110         ret = tracing_alloc_snapshot_instance(tr);
1111         if (ret)
1112                 goto fail_unlock;
1113
1114         if (tr->current_trace->use_max_tr) {
1115                 ret = -EBUSY;
1116                 goto fail_unlock;
1117         }
1118
1119         if (tr->cond_snapshot) {
1120                 ret = -EBUSY;
1121                 goto fail_unlock;
1122         }
1123
1124         arch_spin_lock(&tr->max_lock);
1125         tr->cond_snapshot = cond_snapshot;
1126         arch_spin_unlock(&tr->max_lock);
1127
1128         mutex_unlock(&trace_types_lock);
1129
1130         return ret;
1131
1132  fail_unlock:
1133         mutex_unlock(&trace_types_lock);
1134         kfree(cond_snapshot);
1135         return ret;
1136 }
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
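/*
 * A minimal usage sketch (illustrative only; my_update and my_data are
 * hypothetical names, not part of this file):
 *
 *   static bool my_update(struct trace_array *tr, void *cond_data)
 *   {
 *           return true;    // decide here whether the snapshot should happen
 *   }
 *
 *   tracing_snapshot_cond_enable(tr, my_data, my_update);
 *   ...
 *   tracing_snapshot_cond(tr, my_data);  // snapshots only if my_update() returns true
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */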
1138
1139 /**
1140  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1141  * @tr:         The tracing instance
1142  *
1143  * Check whether the conditional snapshot for the given instance is
1144  * enabled; if so, free the cond_snapshot associated with it,
1145  * otherwise return -EINVAL.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_disable(struct trace_array *tr)
1150 {
1151         int ret = 0;
1152
1153         arch_spin_lock(&tr->max_lock);
1154
1155         if (!tr->cond_snapshot)
1156                 ret = -EINVAL;
1157         else {
1158                 kfree(tr->cond_snapshot);
1159                 tr->cond_snapshot = NULL;
1160         }
1161
1162         arch_spin_unlock(&tr->max_lock);
1163
1164         return ret;
1165 }
1166 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1167 #else
1168 void tracing_snapshot(void)
1169 {
1170         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1171 }
1172 EXPORT_SYMBOL_GPL(tracing_snapshot);
1173 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1174 {
1175         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1178 int tracing_alloc_snapshot(void)
1179 {
1180         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1181         return -ENODEV;
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1184 void tracing_snapshot_alloc(void)
1185 {
1186         /* Give warning */
1187         tracing_snapshot();
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192         return NULL;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1196 {
1197         return -ENODEV;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 int tracing_snapshot_cond_disable(struct trace_array *tr)
1201 {
1202         return false;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1205 #endif /* CONFIG_TRACER_SNAPSHOT */
1206
1207 void tracer_tracing_off(struct trace_array *tr)
1208 {
1209         if (tr->trace_buffer.buffer)
1210                 ring_buffer_record_off(tr->trace_buffer.buffer);
1211         /*
1212          * This flag is looked at when buffers haven't been allocated
1213          * yet, or by some tracers (like irqsoff), that just want to
1214          * know if the ring buffer has been disabled, but it can handle
1215          * races of where it gets disabled but we still do a record.
1216          * As the check is in the fast path of the tracers, it is more
1217          * important to be fast than accurate.
1218          */
1219         tr->buffer_disabled = 1;
1220         /* Make the flag seen by readers */
1221         smp_wmb();
1222 }
1223
1224 /**
1225  * tracing_off - turn off tracing buffers
1226  *
1227  * This function stops the tracing buffers from recording data.
1228  * It does not disable any overhead the tracers themselves may
1229  * be causing. This function simply causes all recording to
1230  * the ring buffers to fail.
1231  */
1232 void tracing_off(void)
1233 {
1234         tracer_tracing_off(&global_trace);
1235 }
1236 EXPORT_SYMBOL_GPL(tracing_off);
1237
1238 void disable_trace_on_warning(void)
1239 {
1240         if (__disable_trace_on_warning)
1241                 tracing_off();
1242 }
1243
1244 /**
1245  * tracer_tracing_is_on - show real state of ring buffer enabled
1246  * @tr: the trace array to check whether its ring buffer is enabled
1247  *
1248  * Shows real state of the ring buffer if it is enabled or not.
1249  */
1250 bool tracer_tracing_is_on(struct trace_array *tr)
1251 {
1252         if (tr->trace_buffer.buffer)
1253                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1254         return !tr->buffer_disabled;
1255 }
1256
1257 /**
1258  * tracing_is_on - show state of ring buffers enabled
1259  */
1260 int tracing_is_on(void)
1261 {
1262         return tracer_tracing_is_on(&global_trace);
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_is_on);
1265
1266 static int __init set_buf_size(char *str)
1267 {
1268         unsigned long buf_size;
1269
1270         if (!str)
1271                 return 0;
1272         buf_size = memparse(str, &str);
1273         /* nr_entries can not be zero */
1274         if (buf_size == 0)
1275                 return 0;
1276         trace_buf_size = buf_size;
1277         return 1;
1278 }
1279 __setup("trace_buf_size=", set_buf_size);
1280
1281 static int __init set_tracing_thresh(char *str)
1282 {
1283         unsigned long threshold;
1284         int ret;
1285
1286         if (!str)
1287                 return 0;
1288         ret = kstrtoul(str, 0, &threshold);
1289         if (ret < 0)
1290                 return 0;
1291         tracing_thresh = threshold * 1000;
1292         return 1;
1293 }
1294 __setup("tracing_thresh=", set_tracing_thresh);
1295
1296 unsigned long nsecs_to_usecs(unsigned long nsecs)
1297 {
1298         return nsecs / 1000;
1299 }
1300
1301 /*
1302  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1303  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1304  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1305  * of strings in the order that the evals (enum) were defined.
1306  */
1307 #undef C
1308 #define C(a, b) b
1309
1310 /* These must match the bit positions in trace_iterator_flags */
1311 static const char *trace_options[] = {
1312         TRACE_FLAGS
1313         NULL
1314 };
1315
1316 static struct {
1317         u64 (*func)(void);
1318         const char *name;
1319         int in_ns;              /* is this clock in nanoseconds? */
1320 } trace_clocks[] = {
1321         { trace_clock_local,            "local",        1 },
1322         { trace_clock_global,           "global",       1 },
1323         { trace_clock_counter,          "counter",      0 },
1324         { trace_clock_jiffies,          "uptime",       0 },
1325         { trace_clock,                  "perf",         1 },
1326         { ktime_get_mono_fast_ns,       "mono",         1 },
1327         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1328         { ktime_get_boot_fast_ns,       "boot",         1 },
1329         ARCH_TRACE_CLOCKS
1330 };
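/*
 * The "name" strings above are what user space selects, e.g. (assuming the
 * usual tracefs mount point) "echo global > /sys/kernel/tracing/trace_clock".
 */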
1331
1332 bool trace_clock_in_ns(struct trace_array *tr)
1333 {
1334         if (trace_clocks[tr->clock_id].in_ns)
1335                 return true;
1336
1337         return false;
1338 }
1339
1340 /*
1341  * trace_parser_get_init - gets the buffer for trace parser
1342  */
1343 int trace_parser_get_init(struct trace_parser *parser, int size)
1344 {
1345         memset(parser, 0, sizeof(*parser));
1346
1347         parser->buffer = kmalloc(size, GFP_KERNEL);
1348         if (!parser->buffer)
1349                 return 1;
1350
1351         parser->size = size;
1352         return 0;
1353 }
1354
1355 /*
1356  * trace_parser_put - frees the buffer for trace parser
1357  */
1358 void trace_parser_put(struct trace_parser *parser)
1359 {
1360         kfree(parser->buffer);
1361         parser->buffer = NULL;
1362 }
1363
1364 /*
1365  * trace_get_user - reads the user input string separated by space
1366  * (matched by isspace(ch))
1367  *
1368  * For each string found the 'struct trace_parser' is updated,
1369  * and the function returns.
1370  *
1371  * Returns number of bytes read.
1372  *
1373  * See kernel/trace/trace.h for 'struct trace_parser' details.
1374  */
1375 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1376         size_t cnt, loff_t *ppos)
1377 {
1378         char ch;
1379         size_t read = 0;
1380         ssize_t ret;
1381
1382         if (!*ppos)
1383                 trace_parser_clear(parser);
1384
1385         ret = get_user(ch, ubuf++);
1386         if (ret)
1387                 goto out;
1388
1389         read++;
1390         cnt--;
1391
1392         /*
1393          * If the parser is not finished with the last write,
1394          * continue reading the user input without skipping spaces.
1395          */
1396         if (!parser->cont) {
1397                 /* skip white space */
1398                 while (cnt && isspace(ch)) {
1399                         ret = get_user(ch, ubuf++);
1400                         if (ret)
1401                                 goto out;
1402                         read++;
1403                         cnt--;
1404                 }
1405
1406                 parser->idx = 0;
1407
1408                 /* only spaces were written */
1409                 if (isspace(ch) || !ch) {
1410                         *ppos += read;
1411                         ret = read;
1412                         goto out;
1413                 }
1414         }
1415
1416         /* read the non-space input */
1417         while (cnt && !isspace(ch) && ch) {
1418                 if (parser->idx < parser->size - 1)
1419                         parser->buffer[parser->idx++] = ch;
1420                 else {
1421                         ret = -EINVAL;
1422                         goto out;
1423                 }
1424                 ret = get_user(ch, ubuf++);
1425                 if (ret)
1426                         goto out;
1427                 read++;
1428                 cnt--;
1429         }
1430
1431         /* We either got finished input or we have to wait for another call. */
1432         if (isspace(ch) || !ch) {
1433                 parser->buffer[parser->idx] = 0;
1434                 parser->cont = false;
1435         } else if (parser->idx < parser->size - 1) {
1436                 parser->cont = true;
1437                 parser->buffer[parser->idx++] = ch;
1438                 /* Make sure the parsed string always terminates with '\0'. */
1439                 parser->buffer[parser->idx] = 0;
1440         } else {
1441                 ret = -EINVAL;
1442                 goto out;
1443         }
1444
1445         *ppos += read;
1446         ret = read;
1447
1448 out:
1449         return ret;
1450 }
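/*
 * Callers typically invoke this in a loop until the user buffer is
 * exhausted, clearing the parser between tokens; see trace_pid_write()
 * above for a concrete example.
 */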
1451
1452 /* TODO add a seq_buf_to_buffer() */
1453 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1454 {
1455         int len;
1456
1457         if (trace_seq_used(s) <= s->seq.readpos)
1458                 return -EBUSY;
1459
1460         len = trace_seq_used(s) - s->seq.readpos;
1461         if (cnt > len)
1462                 cnt = len;
1463         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1464
1465         s->seq.readpos += cnt;
1466         return cnt;
1467 }
1468
1469 unsigned long __read_mostly     tracing_thresh;
1470
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472 /*
1473  * Copy the new maximum trace into the separate maximum-trace
1474  * structure. (this way the maximum trace is permanently saved,
1475  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1476  */
1477 static void
1478 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1479 {
1480         struct trace_buffer *trace_buf = &tr->trace_buffer;
1481         struct trace_buffer *max_buf = &tr->max_buffer;
1482         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1483         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1484
1485         max_buf->cpu = cpu;
1486         max_buf->time_start = data->preempt_timestamp;
1487
1488         max_data->saved_latency = tr->max_latency;
1489         max_data->critical_start = data->critical_start;
1490         max_data->critical_end = data->critical_end;
1491
1492         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1493         max_data->pid = tsk->pid;
1494         /*
1495          * If tsk == current, then use current_uid(), as that does not use
1496          * RCU. The irq tracer can be called out of RCU scope.
1497          */
1498         if (tsk == current)
1499                 max_data->uid = current_uid();
1500         else
1501                 max_data->uid = task_uid(tsk);
1502
1503         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1504         max_data->policy = tsk->policy;
1505         max_data->rt_priority = tsk->rt_priority;
1506
1507         /* record this task's comm */
1508         tracing_record_cmdline(tsk);
1509 }
1510
1511 /**
1512  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1513  * @tr: tracer
1514  * @tsk: the task with the latency
1515  * @cpu: The cpu that initiated the trace.
1516  * @cond_data: User data associated with a conditional snapshot
1517  *
1518  * Flip the buffers between the @tr and the max_tr and record information
1519  * about which task was the cause of this latency.
1520  */
1521 void
1522 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1523               void *cond_data)
1524 {
1525         if (tr->stop_count)
1526                 return;
1527
1528         WARN_ON_ONCE(!irqs_disabled());
1529
1530         if (!tr->allocated_snapshot) {
1531                 /* Only the nop tracer should hit this when disabling */
1532                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1533                 return;
1534         }
1535
1536         arch_spin_lock(&tr->max_lock);
1537
1538         /* Inherit the recordable setting from trace_buffer */
1539         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1540                 ring_buffer_record_on(tr->max_buffer.buffer);
1541         else
1542                 ring_buffer_record_off(tr->max_buffer.buffer);
1543
1544 #ifdef CONFIG_TRACER_SNAPSHOT
1545         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1546                 goto out_unlock;
1547 #endif
1548         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1549
1550         __update_max_tr(tr, tsk, cpu);
1551
1552  out_unlock:
1553         arch_spin_unlock(&tr->max_lock);
1554 }
1555
1556 /**
1557  * update_max_tr_single - only copy one trace over, and reset the rest
1558  * @tr: tracer
1559  * @tsk: task with the latency
1560  * @cpu: the cpu of the buffer to copy.
1561  *
1562  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1563  */
1564 void
1565 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1566 {
1567         int ret;
1568
1569         if (tr->stop_count)
1570                 return;
1571
1572         WARN_ON_ONCE(!irqs_disabled());
1573         if (!tr->allocated_snapshot) {
1574                 /* Only the nop tracer should hit this when disabling */
1575                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1576                 return;
1577         }
1578
1579         arch_spin_lock(&tr->max_lock);
1580
1581         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1582
1583         if (ret == -EBUSY) {
1584                 /*
1585                  * We failed to swap the buffer due to a commit taking
1586                  * place on this CPU. We fail to record, but we reset
1587                  * the max trace buffer (no one writes directly to it)
1588                  * and flag that it failed.
1589                  */
1590                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1591                         "Failed to swap buffers due to commit in progress\n");
1592         }
1593
1594         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1595
1596         __update_max_tr(tr, tsk, cpu);
1597         arch_spin_unlock(&tr->max_lock);
1598 }
1599 #endif /* CONFIG_TRACER_MAX_TRACE */
1600
1601 static int wait_on_pipe(struct trace_iterator *iter, int full)
1602 {
1603         /* Iterators are static, they should be filled or empty */
1604         if (trace_buffer_iter(iter, iter->cpu_file))
1605                 return 0;
1606
1607         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1608                                 full);
1609 }
1610
1611 #ifdef CONFIG_FTRACE_STARTUP_TEST
1612 static bool selftests_can_run;
1613
1614 struct trace_selftests {
1615         struct list_head                list;
1616         struct tracer                   *type;
1617 };
1618
1619 static LIST_HEAD(postponed_selftests);
1620
1621 static int save_selftest(struct tracer *type)
1622 {
1623         struct trace_selftests *selftest;
1624
1625         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1626         if (!selftest)
1627                 return -ENOMEM;
1628
1629         selftest->type = type;
1630         list_add(&selftest->list, &postponed_selftests);
1631         return 0;
1632 }
1633
1634 static int run_tracer_selftest(struct tracer *type)
1635 {
1636         struct trace_array *tr = &global_trace;
1637         struct tracer *saved_tracer = tr->current_trace;
1638         int ret;
1639
1640         if (!type->selftest || tracing_selftest_disabled)
1641                 return 0;
1642
1643         /*
1644          * If a tracer registers early in boot up (before scheduling is
1645          * initialized and such), then do not run its selftests yet.
1646          * Instead, run it a little later in the boot process.
1647          */
1648         if (!selftests_can_run)
1649                 return save_selftest(type);
1650
1651         /*
1652          * Run a selftest on this tracer.
1653          * Here we reset the trace buffer, and set the current
1654          * tracer to be this tracer. The tracer can then run some
1655          * internal tracing to verify that everything is in order.
1656          * If we fail, we do not register this tracer.
1657          */
1658         tracing_reset_online_cpus(&tr->trace_buffer);
1659
1660         tr->current_trace = type;
1661
1662 #ifdef CONFIG_TRACER_MAX_TRACE
1663         if (type->use_max_tr) {
1664                 /* If we expanded the buffers, make sure the max is expanded too */
1665                 if (ring_buffer_expanded)
1666                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1667                                            RING_BUFFER_ALL_CPUS);
1668                 tr->allocated_snapshot = true;
1669         }
1670 #endif
1671
1672         /* the test is responsible for initializing and enabling */
1673         pr_info("Testing tracer %s: ", type->name);
1674         ret = type->selftest(type, tr);
1675         /* the test is responsible for resetting too */
1676         tr->current_trace = saved_tracer;
1677         if (ret) {
1678                 printk(KERN_CONT "FAILED!\n");
1679                 /* Add the warning after printing 'FAILED' */
1680                 WARN_ON(1);
1681                 return -1;
1682         }
1683         /* Only reset on passing, to avoid touching corrupted buffers */
1684         tracing_reset_online_cpus(&tr->trace_buffer);
1685
1686 #ifdef CONFIG_TRACER_MAX_TRACE
1687         if (type->use_max_tr) {
1688                 tr->allocated_snapshot = false;
1689
1690                 /* Shrink the max buffer again */
1691                 if (ring_buffer_expanded)
1692                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1693                                            RING_BUFFER_ALL_CPUS);
1694         }
1695 #endif
1696
1697         printk(KERN_CONT "PASSED\n");
1698         return 0;
1699 }
1700
1701 static __init int init_trace_selftests(void)
1702 {
1703         struct trace_selftests *p, *n;
1704         struct tracer *t, **last;
1705         int ret;
1706
1707         selftests_can_run = true;
1708
1709         mutex_lock(&trace_types_lock);
1710
1711         if (list_empty(&postponed_selftests))
1712                 goto out;
1713
1714         pr_info("Running postponed tracer tests:\n");
1715
1716         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1717                 ret = run_tracer_selftest(p->type);
1718                 /* If the test fails, then warn and remove from available_tracers */
1719                 if (ret < 0) {
1720                         WARN(1, "tracer: %s failed selftest, disabling\n",
1721                              p->type->name);
1722                         last = &trace_types;
1723                         for (t = trace_types; t; t = t->next) {
1724                                 if (t == p->type) {
1725                                         *last = t->next;
1726                                         break;
1727                                 }
1728                                 last = &t->next;
1729                         }
1730                 }
1731                 list_del(&p->list);
1732                 kfree(p);
1733         }
1734
1735  out:
1736         mutex_unlock(&trace_types_lock);
1737
1738         return 0;
1739 }
1740 core_initcall(init_trace_selftests);
1741 #else
1742 static inline int run_tracer_selftest(struct tracer *type)
1743 {
1744         return 0;
1745 }
1746 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1747
1748 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1749
1750 static void __init apply_trace_boot_options(void);
1751
1752 /**
1753  * register_tracer - register a tracer with the ftrace system.
1754  * @type: the plugin for the tracer
1755  *
1756  * Register a new plugin tracer.
1757  */
1758 int __init register_tracer(struct tracer *type)
1759 {
1760         struct tracer *t;
1761         int ret = 0;
1762
1763         if (!type->name) {
1764                 pr_info("Tracer must have a name\n");
1765                 return -1;
1766         }
1767
1768         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1769                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1770                 return -1;
1771         }
1772
1773         mutex_lock(&trace_types_lock);
1774
1775         tracing_selftest_running = true;
1776
1777         for (t = trace_types; t; t = t->next) {
1778                 if (strcmp(type->name, t->name) == 0) {
1779                         /* already found */
1780                         pr_info("Tracer %s already registered\n",
1781                                 type->name);
1782                         ret = -1;
1783                         goto out;
1784                 }
1785         }
1786
1787         if (!type->set_flag)
1788                 type->set_flag = &dummy_set_flag;
1789         if (!type->flags) {
1790                 /*allocate a dummy tracer_flags*/
1791                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1792                 if (!type->flags) {
1793                         ret = -ENOMEM;
1794                         goto out;
1795                 }
1796                 type->flags->val = 0;
1797                 type->flags->opts = dummy_tracer_opt;
1798         } else
1799                 if (!type->flags->opts)
1800                         type->flags->opts = dummy_tracer_opt;
1801
1802         /* store the tracer for __set_tracer_option */
1803         type->flags->trace = type;
1804
1805         ret = run_tracer_selftest(type);
1806         if (ret < 0)
1807                 goto out;
1808
1809         type->next = trace_types;
1810         trace_types = type;
1811         add_tracer_options(&global_trace, type);
1812
1813  out:
1814         tracing_selftest_running = false;
1815         mutex_unlock(&trace_types_lock);
1816
1817         if (ret || !default_bootup_tracer)
1818                 goto out_unlock;
1819
1820         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1821                 goto out_unlock;
1822
1823         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1824         /* Do we want this tracer to start on bootup? */
1825         tracing_set_tracer(&global_trace, type->name);
1826         default_bootup_tracer = NULL;
1827
1828         apply_trace_boot_options();
1829
1830         /* disable other selftests, since this will break it. */
1831         tracing_selftest_disabled = true;
1832 #ifdef CONFIG_FTRACE_STARTUP_TEST
1833         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1834                type->name);
1835 #endif
1836
1837  out_unlock:
1838         return ret;
1839 }
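/*
 * Usage sketch (illustrative, not part of this file): a minimal tracer
 * plugin registered at boot.  The "example_tracer" names are hypothetical;
 * a real tracer would usually also supply selftest/start/stop callbacks.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */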
1840
1841 void tracing_reset(struct trace_buffer *buf, int cpu)
1842 {
1843         struct ring_buffer *buffer = buf->buffer;
1844
1845         if (!buffer)
1846                 return;
1847
1848         ring_buffer_record_disable(buffer);
1849
1850         /* Make sure all commits have finished */
1851         synchronize_rcu();
1852         ring_buffer_reset_cpu(buffer, cpu);
1853
1854         ring_buffer_record_enable(buffer);
1855 }
1856
1857 void tracing_reset_online_cpus(struct trace_buffer *buf)
1858 {
1859         struct ring_buffer *buffer = buf->buffer;
1860         int cpu;
1861
1862         if (!buffer)
1863                 return;
1864
1865         ring_buffer_record_disable(buffer);
1866
1867         /* Make sure all commits have finished */
1868         synchronize_rcu();
1869
1870         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1871
1872         for_each_online_cpu(cpu)
1873                 ring_buffer_reset_cpu(buffer, cpu);
1874
1875         ring_buffer_record_enable(buffer);
1876 }
1877
1878 /* Must have trace_types_lock held */
1879 void tracing_reset_all_online_cpus(void)
1880 {
1881         struct trace_array *tr;
1882
1883         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1884                 if (!tr->clear_trace)
1885                         continue;
1886                 tr->clear_trace = false;
1887                 tracing_reset_online_cpus(&tr->trace_buffer);
1888 #ifdef CONFIG_TRACER_MAX_TRACE
1889                 tracing_reset_online_cpus(&tr->max_buffer);
1890 #endif
1891         }
1892 }
1893
1894 static int *tgid_map;
1895
1896 #define SAVED_CMDLINES_DEFAULT 128
1897 #define NO_CMDLINE_MAP UINT_MAX
1898 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1899 struct saved_cmdlines_buffer {
1900         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1901         unsigned *map_cmdline_to_pid;
1902         unsigned cmdline_num;
1903         int cmdline_idx;
1904         char *saved_cmdlines;
1905 };
1906 static struct saved_cmdlines_buffer *savedcmd;
1907
1908 /* temporarily disable recording */
1909 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1910
1911 static inline char *get_saved_cmdlines(int idx)
1912 {
1913         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1914 }
1915
1916 static inline void set_cmdline(int idx, const char *cmdline)
1917 {
1918         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1919 }
1920
1921 static int allocate_cmdlines_buffer(unsigned int val,
1922                                     struct saved_cmdlines_buffer *s)
1923 {
1924         s->map_cmdline_to_pid = kmalloc_array(val,
1925                                               sizeof(*s->map_cmdline_to_pid),
1926                                               GFP_KERNEL);
1927         if (!s->map_cmdline_to_pid)
1928                 return -ENOMEM;
1929
1930         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1931         if (!s->saved_cmdlines) {
1932                 kfree(s->map_cmdline_to_pid);
1933                 return -ENOMEM;
1934         }
1935
1936         s->cmdline_idx = 0;
1937         s->cmdline_num = val;
1938         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1939                sizeof(s->map_pid_to_cmdline));
1940         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1941                val * sizeof(*s->map_cmdline_to_pid));
1942
1943         return 0;
1944 }
1945
1946 static int trace_create_savedcmd(void)
1947 {
1948         int ret;
1949
1950         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1951         if (!savedcmd)
1952                 return -ENOMEM;
1953
1954         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1955         if (ret < 0) {
1956                 kfree(savedcmd);
1957                 savedcmd = NULL;
1958                 return -ENOMEM;
1959         }
1960
1961         return 0;
1962 }
1963
1964 int is_tracing_stopped(void)
1965 {
1966         return global_trace.stop_count;
1967 }
1968
1969 /**
1970  * tracing_start - quick start of the tracer
1971  *
1972  * If tracing is enabled but was stopped by tracing_stop,
1973  * this will start the tracer back up.
1974  */
1975 void tracing_start(void)
1976 {
1977         struct ring_buffer *buffer;
1978         unsigned long flags;
1979
1980         if (tracing_disabled)
1981                 return;
1982
1983         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1984         if (--global_trace.stop_count) {
1985                 if (global_trace.stop_count < 0) {
1986                         /* Someone screwed up their debugging */
1987                         WARN_ON_ONCE(1);
1988                         global_trace.stop_count = 0;
1989                 }
1990                 goto out;
1991         }
1992
1993         /* Prevent the buffers from switching */
1994         arch_spin_lock(&global_trace.max_lock);
1995
1996         buffer = global_trace.trace_buffer.buffer;
1997         if (buffer)
1998                 ring_buffer_record_enable(buffer);
1999
2000 #ifdef CONFIG_TRACER_MAX_TRACE
2001         buffer = global_trace.max_buffer.buffer;
2002         if (buffer)
2003                 ring_buffer_record_enable(buffer);
2004 #endif
2005
2006         arch_spin_unlock(&global_trace.max_lock);
2007
2008  out:
2009         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2010 }
2011
2012 static void tracing_start_tr(struct trace_array *tr)
2013 {
2014         struct ring_buffer *buffer;
2015         unsigned long flags;
2016
2017         if (tracing_disabled)
2018                 return;
2019
2020         /* If global, we need to also start the max tracer */
2021         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2022                 return tracing_start();
2023
2024         raw_spin_lock_irqsave(&tr->start_lock, flags);
2025
2026         if (--tr->stop_count) {
2027                 if (tr->stop_count < 0) {
2028                         /* Someone screwed up their debugging */
2029                         WARN_ON_ONCE(1);
2030                         tr->stop_count = 0;
2031                 }
2032                 goto out;
2033         }
2034
2035         buffer = tr->trace_buffer.buffer;
2036         if (buffer)
2037                 ring_buffer_record_enable(buffer);
2038
2039  out:
2040         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2041 }
2042
2043 /**
2044  * tracing_stop - quick stop of the tracer
2045  *
2046  * Lightweight way to stop tracing. Use in conjunction with
2047  * tracing_start.
2048  */
2049 void tracing_stop(void)
2050 {
2051         struct ring_buffer *buffer;
2052         unsigned long flags;
2053
2054         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2055         if (global_trace.stop_count++)
2056                 goto out;
2057
2058         /* Prevent the buffers from switching */
2059         arch_spin_lock(&global_trace.max_lock);
2060
2061         buffer = global_trace.trace_buffer.buffer;
2062         if (buffer)
2063                 ring_buffer_record_disable(buffer);
2064
2065 #ifdef CONFIG_TRACER_MAX_TRACE
2066         buffer = global_trace.max_buffer.buffer;
2067         if (buffer)
2068                 ring_buffer_record_disable(buffer);
2069 #endif
2070
2071         arch_spin_unlock(&global_trace.max_lock);
2072
2073  out:
2074         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2075 }
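/*
 * Usage sketch (illustrative, not part of this file): tracing_stop() and
 * tracing_start() nest via stop_count, so a caller can bracket a region
 * whose events it does not want recorded:
 *
 *	tracing_stop();
 *	do_noisy_work();	// hypothetical work that should not be traced
 *	tracing_start();
 */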
2076
2077 static void tracing_stop_tr(struct trace_array *tr)
2078 {
2079         struct ring_buffer *buffer;
2080         unsigned long flags;
2081
2082         /* If global, we need to also stop the max tracer */
2083         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2084                 return tracing_stop();
2085
2086         raw_spin_lock_irqsave(&tr->start_lock, flags);
2087         if (tr->stop_count++)
2088                 goto out;
2089
2090         buffer = tr->trace_buffer.buffer;
2091         if (buffer)
2092                 ring_buffer_record_disable(buffer);
2093
2094  out:
2095         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2096 }
2097
2098 static int trace_save_cmdline(struct task_struct *tsk)
2099 {
2100         unsigned pid, idx;
2101
2102         /* treat recording of idle task as a success */
2103         if (!tsk->pid)
2104                 return 1;
2105
2106         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2107                 return 0;
2108
2109         /*
2110          * It's not the end of the world if we don't get
2111          * the lock, but we also don't want to spin
2112          * nor do we want to disable interrupts,
2113          * so if we miss here, then better luck next time.
2114          */
2115         if (!arch_spin_trylock(&trace_cmdline_lock))
2116                 return 0;
2117
2118         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2119         if (idx == NO_CMDLINE_MAP) {
2120                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2121
2122                 /*
2123                  * Check whether the cmdline buffer at idx has a pid
2124                  * mapped. We are going to overwrite that entry so we
2125                  * need to clear the map_pid_to_cmdline. Otherwise we
2126                  * would read the new comm for the old pid.
2127                  */
2128                 pid = savedcmd->map_cmdline_to_pid[idx];
2129                 if (pid != NO_CMDLINE_MAP)
2130                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2131
2132                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2133                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2134
2135                 savedcmd->cmdline_idx = idx;
2136         }
2137
2138         set_cmdline(idx, tsk->comm);
2139
2140         arch_spin_unlock(&trace_cmdline_lock);
2141
2142         return 1;
2143 }
2144
2145 static void __trace_find_cmdline(int pid, char comm[])
2146 {
2147         unsigned map;
2148
2149         if (!pid) {
2150                 strcpy(comm, "<idle>");
2151                 return;
2152         }
2153
2154         if (WARN_ON_ONCE(pid < 0)) {
2155                 strcpy(comm, "<XXX>");
2156                 return;
2157         }
2158
2159         if (pid > PID_MAX_DEFAULT) {
2160                 strcpy(comm, "<...>");
2161                 return;
2162         }
2163
2164         map = savedcmd->map_pid_to_cmdline[pid];
2165         if (map != NO_CMDLINE_MAP)
2166                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2167         else
2168                 strcpy(comm, "<...>");
2169 }
2170
2171 void trace_find_cmdline(int pid, char comm[])
2172 {
2173         preempt_disable();
2174         arch_spin_lock(&trace_cmdline_lock);
2175
2176         __trace_find_cmdline(pid, comm);
2177
2178         arch_spin_unlock(&trace_cmdline_lock);
2179         preempt_enable();
2180 }
2181
2182 int trace_find_tgid(int pid)
2183 {
2184         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2185                 return 0;
2186
2187         return tgid_map[pid];
2188 }
2189
2190 static int trace_save_tgid(struct task_struct *tsk)
2191 {
2192         /* treat recording of idle task as a success */
2193         if (!tsk->pid)
2194                 return 1;
2195
2196         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2197                 return 0;
2198
2199         tgid_map[tsk->pid] = tsk->tgid;
2200         return 1;
2201 }
2202
2203 static bool tracing_record_taskinfo_skip(int flags)
2204 {
2205         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2206                 return true;
2207         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2208                 return true;
2209         if (!__this_cpu_read(trace_taskinfo_save))
2210                 return true;
2211         return false;
2212 }
2213
2214 /**
2215  * tracing_record_taskinfo - record the task info of a task
2216  *
2217  * @task: task to record
2218  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2219  *         TRACE_RECORD_TGID for recording tgid
2220  */
2221 void tracing_record_taskinfo(struct task_struct *task, int flags)
2222 {
2223         bool done;
2224
2225         if (tracing_record_taskinfo_skip(flags))
2226                 return;
2227
2228         /*
2229          * Record as much task information as possible. If some fail, continue
2230          * to try to record the others.
2231          */
2232         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2233         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2234
2235         /* If recording any information failed, retry again soon. */
2236         if (!done)
2237                 return;
2238
2239         __this_cpu_write(trace_taskinfo_save, false);
2240 }
2241
2242 /**
2243  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2244  *
2245  * @prev: previous task during sched_switch
2246  * @next: next task during sched_switch
2247  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2248  *         TRACE_RECORD_TGID for recording tgid
2249  */
2250 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2251                                           struct task_struct *next, int flags)
2252 {
2253         bool done;
2254
2255         if (tracing_record_taskinfo_skip(flags))
2256                 return;
2257
2258         /*
2259          * Record as much task information as possible. If some fail, continue
2260          * to try to record the others.
2261          */
2262         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2263         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2264         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2265         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2266
2267         /* If recording any information failed, retry again soon. */
2268         if (!done)
2269                 return;
2270
2271         __this_cpu_write(trace_taskinfo_save, false);
2272 }
2273
2274 /* Helpers to record a specific task information */
2275 void tracing_record_cmdline(struct task_struct *task)
2276 {
2277         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2278 }
2279
2280 void tracing_record_tgid(struct task_struct *task)
2281 {
2282         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2283 }
2284
2285 /*
2286  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2287  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2288  * simplifies those functions and keeps them in sync.
2289  */
2290 enum print_line_t trace_handle_return(struct trace_seq *s)
2291 {
2292         return trace_seq_has_overflowed(s) ?
2293                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2294 }
2295 EXPORT_SYMBOL_GPL(trace_handle_return);
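/*
 * Usage sketch (illustrative, not part of this file): a trace_event
 * output callback typically ends with trace_handle_return() so that a
 * trace_seq overflow maps to TRACE_TYPE_PARTIAL_LINE.  The entry type
 * below is hypothetical.
 *
 *	static enum print_line_t
 *	example_print(struct trace_iterator *iter, int flags,
 *		      struct trace_event *event)
 *	{
 *		struct example_entry *field = (void *)iter->ent;
 *
 *		trace_seq_printf(&iter->seq, "example: %lu\n", field->value);
 *		return trace_handle_return(&iter->seq);
 *	}
 */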
2296
2297 void
2298 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2299                              int pc)
2300 {
2301         struct task_struct *tsk = current;
2302
2303         entry->preempt_count            = pc & 0xff;
2304         entry->pid                      = (tsk) ? tsk->pid : 0;
2305         entry->flags =
2306 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2307                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2308 #else
2309                 TRACE_FLAG_IRQS_NOSUPPORT |
2310 #endif
2311                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2312                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2313                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2314                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2315                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2316 }
2317 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2318
2319 struct ring_buffer_event *
2320 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2321                           int type,
2322                           unsigned long len,
2323                           unsigned long flags, int pc)
2324 {
2325         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2326 }
2327
2328 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2329 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2330 static int trace_buffered_event_ref;
2331
2332 /**
2333  * trace_buffered_event_enable - enable buffering events
2334  *
2335  * When events are being filtered, it is quicker to use a temporary
2336  * buffer to write the event data into if there's a likely chance
2337  * that it will not be committed. Discarding a reserved ring buffer
2338  * event is not as fast as committing it, and is much slower than
2339  * copying the event from a temporary buffer into the ring buffer.
2340  *
2341  * When an event is to be filtered, allocate per cpu buffers to
2342  * write the event data into, and if the event is filtered and discarded
2343  * it is simply dropped, otherwise, the entire data is to be committed
2344  * in one shot.
2345  */
2346 void trace_buffered_event_enable(void)
2347 {
2348         struct ring_buffer_event *event;
2349         struct page *page;
2350         int cpu;
2351
2352         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2353
2354         if (trace_buffered_event_ref++)
2355                 return;
2356
2357         for_each_tracing_cpu(cpu) {
2358                 page = alloc_pages_node(cpu_to_node(cpu),
2359                                         GFP_KERNEL | __GFP_NORETRY, 0);
2360                 if (!page)
2361                         goto failed;
2362
2363                 event = page_address(page);
2364                 memset(event, 0, sizeof(*event));
2365
2366                 per_cpu(trace_buffered_event, cpu) = event;
2367
2368                 preempt_disable();
2369                 if (cpu == smp_processor_id() &&
2370                     this_cpu_read(trace_buffered_event) !=
2371                     per_cpu(trace_buffered_event, cpu))
2372                         WARN_ON_ONCE(1);
2373                 preempt_enable();
2374         }
2375
2376         return;
2377  failed:
2378         trace_buffered_event_disable();
2379 }
2380
2381 static void enable_trace_buffered_event(void *data)
2382 {
2383         /* Probably not needed, but do it anyway */
2384         smp_rmb();
2385         this_cpu_dec(trace_buffered_event_cnt);
2386 }
2387
2388 static void disable_trace_buffered_event(void *data)
2389 {
2390         this_cpu_inc(trace_buffered_event_cnt);
2391 }
2392
2393 /**
2394  * trace_buffered_event_disable - disable buffering events
2395  *
2396  * When a filter is removed, it is faster to not use the buffered
2397  * events, and to commit directly into the ring buffer. Free up
2398  * the temp buffers when there are no more users. This requires
2399  * special synchronization with current events.
2400  */
2401 void trace_buffered_event_disable(void)
2402 {
2403         int cpu;
2404
2405         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2406
2407         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2408                 return;
2409
2410         if (--trace_buffered_event_ref)
2411                 return;
2412
2413         preempt_disable();
2414         /* For each CPU, set the buffer as used. */
2415         smp_call_function_many(tracing_buffer_mask,
2416                                disable_trace_buffered_event, NULL, 1);
2417         preempt_enable();
2418
2419         /* Wait for all current users to finish */
2420         synchronize_rcu();
2421
2422         for_each_tracing_cpu(cpu) {
2423                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2424                 per_cpu(trace_buffered_event, cpu) = NULL;
2425         }
2426         /*
2427          * Make sure trace_buffered_event is NULL before clearing
2428          * trace_buffered_event_cnt.
2429          */
2430         smp_wmb();
2431
2432         preempt_disable();
2433         /* Do the work on each cpu */
2434         smp_call_function_many(tracing_buffer_mask,
2435                                enable_trace_buffered_event, NULL, 1);
2436         preempt_enable();
2437 }
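/*
 * Usage sketch (illustrative, not part of this file): the buffered event
 * path is reference counted and must be toggled under event_mutex, e.g.
 * when a filter is attached to or removed from an event file:
 *
 *	// filter attached:
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 *	// filter removed again:
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */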
2438
2439 static struct ring_buffer *temp_buffer;
2440
2441 struct ring_buffer_event *
2442 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2443                           struct trace_event_file *trace_file,
2444                           int type, unsigned long len,
2445                           unsigned long flags, int pc)
2446 {
2447         struct ring_buffer_event *entry;
2448         int val;
2449
2450         *current_rb = trace_file->tr->trace_buffer.buffer;
2451
2452         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2453              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2454             (entry = this_cpu_read(trace_buffered_event))) {
2455                 /* Try to use the per cpu buffer first */
2456                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2457                 if (val == 1) {
2458                         trace_event_setup(entry, type, flags, pc);
2459                         entry->array[0] = len;
2460                         return entry;
2461                 }
2462                 this_cpu_dec(trace_buffered_event_cnt);
2463         }
2464
2465         entry = __trace_buffer_lock_reserve(*current_rb,
2466                                             type, len, flags, pc);
2467         /*
2468          * If tracing is off, but we have triggers enabled
2469          * we still need to look at the event data. Use the temp_buffer
2470          * to store the trace event for the trigger to use. It's recursion
2471          * safe and will not be recorded anywhere.
2472          */
2473         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2474                 *current_rb = temp_buffer;
2475                 entry = __trace_buffer_lock_reserve(*current_rb,
2476                                                     type, len, flags, pc);
2477         }
2478         return entry;
2479 }
2480 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2481
2482 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2483 static DEFINE_MUTEX(tracepoint_printk_mutex);
2484
2485 static void output_printk(struct trace_event_buffer *fbuffer)
2486 {
2487         struct trace_event_call *event_call;
2488         struct trace_event *event;
2489         unsigned long flags;
2490         struct trace_iterator *iter = tracepoint_print_iter;
2491
2492         /* We should never get here if iter is NULL */
2493         if (WARN_ON_ONCE(!iter))
2494                 return;
2495
2496         event_call = fbuffer->trace_file->event_call;
2497         if (!event_call || !event_call->event.funcs ||
2498             !event_call->event.funcs->trace)
2499                 return;
2500
2501         event = &fbuffer->trace_file->event_call->event;
2502
2503         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2504         trace_seq_init(&iter->seq);
2505         iter->ent = fbuffer->entry;
2506         event_call->event.funcs->trace(iter, 0, event);
2507         trace_seq_putc(&iter->seq, 0);
2508         printk("%s", iter->seq.buffer);
2509
2510         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2511 }
2512
2513 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2514                              void __user *buffer, size_t *lenp,
2515                              loff_t *ppos)
2516 {
2517         int save_tracepoint_printk;
2518         int ret;
2519
2520         mutex_lock(&tracepoint_printk_mutex);
2521         save_tracepoint_printk = tracepoint_printk;
2522
2523         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2524
2525         /*
2526          * This will force exiting early, as tracepoint_printk
2527          * is always zero when tracepoint_print_iter is not allocated
2528          */
2529         if (!tracepoint_print_iter)
2530                 tracepoint_printk = 0;
2531
2532         if (save_tracepoint_printk == tracepoint_printk)
2533                 goto out;
2534
2535         if (tracepoint_printk)
2536                 static_key_enable(&tracepoint_printk_key.key);
2537         else
2538                 static_key_disable(&tracepoint_printk_key.key);
2539
2540  out:
2541         mutex_unlock(&tracepoint_printk_mutex);
2542
2543         return ret;
2544 }
2545
2546 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2547 {
2548         if (static_key_false(&tracepoint_printk_key.key))
2549                 output_printk(fbuffer);
2550
2551         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2552                                     fbuffer->event, fbuffer->entry,
2553                                     fbuffer->flags, fbuffer->pc);
2554 }
2555 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2556
2557 /*
2558  * Skip 3:
2559  *
2560  *   trace_buffer_unlock_commit_regs()
2561  *   trace_event_buffer_commit()
2562  *   trace_event_raw_event_xxx()
2563  */
2564 # define STACK_SKIP 3
2565
2566 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2567                                      struct ring_buffer *buffer,
2568                                      struct ring_buffer_event *event,
2569                                      unsigned long flags, int pc,
2570                                      struct pt_regs *regs)
2571 {
2572         __buffer_unlock_commit(buffer, event);
2573
2574         /*
2575          * If regs is not set, then skip the necessary functions.
2576          * Note, we can still get here via blktrace, wakeup tracer
2577          * and mmiotrace, but that's ok if they lose a function or
2578          * two. They are not that meaningful.
2579          */
2580         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2581         ftrace_trace_userstack(buffer, flags, pc);
2582 }
2583
2584 /*
2585  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2586  */
2587 void
2588 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2589                                    struct ring_buffer_event *event)
2590 {
2591         __buffer_unlock_commit(buffer, event);
2592 }
2593
2594 static void
2595 trace_process_export(struct trace_export *export,
2596                struct ring_buffer_event *event)
2597 {
2598         struct trace_entry *entry;
2599         unsigned int size = 0;
2600
2601         entry = ring_buffer_event_data(event);
2602         size = ring_buffer_event_length(event);
2603         export->write(export, entry, size);
2604 }
2605
2606 static DEFINE_MUTEX(ftrace_export_lock);
2607
2608 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2609
2610 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2611
2612 static inline void ftrace_exports_enable(void)
2613 {
2614         static_branch_enable(&ftrace_exports_enabled);
2615 }
2616
2617 static inline void ftrace_exports_disable(void)
2618 {
2619         static_branch_disable(&ftrace_exports_enabled);
2620 }
2621
2622 static void ftrace_exports(struct ring_buffer_event *event)
2623 {
2624         struct trace_export *export;
2625
2626         preempt_disable_notrace();
2627
2628         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2629         while (export) {
2630                 trace_process_export(export, event);
2631                 export = rcu_dereference_raw_notrace(export->next);
2632         }
2633
2634         preempt_enable_notrace();
2635 }
2636
2637 static inline void
2638 add_trace_export(struct trace_export **list, struct trace_export *export)
2639 {
2640         rcu_assign_pointer(export->next, *list);
2641         /*
2642          * We are entering export into the list but another
2643          * CPU might be walking that list. We need to make sure
2644          * the export->next pointer is valid before another CPU sees
2645          * the export pointer included into the list.
2646          */
2647         rcu_assign_pointer(*list, export);
2648 }
2649
2650 static inline int
2651 rm_trace_export(struct trace_export **list, struct trace_export *export)
2652 {
2653         struct trace_export **p;
2654
2655         for (p = list; *p != NULL; p = &(*p)->next)
2656                 if (*p == export)
2657                         break;
2658
2659         if (*p != export)
2660                 return -1;
2661
2662         rcu_assign_pointer(*p, (*p)->next);
2663
2664         return 0;
2665 }
2666
2667 static inline void
2668 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670         if (*list == NULL)
2671                 ftrace_exports_enable();
2672
2673         add_trace_export(list, export);
2674 }
2675
2676 static inline int
2677 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2678 {
2679         int ret;
2680
2681         ret = rm_trace_export(list, export);
2682         if (*list == NULL)
2683                 ftrace_exports_disable();
2684
2685         return ret;
2686 }
2687
2688 int register_ftrace_export(struct trace_export *export)
2689 {
2690         if (WARN_ON_ONCE(!export->write))
2691                 return -1;
2692
2693         mutex_lock(&ftrace_export_lock);
2694
2695         add_ftrace_export(&ftrace_exports_list, export);
2696
2697         mutex_unlock(&ftrace_export_lock);
2698
2699         return 0;
2700 }
2701 EXPORT_SYMBOL_GPL(register_ftrace_export);
2702
2703 int unregister_ftrace_export(struct trace_export *export)
2704 {
2705         int ret;
2706
2707         mutex_lock(&ftrace_export_lock);
2708
2709         ret = rm_ftrace_export(&ftrace_exports_list, export);
2710
2711         mutex_unlock(&ftrace_export_lock);
2712
2713         return ret;
2714 }
2715 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
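/*
 * Usage sketch (illustrative, not part of this file): a trace_export
 * forwards every exported function-trace event to an external sink via
 * its ->write() callback.  The callback body below is hypothetical.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		// push 'size' bytes starting at 'entry' to an out-of-band channel
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	// register_ftrace_export(&example_export);
 *	// ... tracing runs, events are exported ...
 *	// unregister_ftrace_export(&example_export);
 */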
2716
2717 void
2718 trace_function(struct trace_array *tr,
2719                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2720                int pc)
2721 {
2722         struct trace_event_call *call = &event_function;
2723         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2724         struct ring_buffer_event *event;
2725         struct ftrace_entry *entry;
2726
2727         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2728                                             flags, pc);
2729         if (!event)
2730                 return;
2731         entry   = ring_buffer_event_data(event);
2732         entry->ip                       = ip;
2733         entry->parent_ip                = parent_ip;
2734
2735         if (!call_filter_check_discard(call, entry, buffer, event)) {
2736                 if (static_branch_unlikely(&ftrace_exports_enabled))
2737                         ftrace_exports(event);
2738                 __buffer_unlock_commit(buffer, event);
2739         }
2740 }
2741
2742 #ifdef CONFIG_STACKTRACE
2743
2744 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2745 struct ftrace_stack {
2746         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2747 };
2748
2749 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2750 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2751
2752 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2753                                  unsigned long flags,
2754                                  int skip, int pc, struct pt_regs *regs)
2755 {
2756         struct trace_event_call *call = &event_kernel_stack;
2757         struct ring_buffer_event *event;
2758         struct stack_entry *entry;
2759         struct stack_trace trace;
2760         int use_stack;
2761         int size = FTRACE_STACK_ENTRIES;
2762
2763         trace.nr_entries        = 0;
2764         trace.skip              = skip;
2765
2766         /*
2767          * Add one, for this function and the call to save_stack_trace().
2768          * If regs is set, then these functions will not be in the way.
2769          */
2770 #ifndef CONFIG_UNWINDER_ORC
2771         if (!regs)
2772                 trace.skip++;
2773 #endif
2774
2775         /*
2776          * Since events can happen in NMIs there's no safe way to
2777          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2778          * or NMI comes in, it will just have to use the default
2779          * FTRACE_STACK_SIZE.
2780          */
2781         preempt_disable_notrace();
2782
2783         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2784         /*
2785          * We don't need any atomic variables, just a barrier.
2786          * If an interrupt comes in, we don't care, because it would
2787          * have exited and put the counter back to what we want.
2788          * We just need a barrier to keep gcc from moving things
2789          * around.
2790          */
2791         barrier();
2792         if (use_stack == 1) {
2793                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2794                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2795
2796                 if (regs)
2797                         save_stack_trace_regs(regs, &trace);
2798                 else
2799                         save_stack_trace(&trace);
2800
2801                 if (trace.nr_entries > size)
2802                         size = trace.nr_entries;
2803         } else
2804                 /* From now on, use_stack is a boolean */
2805                 use_stack = 0;
2806
2807         size *= sizeof(unsigned long);
2808
2809         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2810                                             sizeof(*entry) + size, flags, pc);
2811         if (!event)
2812                 goto out;
2813         entry = ring_buffer_event_data(event);
2814
2815         memset(&entry->caller, 0, size);
2816
2817         if (use_stack)
2818                 memcpy(&entry->caller, trace.entries,
2819                        trace.nr_entries * sizeof(unsigned long));
2820         else {
2821                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2822                 trace.entries           = entry->caller;
2823                 if (regs)
2824                         save_stack_trace_regs(regs, &trace);
2825                 else
2826                         save_stack_trace(&trace);
2827         }
2828
2829         entry->size = trace.nr_entries;
2830
2831         if (!call_filter_check_discard(call, entry, buffer, event))
2832                 __buffer_unlock_commit(buffer, event);
2833
2834  out:
2835         /* Again, don't let gcc optimize things here */
2836         barrier();
2837         __this_cpu_dec(ftrace_stack_reserve);
2838         preempt_enable_notrace();
2839
2840 }
2841
2842 static inline void ftrace_trace_stack(struct trace_array *tr,
2843                                       struct ring_buffer *buffer,
2844                                       unsigned long flags,
2845                                       int skip, int pc, struct pt_regs *regs)
2846 {
2847         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2848                 return;
2849
2850         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2851 }
2852
2853 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2854                    int pc)
2855 {
2856         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2857
2858         if (rcu_is_watching()) {
2859                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2860                 return;
2861         }
2862
2863         /*
2864          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2865          * but if the above rcu_is_watching() failed, then the NMI
2866          * triggered someplace critical, and rcu_irq_enter() should
2867          * not be called from NMI.
2868          */
2869         if (unlikely(in_nmi()))
2870                 return;
2871
2872         rcu_irq_enter_irqson();
2873         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2874         rcu_irq_exit_irqson();
2875 }
2876
2877 /**
2878  * trace_dump_stack - record a stack back trace in the trace buffer
2879  * @skip: Number of functions to skip (helper handlers)
2880  */
2881 void trace_dump_stack(int skip)
2882 {
2883         unsigned long flags;
2884
2885         if (tracing_disabled || tracing_selftest_running)
2886                 return;
2887
2888         local_save_flags(flags);
2889
2890 #ifndef CONFIG_UNWINDER_ORC
2891         /* Skip 1 to skip this function. */
2892         skip++;
2893 #endif
2894         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2895                              flags, skip, preempt_count(), NULL);
2896 }
2897 EXPORT_SYMBOL_GPL(trace_dump_stack);
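/*
 * Usage sketch (illustrative, not part of this file): trace_dump_stack(0)
 * can be dropped into a suspect code path to record the current kernel
 * stack into the trace buffer instead of printing it to the console:
 *
 *	if (unlikely(hit_weird_condition))	// hypothetical condition
 *		trace_dump_stack(0);
 */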
2898
2899 static DEFINE_PER_CPU(int, user_stack_count);
2900
2901 void
2902 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2903 {
2904         struct trace_event_call *call = &event_user_stack;
2905         struct ring_buffer_event *event;
2906         struct userstack_entry *entry;
2907         struct stack_trace trace;
2908
2909         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2910                 return;
2911
2912         /*
2913          * NMIs cannot handle page faults, even with fixups.
2914          * Saving the user stack can (and often does) fault.
2915          */
2916         if (unlikely(in_nmi()))
2917                 return;
2918
2919         /*
2920          * prevent recursion, since the user stack tracing may
2921          * trigger other kernel events.
2922          */
2923         preempt_disable();
2924         if (__this_cpu_read(user_stack_count))
2925                 goto out;
2926
2927         __this_cpu_inc(user_stack_count);
2928
2929         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2930                                             sizeof(*entry), flags, pc);
2931         if (!event)
2932                 goto out_drop_count;
2933         entry   = ring_buffer_event_data(event);
2934
2935         entry->tgid             = current->tgid;
2936         memset(&entry->caller, 0, sizeof(entry->caller));
2937
2938         trace.nr_entries        = 0;
2939         trace.max_entries       = FTRACE_STACK_ENTRIES;
2940         trace.skip              = 0;
2941         trace.entries           = entry->caller;
2942
2943         save_stack_trace_user(&trace);
2944         if (!call_filter_check_discard(call, entry, buffer, event))
2945                 __buffer_unlock_commit(buffer, event);
2946
2947  out_drop_count:
2948         __this_cpu_dec(user_stack_count);
2949  out:
2950         preempt_enable();
2951 }
2952
2953 #ifdef UNUSED
2954 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2955 {
2956         ftrace_trace_userstack(tr, flags, preempt_count());
2957 }
2958 #endif /* UNUSED */
2959
2960 #endif /* CONFIG_STACKTRACE */
2961
2962 /* created for use with alloc_percpu */
2963 struct trace_buffer_struct {
2964         int nesting;
2965         char buffer[4][TRACE_BUF_SIZE];
2966 };
2967
2968 static struct trace_buffer_struct *trace_percpu_buffer;
2969
2970 /*
2971  * This allows for lockless recording.  If we're nested too deeply, then
2972  * this returns NULL.
2973  */
2974 static char *get_trace_buf(void)
2975 {
2976         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2977
2978         if (!buffer || buffer->nesting >= 4)
2979                 return NULL;
2980
2981         buffer->nesting++;
2982
2983         /* Interrupts must see nesting incremented before we use the buffer */
2984         barrier();
2985         return &buffer->buffer[buffer->nesting][0];
2986 }
2987
2988 static void put_trace_buf(void)
2989 {
2990         /* Don't let the decrement of nesting leak before this */
2991         barrier();
2992         this_cpu_dec(trace_percpu_buffer->nesting);
2993 }
2994
2995 static int alloc_percpu_trace_buffer(void)
2996 {
2997         struct trace_buffer_struct *buffers;
2998
2999         buffers = alloc_percpu(struct trace_buffer_struct);
3000         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3001                 return -ENOMEM;
3002
3003         trace_percpu_buffer = buffers;
3004         return 0;
3005 }
3006
3007 static int buffers_allocated;
3008
3009 void trace_printk_init_buffers(void)
3010 {
3011         if (buffers_allocated)
3012                 return;
3013
3014         if (alloc_percpu_trace_buffer())
3015                 return;
3016
3017         /* trace_printk() is for debug use only. Don't use it in production. */
3018
3019         pr_warn("\n");
3020         pr_warn("**********************************************************\n");
3021         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3022         pr_warn("**                                                      **\n");
3023         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3024         pr_warn("**                                                      **\n");
3025         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3026         pr_warn("** unsafe for production use.                           **\n");
3027         pr_warn("**                                                      **\n");
3028         pr_warn("** If you see this message and you are not debugging    **\n");
3029         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3030         pr_warn("**                                                      **\n");
3031         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3032         pr_warn("**********************************************************\n");
3033
3034         /* Expand the buffers to set size */
3035         tracing_update_buffers();
3036
3037         buffers_allocated = 1;
3038
3039         /*
3040          * trace_printk_init_buffers() can be called by modules.
3041          * If that happens, then we need to start cmdline recording
3042          * directly here. If the global_trace.buffer is already
3043          * allocated here, then this was called by module code.
3044          */
3045         if (global_trace.trace_buffer.buffer)
3046                 tracing_start_cmdline_record();
3047 }
3048
3049 void trace_printk_start_comm(void)
3050 {
3051         /* Start tracing comms if trace printk is set */
3052         if (!buffers_allocated)
3053                 return;
3054         tracing_start_cmdline_record();
3055 }
3056
3057 static void trace_printk_start_stop_comm(int enabled)
3058 {
3059         if (!buffers_allocated)
3060                 return;
3061
3062         if (enabled)
3063                 tracing_start_cmdline_record();
3064         else
3065                 tracing_stop_cmdline_record();
3066 }
3067
3068 /**
3069  * trace_vbprintk - write binary msg to tracing buffer
3070  * @ip: address of the caller; @fmt, @args: vbin_printf() format string and arguments
3071  */
3072 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3073 {
3074         struct trace_event_call *call = &event_bprint;
3075         struct ring_buffer_event *event;
3076         struct ring_buffer *buffer;
3077         struct trace_array *tr = &global_trace;
3078         struct bprint_entry *entry;
3079         unsigned long flags;
3080         char *tbuffer;
3081         int len = 0, size, pc;
3082
3083         if (unlikely(tracing_selftest_running || tracing_disabled))
3084                 return 0;
3085
3086         /* Don't pollute graph traces with trace_vprintk internals */
3087         pause_graph_tracing();
3088
3089         pc = preempt_count();
3090         preempt_disable_notrace();
3091
3092         tbuffer = get_trace_buf();
3093         if (!tbuffer) {
3094                 len = 0;
3095                 goto out_nobuffer;
3096         }
3097
3098         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3099
3100         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3101                 goto out;
3102
3103         local_save_flags(flags);
3104         size = sizeof(*entry) + sizeof(u32) * len;
3105         buffer = tr->trace_buffer.buffer;
3106         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3107                                             flags, pc);
3108         if (!event)
3109                 goto out;
3110         entry = ring_buffer_event_data(event);
3111         entry->ip                       = ip;
3112         entry->fmt                      = fmt;
3113
3114         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3115         if (!call_filter_check_discard(call, entry, buffer, event)) {
3116                 __buffer_unlock_commit(buffer, event);
3117                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3118         }
3119
3120 out:
3121         put_trace_buf();
3122
3123 out_nobuffer:
3124         preempt_enable_notrace();
3125         unpause_graph_tracing();
3126
3127         return len;
3128 }
3129 EXPORT_SYMBOL_GPL(trace_vbprintk);
3130
3131 __printf(3, 0)
3132 static int
3133 __trace_array_vprintk(struct ring_buffer *buffer,
3134                       unsigned long ip, const char *fmt, va_list args)
3135 {
3136         struct trace_event_call *call = &event_print;
3137         struct ring_buffer_event *event;
3138         int len = 0, size, pc;
3139         struct print_entry *entry;
3140         unsigned long flags;
3141         char *tbuffer;
3142
3143         if (tracing_disabled || tracing_selftest_running)
3144                 return 0;
3145
3146         /* Don't pollute graph traces with trace_vprintk internals */
3147         pause_graph_tracing();
3148
3149         pc = preempt_count();
3150         preempt_disable_notrace();
3151
3152
3153         tbuffer = get_trace_buf();
3154         if (!tbuffer) {
3155                 len = 0;
3156                 goto out_nobuffer;
3157         }
3158
3159         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3160
3161         local_save_flags(flags);
3162         size = sizeof(*entry) + len + 1;
3163         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3164                                             flags, pc);
3165         if (!event)
3166                 goto out;
3167         entry = ring_buffer_event_data(event);
3168         entry->ip = ip;
3169
3170         memcpy(&entry->buf, tbuffer, len + 1);
3171         if (!call_filter_check_discard(call, entry, buffer, event)) {
3172                 __buffer_unlock_commit(buffer, event);
3173                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3174         }
3175
3176 out:
3177         put_trace_buf();
3178
3179 out_nobuffer:
3180         preempt_enable_notrace();
3181         unpause_graph_tracing();
3182
3183         return len;
3184 }
3185
3186 __printf(3, 0)
3187 int trace_array_vprintk(struct trace_array *tr,
3188                         unsigned long ip, const char *fmt, va_list args)
3189 {
3190         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3191 }
3192
3193 __printf(3, 0)
3194 int trace_array_printk(struct trace_array *tr,
3195                        unsigned long ip, const char *fmt, ...)
3196 {
3197         int ret;
3198         va_list ap;
3199
3200         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3201                 return 0;
3202
3203         va_start(ap, fmt);
3204         ret = trace_array_vprintk(tr, ip, fmt, ap);
3205         va_end(ap);
3206         return ret;
3207 }
3208
3209 __printf(3, 4)
3210 int trace_array_printk_buf(struct ring_buffer *buffer,
3211                            unsigned long ip, const char *fmt, ...)
3212 {
3213         int ret;
3214         va_list ap;
3215
3216         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3217                 return 0;
3218
3219         va_start(ap, fmt);
3220         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3221         va_end(ap);
3222         return ret;
3223 }
3224
3225 __printf(2, 0)
3226 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3227 {
3228         return trace_array_vprintk(&global_trace, ip, fmt, args);
3229 }
3230 EXPORT_SYMBOL_GPL(trace_vprintk);
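/*
 * Usage sketch (illustrative, not part of this file): trace_vbprintk()
 * and trace_vprintk() are the backends of the trace_printk() macro,
 * which is the usual way to get printf-style debug output into the
 * ring buffer:
 *
 *	trace_printk("op=%d took %llu ns\n", op, delta);	// hypothetical values
 */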
3231
3232 static void trace_iterator_increment(struct trace_iterator *iter)
3233 {
3234         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3235
3236         iter->idx++;
3237         if (buf_iter)
3238                 ring_buffer_read(buf_iter, NULL);
3239 }
3240
3241 static struct trace_entry *
3242 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3243                 unsigned long *lost_events)
3244 {
3245         struct ring_buffer_event *event;
3246         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3247
3248         if (buf_iter)
3249                 event = ring_buffer_iter_peek(buf_iter, ts);
3250         else
3251                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3252                                          lost_events);
3253
3254         if (event) {
3255                 iter->ent_size = ring_buffer_event_length(event);
3256                 return ring_buffer_event_data(event);
3257         }
3258         iter->ent_size = 0;
3259         return NULL;
3260 }
3261
3262 static struct trace_entry *
3263 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3264                   unsigned long *missing_events, u64 *ent_ts)
3265 {
3266         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3267         struct trace_entry *ent, *next = NULL;
3268         unsigned long lost_events = 0, next_lost = 0;
3269         int cpu_file = iter->cpu_file;
3270         u64 next_ts = 0, ts;
3271         int next_cpu = -1;
3272         int next_size = 0;
3273         int cpu;
3274
3275         /*
3276          * If we are in a per_cpu trace file, don't bother iterating over
3277          * all CPUs; just peek at that one directly.
3278          */
3279         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3280                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3281                         return NULL;
3282                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3283                 if (ent_cpu)
3284                         *ent_cpu = cpu_file;
3285
3286                 return ent;
3287         }
3288
3289         for_each_tracing_cpu(cpu) {
3290
3291                 if (ring_buffer_empty_cpu(buffer, cpu))
3292                         continue;
3293
3294                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3295
3296                 /*
3297                  * Pick the entry with the smallest timestamp:
3298                  */
3299                 if (ent && (!next || ts < next_ts)) {
3300                         next = ent;
3301                         next_cpu = cpu;
3302                         next_ts = ts;
3303                         next_lost = lost_events;
3304                         next_size = iter->ent_size;
3305                 }
3306         }
3307
3308         iter->ent_size = next_size;
3309
3310         if (ent_cpu)
3311                 *ent_cpu = next_cpu;
3312
3313         if (ent_ts)
3314                 *ent_ts = next_ts;
3315
3316         if (missing_events)
3317                 *missing_events = next_lost;
3318
3319         return next;
3320 }
3321
3322 /* Find the next real entry, without updating the iterator itself */
3323 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3324                                           int *ent_cpu, u64 *ent_ts)
3325 {
3326         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3327 }
3328
3329 /* Find the next real entry, and increment the iterator to the next entry */
3330 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3331 {
3332         iter->ent = __find_next_entry(iter, &iter->cpu,
3333                                       &iter->lost_events, &iter->ts);
3334
3335         if (iter->ent)
3336                 trace_iterator_increment(iter);
3337
3338         return iter->ent ? iter : NULL;
3339 }
3340
3341 static void trace_consume(struct trace_iterator *iter)
3342 {
3343         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3344                             &iter->lost_events);
3345 }
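
/*
 * Usage sketch: this is roughly how the consuming trace_pipe read path
 * combines the helpers above (simplified, error handling omitted):
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 */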
3346
3347 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3348 {
3349         struct trace_iterator *iter = m->private;
3350         int i = (int)*pos;
3351         void *ent;
3352
3353         WARN_ON_ONCE(iter->leftover);
3354
3355         (*pos)++;
3356
3357         /* can't go backwards */
3358         if (iter->idx > i)
3359                 return NULL;
3360
3361         if (iter->idx < 0)
3362                 ent = trace_find_next_entry_inc(iter);
3363         else
3364                 ent = iter;
3365
3366         while (ent && iter->idx < i)
3367                 ent = trace_find_next_entry_inc(iter);
3368
3369         iter->pos = *pos;
3370
3371         return ent;
3372 }
3373
3374 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3375 {
3376         struct ring_buffer_event *event;
3377         struct ring_buffer_iter *buf_iter;
3378         unsigned long entries = 0;
3379         u64 ts;
3380
3381         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3382
3383         buf_iter = trace_buffer_iter(iter, cpu);
3384         if (!buf_iter)
3385                 return;
3386
3387         ring_buffer_iter_reset(buf_iter);
3388
3389         /*
3390          * With the max latency tracers, a reset may never have taken
3391          * place on a cpu. This is evident from the timestamp being
3392          * before the start of the buffer; those entries are skipped
3393          * and counted here.
3393          */
3394         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3395                 if (ts >= iter->trace_buffer->time_start)
3396                         break;
3397                 entries++;
3398                 ring_buffer_read(buf_iter, NULL);
3399         }
3400
3401         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3402 }
3403
3404 /*
3405  * The current tracer is copied to avoid taking a global lock
3406  * all around.
3407  */
3408 static void *s_start(struct seq_file *m, loff_t *pos)
3409 {
3410         struct trace_iterator *iter = m->private;
3411         struct trace_array *tr = iter->tr;
3412         int cpu_file = iter->cpu_file;
3413         void *p = NULL;
3414         loff_t l = 0;
3415         int cpu;
3416
3417         /*
3418          * copy the tracer to avoid using a global lock all around.
3419          * iter->trace is a copy of current_trace; the pointer to the
3420          * name may be used instead of a strcmp(), as iter->trace->name
3421          * will point to the same string as current_trace->name.
3422          */
3423         mutex_lock(&trace_types_lock);
3424         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3425                 *iter->trace = *tr->current_trace;
3426         mutex_unlock(&trace_types_lock);
3427
3428 #ifdef CONFIG_TRACER_MAX_TRACE
3429         if (iter->snapshot && iter->trace->use_max_tr)
3430                 return ERR_PTR(-EBUSY);
3431 #endif
3432
3433         if (!iter->snapshot)
3434                 atomic_inc(&trace_record_taskinfo_disabled);
3435
3436         if (*pos != iter->pos) {
3437                 iter->ent = NULL;
3438                 iter->cpu = 0;
3439                 iter->idx = -1;
3440
3441                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3442                         for_each_tracing_cpu(cpu)
3443                                 tracing_iter_reset(iter, cpu);
3444                 } else
3445                         tracing_iter_reset(iter, cpu_file);
3446
3447                 iter->leftover = 0;
3448                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3449                         ;
3450
3451         } else {
3452                 /*
3453                  * If we overflowed the seq_file before, then we want
3454                  * to just reuse the trace_seq buffer again.
3455                  */
3456                 if (iter->leftover)
3457                         p = iter;
3458                 else {
3459                         l = *pos - 1;
3460                         p = s_next(m, p, &l);
3461                 }
3462         }
3463
3464         trace_event_read_lock();
3465         trace_access_lock(cpu_file);
3466         return p;
3467 }
3468
3469 static void s_stop(struct seq_file *m, void *p)
3470 {
3471         struct trace_iterator *iter = m->private;
3472
3473 #ifdef CONFIG_TRACER_MAX_TRACE
3474         if (iter->snapshot && iter->trace->use_max_tr)
3475                 return;
3476 #endif
3477
3478         if (!iter->snapshot)
3479                 atomic_dec(&trace_record_taskinfo_disabled);
3480
3481         trace_access_unlock(iter->cpu_file);
3482         trace_event_read_unlock();
3483 }
3484
3485 static void
3486 get_total_entries(struct trace_buffer *buf,
3487                   unsigned long *total, unsigned long *entries)
3488 {
3489         unsigned long count;
3490         int cpu;
3491
3492         *total = 0;
3493         *entries = 0;
3494
3495         for_each_tracing_cpu(cpu) {
3496                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3497                 /*
3498                  * If this buffer has skipped entries, then we hold all
3499                  * entries for the trace and we need to ignore the
3500                  * ones before the time stamp.
3501                  */
3502                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3503                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3504                         /* total is the same as the entries */
3505                         *total += count;
3506                 } else
3507                         *total += count +
3508                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3509                 *entries += count;
3510         }
3511 }
3512
3513 static void print_lat_help_header(struct seq_file *m)
3514 {
3515         seq_puts(m, "#                  _------=> CPU#            \n"
3516                     "#                 / _-----=> irqs-off        \n"
3517                     "#                | / _----=> need-resched    \n"
3518                     "#                || / _---=> hardirq/softirq \n"
3519                     "#                ||| / _--=> preempt-depth   \n"
3520                     "#                |||| /     delay            \n"
3521                     "#  cmd     pid   ||||| time  |   caller      \n"
3522                     "#     \\   /      |||||  \\    |   /         \n");
3523 }
3524
3525 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3526 {
3527         unsigned long total;
3528         unsigned long entries;
3529
3530         get_total_entries(buf, &total, &entries);
3531         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3532                    entries, total, num_online_cpus());
3533         seq_puts(m, "#\n");
3534 }
3535
3536 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3537                                    unsigned int flags)
3538 {
3539         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3540
3541         print_event_info(buf, m);
3542
3543         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3544         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3545 }
3546
3547 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3548                                        unsigned int flags)
3549 {
3550         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551         const char tgid_space[] = "          ";
3552         const char space[] = "  ";
3553
3554         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3555                    tgid ? tgid_space : space);
3556         seq_printf(m, "#                          %s / _----=> need-resched\n",
3557                    tgid ? tgid_space : space);
3558         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3559                    tgid ? tgid_space : space);
3560         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3561                    tgid ? tgid_space : space);
3562         seq_printf(m, "#                          %s||| /     delay\n",
3563                    tgid ? tgid_space : space);
3564         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3565                    tgid ? "   TGID   " : space);
3566         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3567                    tgid ? "     |    " : space);
3568 }
3569
3570 void
3571 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3572 {
3573         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3574         struct trace_buffer *buf = iter->trace_buffer;
3575         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3576         struct tracer *type = iter->trace;
3577         unsigned long entries;
3578         unsigned long total;
3579         const char *name = "preemption";
3580
3581         name = type->name;
3582
3583         get_total_entries(buf, &total, &entries);
3584
3585         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3586                    name, UTS_RELEASE);
3587         seq_puts(m, "# -----------------------------------"
3588                  "---------------------------------\n");
3589         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3590                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3591                    nsecs_to_usecs(data->saved_latency),
3592                    entries,
3593                    total,
3594                    buf->cpu,
3595 #if defined(CONFIG_PREEMPT_NONE)
3596                    "server",
3597 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3598                    "desktop",
3599 #elif defined(CONFIG_PREEMPT)
3600                    "preempt",
3601 #else
3602                    "unknown",
3603 #endif
3604                    /* These are reserved for later use */
3605                    0, 0, 0, 0);
3606 #ifdef CONFIG_SMP
3607         seq_printf(m, " #P:%d)\n", num_online_cpus());
3608 #else
3609         seq_puts(m, ")\n");
3610 #endif
3611         seq_puts(m, "#    -----------------\n");
3612         seq_printf(m, "#    | task: %.16s-%d "
3613                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3614                    data->comm, data->pid,
3615                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3616                    data->policy, data->rt_priority);
3617         seq_puts(m, "#    -----------------\n");
3618
3619         if (data->critical_start) {
3620                 seq_puts(m, "#  => started at: ");
3621                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3622                 trace_print_seq(m, &iter->seq);
3623                 seq_puts(m, "\n#  => ended at:   ");
3624                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3625                 trace_print_seq(m, &iter->seq);
3626                 seq_puts(m, "\n#\n");
3627         }
3628
3629         seq_puts(m, "#\n");
3630 }
3631
3632 static void test_cpu_buff_start(struct trace_iterator *iter)
3633 {
3634         struct trace_seq *s = &iter->seq;
3635         struct trace_array *tr = iter->tr;
3636
3637         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3638                 return;
3639
3640         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3641                 return;
3642
3643         if (cpumask_available(iter->started) &&
3644             cpumask_test_cpu(iter->cpu, iter->started))
3645                 return;
3646
3647         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3648                 return;
3649
3650         if (cpumask_available(iter->started))
3651                 cpumask_set_cpu(iter->cpu, iter->started);
3652
3653         /* Don't print started cpu buffer for the first entry of the trace */
3654         if (iter->idx > 1)
3655                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3656                                 iter->cpu);
3657 }
3658
3659 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3660 {
3661         struct trace_array *tr = iter->tr;
3662         struct trace_seq *s = &iter->seq;
3663         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3664         struct trace_entry *entry;
3665         struct trace_event *event;
3666
3667         entry = iter->ent;
3668
3669         test_cpu_buff_start(iter);
3670
3671         event = ftrace_find_event(entry->type);
3672
3673         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3674                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3675                         trace_print_lat_context(iter);
3676                 else
3677                         trace_print_context(iter);
3678         }
3679
3680         if (trace_seq_has_overflowed(s))
3681                 return TRACE_TYPE_PARTIAL_LINE;
3682
3683         if (event)
3684                 return event->funcs->trace(iter, sym_flags, event);
3685
3686         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3687
3688         return trace_handle_return(s);
3689 }
3690
3691 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3692 {
3693         struct trace_array *tr = iter->tr;
3694         struct trace_seq *s = &iter->seq;
3695         struct trace_entry *entry;
3696         struct trace_event *event;
3697
3698         entry = iter->ent;
3699
3700         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3701                 trace_seq_printf(s, "%d %d %llu ",
3702                                  entry->pid, iter->cpu, iter->ts);
3703
3704         if (trace_seq_has_overflowed(s))
3705                 return TRACE_TYPE_PARTIAL_LINE;
3706
3707         event = ftrace_find_event(entry->type);
3708         if (event)
3709                 return event->funcs->raw(iter, 0, event);
3710
3711         trace_seq_printf(s, "%d ?\n", entry->type);
3712
3713         return trace_handle_return(s);
3714 }
3715
3716 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3717 {
3718         struct trace_array *tr = iter->tr;
3719         struct trace_seq *s = &iter->seq;
3720         unsigned char newline = '\n';
3721         struct trace_entry *entry;
3722         struct trace_event *event;
3723
3724         entry = iter->ent;
3725
3726         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3727                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3728                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3729                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3730                 if (trace_seq_has_overflowed(s))
3731                         return TRACE_TYPE_PARTIAL_LINE;
3732         }
3733
3734         event = ftrace_find_event(entry->type);
3735         if (event) {
3736                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3737                 if (ret != TRACE_TYPE_HANDLED)
3738                         return ret;
3739         }
3740
3741         SEQ_PUT_FIELD(s, newline);
3742
3743         return trace_handle_return(s);
3744 }
3745
3746 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3747 {
3748         struct trace_array *tr = iter->tr;
3749         struct trace_seq *s = &iter->seq;
3750         struct trace_entry *entry;
3751         struct trace_event *event;
3752
3753         entry = iter->ent;
3754
3755         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3756                 SEQ_PUT_FIELD(s, entry->pid);
3757                 SEQ_PUT_FIELD(s, iter->cpu);
3758                 SEQ_PUT_FIELD(s, iter->ts);
3759                 if (trace_seq_has_overflowed(s))
3760                         return TRACE_TYPE_PARTIAL_LINE;
3761         }
3762
3763         event = ftrace_find_event(entry->type);
3764         return event ? event->funcs->binary(iter, 0, event) :
3765                 TRACE_TYPE_HANDLED;
3766 }
3767
3768 int trace_empty(struct trace_iterator *iter)
3769 {
3770         struct ring_buffer_iter *buf_iter;
3771         int cpu;
3772
3773         /* If we are looking at one CPU buffer, only check that one */
3774         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3775                 cpu = iter->cpu_file;
3776                 buf_iter = trace_buffer_iter(iter, cpu);
3777                 if (buf_iter) {
3778                         if (!ring_buffer_iter_empty(buf_iter))
3779                                 return 0;
3780                 } else {
3781                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3782                                 return 0;
3783                 }
3784                 return 1;
3785         }
3786
3787         for_each_tracing_cpu(cpu) {
3788                 buf_iter = trace_buffer_iter(iter, cpu);
3789                 if (buf_iter) {
3790                         if (!ring_buffer_iter_empty(buf_iter))
3791                                 return 0;
3792                 } else {
3793                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3794                                 return 0;
3795                 }
3796         }
3797
3798         return 1;
3799 }
3800
3801 /*  Called with trace_event_read_lock() held. */
3802 enum print_line_t print_trace_line(struct trace_iterator *iter)
3803 {
3804         struct trace_array *tr = iter->tr;
3805         unsigned long trace_flags = tr->trace_flags;
3806         enum print_line_t ret;
3807
3808         if (iter->lost_events) {
3809                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3810                                  iter->cpu, iter->lost_events);
3811                 if (trace_seq_has_overflowed(&iter->seq))
3812                         return TRACE_TYPE_PARTIAL_LINE;
3813         }
3814
3815         if (iter->trace && iter->trace->print_line) {
3816                 ret = iter->trace->print_line(iter);
3817                 if (ret != TRACE_TYPE_UNHANDLED)
3818                         return ret;
3819         }
3820
3821         if (iter->ent->type == TRACE_BPUTS &&
3822                         trace_flags & TRACE_ITER_PRINTK &&
3823                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3824                 return trace_print_bputs_msg_only(iter);
3825
3826         if (iter->ent->type == TRACE_BPRINT &&
3827                         trace_flags & TRACE_ITER_PRINTK &&
3828                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3829                 return trace_print_bprintk_msg_only(iter);
3830
3831         if (iter->ent->type == TRACE_PRINT &&
3832                         trace_flags & TRACE_ITER_PRINTK &&
3833                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834                 return trace_print_printk_msg_only(iter);
3835
3836         if (trace_flags & TRACE_ITER_BIN)
3837                 return print_bin_fmt(iter);
3838
3839         if (trace_flags & TRACE_ITER_HEX)
3840                 return print_hex_fmt(iter);
3841
3842         if (trace_flags & TRACE_ITER_RAW)
3843                 return print_raw_fmt(iter);
3844
3845         return print_trace_fmt(iter);
3846 }
3847
3848 void trace_latency_header(struct seq_file *m)
3849 {
3850         struct trace_iterator *iter = m->private;
3851         struct trace_array *tr = iter->tr;
3852
3853         /* print nothing if the buffers are empty */
3854         if (trace_empty(iter))
3855                 return;
3856
3857         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3858                 print_trace_header(m, iter);
3859
3860         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3861                 print_lat_help_header(m);
3862 }
3863
3864 void trace_default_header(struct seq_file *m)
3865 {
3866         struct trace_iterator *iter = m->private;
3867         struct trace_array *tr = iter->tr;
3868         unsigned long trace_flags = tr->trace_flags;
3869
3870         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3871                 return;
3872
3873         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3874                 /* print nothing if the buffers are empty */
3875                 if (trace_empty(iter))
3876                         return;
3877                 print_trace_header(m, iter);
3878                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3879                         print_lat_help_header(m);
3880         } else {
3881                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3882                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3883                                 print_func_help_header_irq(iter->trace_buffer,
3884                                                            m, trace_flags);
3885                         else
3886                                 print_func_help_header(iter->trace_buffer, m,
3887                                                        trace_flags);
3888                 }
3889         }
3890 }
3891
3892 static void test_ftrace_alive(struct seq_file *m)
3893 {
3894         if (!ftrace_is_dead())
3895                 return;
3896         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3897                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3898 }
3899
3900 #ifdef CONFIG_TRACER_MAX_TRACE
3901 static void show_snapshot_main_help(struct seq_file *m)
3902 {
3903         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3904                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3905                     "#                      Takes a snapshot of the main buffer.\n"
3906                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3907                     "#                      (Doesn't have to be '2'; works with any number that\n"
3908                     "#                       is not a '0' or '1')\n");
3909 }
3910
3911 static void show_snapshot_percpu_help(struct seq_file *m)
3912 {
3913         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3914 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3915         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3916                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3917 #else
3918         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3919                     "#                     Must use main snapshot file to allocate.\n");
3920 #endif
3921         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3922                     "#                      (Doesn't have to be '2'; works with any number that\n"
3923                     "#                       is not a '0' or '1')\n");
3924 }
3925
3926 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3927 {
3928         if (iter->tr->allocated_snapshot)
3929                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3930         else
3931                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3932
3933         seq_puts(m, "# Snapshot commands:\n");
3934         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3935                 show_snapshot_main_help(m);
3936         else
3937                 show_snapshot_percpu_help(m);
3938 }
3939 #else
3940 /* Should never be called */
3941 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3942 #endif
3943
3944 static int s_show(struct seq_file *m, void *v)
3945 {
3946         struct trace_iterator *iter = v;
3947         int ret;
3948
3949         if (iter->ent == NULL) {
3950                 if (iter->tr) {
3951                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3952                         seq_puts(m, "#\n");
3953                         test_ftrace_alive(m);
3954                 }
3955                 if (iter->snapshot && trace_empty(iter))
3956                         print_snapshot_help(m, iter);
3957                 else if (iter->trace && iter->trace->print_header)
3958                         iter->trace->print_header(m);
3959                 else
3960                         trace_default_header(m);
3961
3962         } else if (iter->leftover) {
3963                 /*
3964                  * If we filled the seq_file buffer earlier, we
3965                  * want to just show it now.
3966                  */
3967                 ret = trace_print_seq(m, &iter->seq);
3968
3969                 /* ret should this time be zero, but you never know */
3970                 iter->leftover = ret;
3971
3972         } else {
3973                 print_trace_line(iter);
3974                 ret = trace_print_seq(m, &iter->seq);
3975                 /*
3976                  * If we overflow the seq_file buffer, then it will
3977                  * ask us for this data again at start up.
3978                  * Use that instead.
3979                  *  ret is 0 if seq_file write succeeded.
3980                  *        -1 otherwise.
3981                  */
3982                 iter->leftover = ret;
3983         }
3984
3985         return 0;
3986 }
3987
3988 /*
3989  * Should be used after trace_array_get(); trace_types_lock
3990  * ensures that i_cdev was already initialized.
3991  */
3992 static inline int tracing_get_cpu(struct inode *inode)
3993 {
3994         if (inode->i_cdev) /* See trace_create_cpu_file() */
3995                 return (long)inode->i_cdev - 1;
3996         return RING_BUFFER_ALL_CPUS;
3997 }
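
/*
 * Example, following the encoding implied by the "- 1" above:
 * trace_create_cpu_file() stores cpu + 1 in i_cdev, so reading the
 * per_cpu/cpu2/trace file yields 2 here, while files created without a
 * CPU (i_cdev == NULL) report RING_BUFFER_ALL_CPUS.
 */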
3998
3999 static const struct seq_operations tracer_seq_ops = {
4000         .start          = s_start,
4001         .next           = s_next,
4002         .stop           = s_stop,
4003         .show           = s_show,
4004 };
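
/*
 * Rough sketch of how seq_read() drives these callbacks when the trace
 * file is read (standard seq_file behaviour, simplified):
 *
 *	p = s_start(m, &pos);
 *	while (p && !IS_ERR(p)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 */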
4005
4006 static struct trace_iterator *
4007 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4008 {
4009         struct trace_array *tr = inode->i_private;
4010         struct trace_iterator *iter;
4011         int cpu;
4012
4013         if (tracing_disabled)
4014                 return ERR_PTR(-ENODEV);
4015
4016         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4017         if (!iter)
4018                 return ERR_PTR(-ENOMEM);
4019
4020         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4021                                     GFP_KERNEL);
4022         if (!iter->buffer_iter)
4023                 goto release;
4024
4025         /*
4026          * We make a copy of the current tracer to avoid concurrent
4027          * changes on it while we are reading.
4028          */
4029         mutex_lock(&trace_types_lock);
4030         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4031         if (!iter->trace)
4032                 goto fail;
4033
4034         *iter->trace = *tr->current_trace;
4035
4036         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4037                 goto fail;
4038
4039         iter->tr = tr;
4040
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042         /* Currently only the top directory has a snapshot */
4043         if (tr->current_trace->print_max || snapshot)
4044                 iter->trace_buffer = &tr->max_buffer;
4045         else
4046 #endif
4047                 iter->trace_buffer = &tr->trace_buffer;
4048         iter->snapshot = snapshot;
4049         iter->pos = -1;
4050         iter->cpu_file = tracing_get_cpu(inode);
4051         mutex_init(&iter->mutex);
4052
4053         /* Notify the tracer early; before we stop tracing. */
4054         if (iter->trace && iter->trace->open)
4055                 iter->trace->open(iter);
4056
4057         /* Annotate start of buffers if we had overruns */
4058         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4059                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4060
4061         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4062         if (trace_clocks[tr->clock_id].in_ns)
4063                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4064
4065         /* stop the trace while dumping if we are not opening "snapshot" */
4066         if (!iter->snapshot)
4067                 tracing_stop_tr(tr);
4068
4069         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4070                 for_each_tracing_cpu(cpu) {
4071                         iter->buffer_iter[cpu] =
4072                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4073                 }
4074                 ring_buffer_read_prepare_sync();
4075                 for_each_tracing_cpu(cpu) {
4076                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4077                         tracing_iter_reset(iter, cpu);
4078                 }
4079         } else {
4080                 cpu = iter->cpu_file;
4081                 iter->buffer_iter[cpu] =
4082                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4083                 ring_buffer_read_prepare_sync();
4084                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4085                 tracing_iter_reset(iter, cpu);
4086         }
4087
4088         mutex_unlock(&trace_types_lock);
4089
4090         return iter;
4091
4092  fail:
4093         mutex_unlock(&trace_types_lock);
4094         kfree(iter->trace);
4095         kfree(iter->buffer_iter);
4096 release:
4097         seq_release_private(inode, file);
4098         return ERR_PTR(-ENOMEM);
4099 }
4100
4101 int tracing_open_generic(struct inode *inode, struct file *filp)
4102 {
4103         if (tracing_disabled)
4104                 return -ENODEV;
4105
4106         filp->private_data = inode->i_private;
4107         return 0;
4108 }
4109
4110 bool tracing_is_disabled(void)
4111 {
4112         return (tracing_disabled) ? true : false;
4113 }
4114
4115 /*
4116  * Open and update trace_array ref count.
4117  * Must have the current trace_array passed to it.
4118  */
4119 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4120 {
4121         struct trace_array *tr = inode->i_private;
4122
4123         if (tracing_disabled)
4124                 return -ENODEV;
4125
4126         if (trace_array_get(tr) < 0)
4127                 return -ENODEV;
4128
4129         filp->private_data = inode->i_private;
4130
4131         return 0;
4132 }
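
/*
 * Files opened with tracing_open_generic_tr() must drop the reference
 * again on release, typically by pairing it with
 * tracing_release_generic_tr(), as tracing_cpumask_fops does below.
 * Sketch (foo_fops is hypothetical):
 *
 *	static const struct file_operations foo_fops = {
 *		.open		= tracing_open_generic_tr,
 *		.release	= tracing_release_generic_tr,
 *	};
 */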
4133
4134 static int tracing_release(struct inode *inode, struct file *file)
4135 {
4136         struct trace_array *tr = inode->i_private;
4137         struct seq_file *m = file->private_data;
4138         struct trace_iterator *iter;
4139         int cpu;
4140
4141         if (!(file->f_mode & FMODE_READ)) {
4142                 trace_array_put(tr);
4143                 return 0;
4144         }
4145
4146         /* Writes do not use seq_file */
4147         iter = m->private;
4148         mutex_lock(&trace_types_lock);
4149
4150         for_each_tracing_cpu(cpu) {
4151                 if (iter->buffer_iter[cpu])
4152                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4153         }
4154
4155         if (iter->trace && iter->trace->close)
4156                 iter->trace->close(iter);
4157
4158         if (!iter->snapshot)
4159                 /* reenable tracing if it was previously enabled */
4160                 tracing_start_tr(tr);
4161
4162         __trace_array_put(tr);
4163
4164         mutex_unlock(&trace_types_lock);
4165
4166         mutex_destroy(&iter->mutex);
4167         free_cpumask_var(iter->started);
4168         kfree(iter->trace);
4169         kfree(iter->buffer_iter);
4170         seq_release_private(inode, file);
4171
4172         return 0;
4173 }
4174
4175 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4176 {
4177         struct trace_array *tr = inode->i_private;
4178
4179         trace_array_put(tr);
4180         return 0;
4181 }
4182
4183 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4184 {
4185         struct trace_array *tr = inode->i_private;
4186
4187         trace_array_put(tr);
4188
4189         return single_release(inode, file);
4190 }
4191
4192 static int tracing_open(struct inode *inode, struct file *file)
4193 {
4194         struct trace_array *tr = inode->i_private;
4195         struct trace_iterator *iter;
4196         int ret = 0;
4197
4198         if (trace_array_get(tr) < 0)
4199                 return -ENODEV;
4200
4201         /* If this file was open for write, then erase contents */
4202         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4203                 int cpu = tracing_get_cpu(inode);
4204                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4205
4206 #ifdef CONFIG_TRACER_MAX_TRACE
4207                 if (tr->current_trace->print_max)
4208                         trace_buf = &tr->max_buffer;
4209 #endif
4210
4211                 if (cpu == RING_BUFFER_ALL_CPUS)
4212                         tracing_reset_online_cpus(trace_buf);
4213                 else
4214                         tracing_reset(trace_buf, cpu);
4215         }
4216
4217         if (file->f_mode & FMODE_READ) {
4218                 iter = __tracing_open(inode, file, false);
4219                 if (IS_ERR(iter))
4220                         ret = PTR_ERR(iter);
4221                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4222                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4223         }
4224
4225         if (ret < 0)
4226                 trace_array_put(tr);
4227
4228         return ret;
4229 }
4230
4231 /*
4232  * Some tracers are not suitable for instance buffers.
4233  * A tracer is always available for the global array (toplevel)
4234  * or if it explicitly states that it is.
4235  */
4236 static bool
4237 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4238 {
4239         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4240 }
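
/*
 * Sketch (hypothetical tracer): a tracer opts in to instance buffers by
 * setting .allow_instances in its struct tracer, e.g.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		 = "my_tracer",
 *		.allow_instances = true,
 *	};
 */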
4241
4242 /* Find the next tracer that this trace array may use */
4243 static struct tracer *
4244 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4245 {
4246         while (t && !trace_ok_for_array(t, tr))
4247                 t = t->next;
4248
4249         return t;
4250 }
4251
4252 static void *
4253 t_next(struct seq_file *m, void *v, loff_t *pos)
4254 {
4255         struct trace_array *tr = m->private;
4256         struct tracer *t = v;
4257
4258         (*pos)++;
4259
4260         if (t)
4261                 t = get_tracer_for_array(tr, t->next);
4262
4263         return t;
4264 }
4265
4266 static void *t_start(struct seq_file *m, loff_t *pos)
4267 {
4268         struct trace_array *tr = m->private;
4269         struct tracer *t;
4270         loff_t l = 0;
4271
4272         mutex_lock(&trace_types_lock);
4273
4274         t = get_tracer_for_array(tr, trace_types);
4275         for (; t && l < *pos; t = t_next(m, t, &l))
4276                         ;
4277
4278         return t;
4279 }
4280
4281 static void t_stop(struct seq_file *m, void *p)
4282 {
4283         mutex_unlock(&trace_types_lock);
4284 }
4285
4286 static int t_show(struct seq_file *m, void *v)
4287 {
4288         struct tracer *t = v;
4289
4290         if (!t)
4291                 return 0;
4292
4293         seq_puts(m, t->name);
4294         if (t->next)
4295                 seq_putc(m, ' ');
4296         else
4297                 seq_putc(m, '\n');
4298
4299         return 0;
4300 }
4301
4302 static const struct seq_operations show_traces_seq_ops = {
4303         .start          = t_start,
4304         .next           = t_next,
4305         .stop           = t_stop,
4306         .show           = t_show,
4307 };
4308
4309 static int show_traces_open(struct inode *inode, struct file *file)
4310 {
4311         struct trace_array *tr = inode->i_private;
4312         struct seq_file *m;
4313         int ret;
4314
4315         if (tracing_disabled)
4316                 return -ENODEV;
4317
4318         ret = seq_open(file, &show_traces_seq_ops);
4319         if (ret)
4320                 return ret;
4321
4322         m = file->private_data;
4323         m->private = tr;
4324
4325         return 0;
4326 }
4327
4328 static ssize_t
4329 tracing_write_stub(struct file *filp, const char __user *ubuf,
4330                    size_t count, loff_t *ppos)
4331 {
4332         return count;
4333 }
4334
4335 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4336 {
4337         int ret;
4338
4339         if (file->f_mode & FMODE_READ)
4340                 ret = seq_lseek(file, offset, whence);
4341         else
4342                 file->f_pos = ret = 0;
4343
4344         return ret;
4345 }
4346
4347 static const struct file_operations tracing_fops = {
4348         .open           = tracing_open,
4349         .read           = seq_read,
4350         .write          = tracing_write_stub,
4351         .llseek         = tracing_lseek,
4352         .release        = tracing_release,
4353 };
4354
4355 static const struct file_operations show_traces_fops = {
4356         .open           = show_traces_open,
4357         .read           = seq_read,
4358         .release        = seq_release,
4359         .llseek         = seq_lseek,
4360 };
4361
4362 static ssize_t
4363 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4364                      size_t count, loff_t *ppos)
4365 {
4366         struct trace_array *tr = file_inode(filp)->i_private;
4367         char *mask_str;
4368         int len;
4369
4370         len = snprintf(NULL, 0, "%*pb\n",
4371                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4372         mask_str = kmalloc(len, GFP_KERNEL);
4373         if (!mask_str)
4374                 return -ENOMEM;
4375
4376         len = snprintf(mask_str, len, "%*pb\n",
4377                        cpumask_pr_args(tr->tracing_cpumask));
4378         if (len >= count) {
4379                 count = -EINVAL;
4380                 goto out_err;
4381         }
4382         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4383
4384 out_err:
4385         kfree(mask_str);
4386
4387         return count;
4388 }
4389
4390 static ssize_t
4391 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4392                       size_t count, loff_t *ppos)
4393 {
4394         struct trace_array *tr = file_inode(filp)->i_private;
4395         cpumask_var_t tracing_cpumask_new;
4396         int err, cpu;
4397
4398         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4399                 return -ENOMEM;
4400
4401         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4402         if (err)
4403                 goto err_unlock;
4404
4405         local_irq_disable();
4406         arch_spin_lock(&tr->max_lock);
4407         for_each_tracing_cpu(cpu) {
4408                 /*
4409                  * Increase/decrease the disabled counter if we are
4410                  * about to flip a bit in the cpumask:
4411                  */
4412                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4413                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4414                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4415                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4416                 }
4417                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4418                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4419                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4420                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4421                 }
4422         }
4423         arch_spin_unlock(&tr->max_lock);
4424         local_irq_enable();
4425
4426         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4427         free_cpumask_var(tracing_cpumask_new);
4428
4429         return count;
4430
4431 err_unlock:
4432         free_cpumask_var(tracing_cpumask_new);
4433
4434         return err;
4435 }
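
/*
 * Example: the mask is written as hex, mirroring what
 * tracing_cpumask_read() prints, e.g.
 *
 *	# echo 3 > tracing_cpumask
 *
 * limits recording to CPUs 0 and 1; the loop above disables the ring
 * buffer on CPUs that were cleared and re-enables it on CPUs newly set.
 */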
4436
4437 static const struct file_operations tracing_cpumask_fops = {
4438         .open           = tracing_open_generic_tr,
4439         .read           = tracing_cpumask_read,
4440         .write          = tracing_cpumask_write,
4441         .release        = tracing_release_generic_tr,
4442         .llseek         = generic_file_llseek,
4443 };
4444
4445 static int tracing_trace_options_show(struct seq_file *m, void *v)
4446 {
4447         struct tracer_opt *trace_opts;
4448         struct trace_array *tr = m->private;
4449         u32 tracer_flags;
4450         int i;
4451
4452         mutex_lock(&trace_types_lock);
4453         tracer_flags = tr->current_trace->flags->val;
4454         trace_opts = tr->current_trace->flags->opts;
4455
4456         for (i = 0; trace_options[i]; i++) {
4457                 if (tr->trace_flags & (1 << i))
4458                         seq_printf(m, "%s\n", trace_options[i]);
4459                 else
4460                         seq_printf(m, "no%s\n", trace_options[i]);
4461         }
4462
4463         for (i = 0; trace_opts[i].name; i++) {
4464                 if (tracer_flags & trace_opts[i].bit)
4465                         seq_printf(m, "%s\n", trace_opts[i].name);
4466                 else
4467                         seq_printf(m, "no%s\n", trace_opts[i].name);
4468         }
4469         mutex_unlock(&trace_types_lock);
4470
4471         return 0;
4472 }
4473
4474 static int __set_tracer_option(struct trace_array *tr,
4475                                struct tracer_flags *tracer_flags,
4476                                struct tracer_opt *opts, int neg)
4477 {
4478         struct tracer *trace = tracer_flags->trace;
4479         int ret;
4480
4481         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4482         if (ret)
4483                 return ret;
4484
4485         if (neg)
4486                 tracer_flags->val &= ~opts->bit;
4487         else
4488                 tracer_flags->val |= opts->bit;
4489         return 0;
4490 }
4491
4492 /* Try to assign a tracer specific option */
4493 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4494 {
4495         struct tracer *trace = tr->current_trace;
4496         struct tracer_flags *tracer_flags = trace->flags;
4497         struct tracer_opt *opts = NULL;
4498         int i;
4499
4500         for (i = 0; tracer_flags->opts[i].name; i++) {
4501                 opts = &tracer_flags->opts[i];
4502
4503                 if (strcmp(cmp, opts->name) == 0)
4504                         return __set_tracer_option(tr, trace->flags, opts, neg);
4505         }
4506
4507         return -EINVAL;
4508 }
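
/*
 * Example: the option names come from the current tracer's flags->opts[]
 * table. With the function tracer selected as current_tracer,
 *
 *	# echo func_stack_trace > trace_options
 *	# echo nofunc_stack_trace > trace_options
 *
 * toggle that tracer's "func_stack_trace" option through
 * __set_tracer_option() above.
 */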
4509
4510 /* Some tracers require overwrite to stay enabled */
4511 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4512 {
4513         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4514                 return -1;
4515
4516         return 0;
4517 }
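
/*
 * Sketch (hypothetical tracer): a latency tracer that depends on
 * overwrite mode typically calls this from its flag_changed callback,
 * roughly:
 *
 *	static int my_flag_changed(struct trace_array *tr, u32 mask, int set)
 *	{
 *		return trace_keep_overwrite(tr->current_trace, mask, set);
 *	}
 *
 * so that set_tracer_flag() below refuses to clear TRACE_ITER_OVERWRITE
 * while the tracer is enabled.
 */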
4518
4519 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4520 {
4521         /* do nothing if flag is already set */
4522         if (!!(tr->trace_flags & mask) == !!enabled)
4523                 return 0;
4524
4525         /* Give the tracer a chance to approve the change */
4526         if (tr->current_trace->flag_changed)
4527                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4528                         return -EINVAL;
4529
4530         if (enabled)
4531                 tr->trace_flags |= mask;
4532         else
4533                 tr->trace_flags &= ~mask;
4534
4535         if (mask == TRACE_ITER_RECORD_CMD)
4536                 trace_event_enable_cmd_record(enabled);
4537
4538         if (mask == TRACE_ITER_RECORD_TGID) {
4539                 if (!tgid_map)
4540                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4541                                            sizeof(*tgid_map),
4542                                            GFP_KERNEL);
4543                 if (!tgid_map) {
4544                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4545                         return -ENOMEM;
4546                 }
4547
4548                 trace_event_enable_tgid_record(enabled);
4549         }
4550
4551         if (mask == TRACE_ITER_EVENT_FORK)
4552                 trace_event_follow_fork(tr, enabled);
4553
4554         if (mask == TRACE_ITER_FUNC_FORK)
4555                 ftrace_pid_follow_fork(tr, enabled);
4556
4557         if (mask == TRACE_ITER_OVERWRITE) {
4558                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4559 #ifdef CONFIG_TRACER_MAX_TRACE
4560                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4561 #endif
4562         }
4563
4564         if (mask == TRACE_ITER_PRINTK) {
4565                 trace_printk_start_stop_comm(enabled);
4566                 trace_printk_control(enabled);
4567         }
4568
4569         return 0;
4570 }
4571
4572 static int trace_set_options(struct trace_array *tr, char *option)
4573 {
4574         char *cmp;
4575         int neg = 0;
4576         int ret;
4577         size_t orig_len = strlen(option);
4578         int len;
4579
4580         cmp = strstrip(option);
4581
4582         len = str_has_prefix(cmp, "no");
4583         if (len)
4584                 neg = 1;
4585
4586         cmp += len;
4587
4588         mutex_lock(&trace_types_lock);
4589
4590         ret = match_string(trace_options, -1, cmp);
4591         /* If no option could be set, test the specific tracer options */
4592         if (ret < 0)
4593                 ret = set_tracer_option(tr, cmp, neg);
4594         else
4595                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4596
4597         mutex_unlock(&trace_types_lock);
4598
4599         /*
4600          * If the first trailing whitespace is replaced with '\0' by strstrip,
4601          * turn it back into a space.
4602          */
4603         if (orig_len > strlen(option))
4604                 option[strlen(option)] = ' ';
4605
4606         return ret;
4607 }
4608
4609 static void __init apply_trace_boot_options(void)
4610 {
4611         char *buf = trace_boot_options_buf;
4612         char *option;
4613
4614         while (true) {
4615                 option = strsep(&buf, ",");
4616
4617                 if (!option)
4618                         break;
4619
4620                 if (*option)
4621                         trace_set_options(&global_trace, option);
4622
4623                 /* Put back the comma to allow this to be called again */
4624                 if (buf)
4625                         *(buf - 1) = ',';
4626         }
4627 }
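
/*
 * Example, assuming the standard "trace_options=" boot parameter that
 * fills trace_boot_options_buf: booting with
 *
 *	trace_options=sym-addr,noprint-parent
 *
 * is split at the commas above and each token is applied through
 * trace_set_options(), exactly as if it had been written to the
 * trace_options file at run time.
 */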
4628
4629 static ssize_t
4630 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4631                         size_t cnt, loff_t *ppos)
4632 {
4633         struct seq_file *m = filp->private_data;
4634         struct trace_array *tr = m->private;
4635         char buf[64];
4636         int ret;
4637
4638         if (cnt >= sizeof(buf))
4639                 return -EINVAL;
4640
4641         if (copy_from_user(buf, ubuf, cnt))
4642                 return -EFAULT;
4643
4644         buf[cnt] = 0;
4645
4646         ret = trace_set_options(tr, buf);
4647         if (ret < 0)
4648                 return ret;
4649
4650         *ppos += cnt;
4651
4652         return cnt;
4653 }
4654
4655 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4656 {
4657         struct trace_array *tr = inode->i_private;
4658         int ret;
4659
4660         if (tracing_disabled)
4661                 return -ENODEV;
4662
4663         if (trace_array_get(tr) < 0)
4664                 return -ENODEV;
4665
4666         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4667         if (ret < 0)
4668                 trace_array_put(tr);
4669
4670         return ret;
4671 }
4672
4673 static const struct file_operations tracing_iter_fops = {
4674         .open           = tracing_trace_options_open,
4675         .read           = seq_read,
4676         .llseek         = seq_lseek,
4677         .release        = tracing_single_release_tr,
4678         .write          = tracing_trace_options_write,
4679 };
4680
4681 static const char readme_msg[] =
4682         "tracing mini-HOWTO:\n\n"
4683         "# echo 0 > tracing_on : quick way to disable tracing\n"
4684         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4685         " Important files:\n"
4686         "  trace\t\t\t- The static contents of the buffer\n"
4687         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4688         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4689         "  current_tracer\t- function and latency tracers\n"
4690         "  available_tracers\t- list of configured tracers for current_tracer\n"
4691         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4692         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4693         "  trace_clock\t\t- change the clock used to order events\n"
4694         "       local:   Per cpu clock but may not be synced across CPUs\n"
4695         "      global:   Synced across CPUs but slows tracing down.\n"
4696         "     counter:   Not a clock, but just an increment\n"
4697         "      uptime:   Jiffy counter from time of boot\n"
4698         "        perf:   Same clock that perf events use\n"
4699 #ifdef CONFIG_X86_64
4700         "     x86-tsc:   TSC cycle counter\n"
4701 #endif
4702         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4703         "       delta:   Delta difference against a buffer-wide timestamp\n"
4704         "    absolute:   Absolute (standalone) timestamp\n"
4705         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4706         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4707         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4708         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4709         "\t\t\t  Remove sub-buffer with rmdir\n"
4710         "  trace_options\t\t- Set format or modify how tracing happens\n"
4711         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4712         "\t\t\t  option name\n"
4713         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4714 #ifdef CONFIG_DYNAMIC_FTRACE
4715         "\n  available_filter_functions - list of functions that can be filtered on\n"
4716         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4717         "\t\t\t  functions\n"
4718         "\t     accepts: func_full_name or glob-matching-pattern\n"
4719         "\t     modules: Can select a group via module\n"
4720         "\t      Format: :mod:<module-name>\n"
4721         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4722         "\t    triggers: a command to perform when function is hit\n"
4723         "\t      Format: <function>:<trigger>[:count]\n"
4724         "\t     trigger: traceon, traceoff\n"
4725         "\t\t      enable_event:<system>:<event>\n"
4726         "\t\t      disable_event:<system>:<event>\n"
4727 #ifdef CONFIG_STACKTRACE
4728         "\t\t      stacktrace\n"
4729 #endif
4730 #ifdef CONFIG_TRACER_SNAPSHOT
4731         "\t\t      snapshot\n"
4732 #endif
4733         "\t\t      dump\n"
4734         "\t\t      cpudump\n"
4735         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4736         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4737         "\t     The first one will disable tracing every time do_fault is hit\n"
4738         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4739         "\t       The first time do_trap is hit and it disables tracing, the\n"
4740         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4741         "\t       the counter will not decrement. It only decrements when the\n"
4742         "\t       trigger did work\n"
4743         "\t     To remove trigger without count:\n"
4744         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4745         "\t     To remove trigger with a count:\n"
4746         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4747         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4748         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4749         "\t    modules: Can select a group via module command :mod:\n"
4750         "\t    Does not accept triggers\n"
4751 #endif /* CONFIG_DYNAMIC_FTRACE */
4752 #ifdef CONFIG_FUNCTION_TRACER
4753         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4754         "\t\t    (function)\n"
4755 #endif
4756 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4757         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4758         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4759         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4760 #endif
4761 #ifdef CONFIG_TRACER_SNAPSHOT
4762         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4763         "\t\t\t  snapshot buffer. Read the contents for more\n"
4764         "\t\t\t  information\n"
4765 #endif
4766 #ifdef CONFIG_STACK_TRACER
4767         "  stack_trace\t\t- Shows the max stack trace when active\n"
4768         "  stack_max_size\t- Shows current max stack size that was traced\n"
4769         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4770         "\t\t\t  new trace)\n"
4771 #ifdef CONFIG_DYNAMIC_FTRACE
4772         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4773         "\t\t\t  traces\n"
4774 #endif
4775 #endif /* CONFIG_STACK_TRACER */
4776 #ifdef CONFIG_DYNAMIC_EVENTS
4777         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4778         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4779 #endif
4780 #ifdef CONFIG_KPROBE_EVENTS
4781         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4782         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4783 #endif
4784 #ifdef CONFIG_UPROBE_EVENTS
4785         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4786         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4787 #endif
4788 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4789         "\t  accepts: event-definitions (one definition per line)\n"
4790         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4791         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4792 #ifdef CONFIG_HIST_TRIGGERS
4793         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4794 #endif
4795         "\t           -:[<group>/]<event>\n"
4796 #ifdef CONFIG_KPROBE_EVENTS
4797         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4798   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4799 #endif
4800 #ifdef CONFIG_UPROBE_EVENTS
4801   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4802 #endif
4803         "\t     args: <name>=fetcharg[:type]\n"
4804         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4805 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4806         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4807 #else
4808         "\t           $stack<index>, $stack, $retval, $comm\n"
4809 #endif
4810         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4811         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4812         "\t           <type>\\[<array-size>\\]\n"
4813 #ifdef CONFIG_HIST_TRIGGERS
4814         "\t    field: <stype> <name>;\n"
4815         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4816         "\t           [unsigned] char/int/long\n"
4817 #endif
4818 #endif
4819         "  events/\t\t- Directory containing all trace event subsystems:\n"
4820         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4821         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4822         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4823         "\t\t\t  events\n"
4824         "      filter\t\t- If set, only events passing filter are traced\n"
4825         "  events/<system>/<event>/\t- Directory containing control files for\n"
4826         "\t\t\t  <event>:\n"
4827         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4828         "      filter\t\t- If set, only events passing filter are traced\n"
4829         "      trigger\t\t- If set, a command to perform when event is hit\n"
4830         "\t    Format: <trigger>[:count][if <filter>]\n"
4831         "\t   trigger: traceon, traceoff\n"
4832         "\t            enable_event:<system>:<event>\n"
4833         "\t            disable_event:<system>:<event>\n"
4834 #ifdef CONFIG_HIST_TRIGGERS
4835         "\t            enable_hist:<system>:<event>\n"
4836         "\t            disable_hist:<system>:<event>\n"
4837 #endif
4838 #ifdef CONFIG_STACKTRACE
4839         "\t\t    stacktrace\n"
4840 #endif
4841 #ifdef CONFIG_TRACER_SNAPSHOT
4842         "\t\t    snapshot\n"
4843 #endif
4844 #ifdef CONFIG_HIST_TRIGGERS
4845         "\t\t    hist (see below)\n"
4846 #endif
4847         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4848         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4849         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4850         "\t                  events/block/block_unplug/trigger\n"
4851         "\t   The first disables tracing every time block_unplug is hit.\n"
4852         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4853         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4854         "\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
4855         "\t   Like function triggers, the counter is only decremented if it\n"
4856         "\t    enabled or disabled tracing.\n"
4857         "\t   To remove a trigger without a count:\n"
4858         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4859         "\t   To remove a trigger with a count:\n"
4860         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4861         "\t   Filters can be ignored when removing a trigger.\n"
4862 #ifdef CONFIG_HIST_TRIGGERS
4863         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4864         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4865         "\t            [:values=<field1[,field2,...]>]\n"
4866         "\t            [:sort=<field1[,field2,...]>]\n"
4867         "\t            [:size=#entries]\n"
4868         "\t            [:pause][:continue][:clear]\n"
4869         "\t            [:name=histname1]\n"
4870         "\t            [:<handler>.<action>]\n"
4871         "\t            [if <filter>]\n\n"
4872         "\t    When a matching event is hit, an entry is added to a hash\n"
4873         "\t    table using the key(s) and value(s) named, and the value of a\n"
4874         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4875         "\t    correspond to fields in the event's format description.  Keys\n"
4876         "\t    can be any field, or the special string 'stacktrace'.\n"
4877         "\t    Compound keys consisting of up to two fields can be specified\n"
4878         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4879         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4880         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4881         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4882         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4883         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4884         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4885         "\t    its histogram data will be shared with other triggers of the\n"
4886         "\t    same name, and trigger hits will update this common data.\n\n"
4887         "\t    Reading the 'hist' file for the event will dump the hash\n"
4888         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4889         "\t    triggers attached to an event, there will be a table for each\n"
4890         "\t    trigger in the output.  The table displayed for a named\n"
4891         "\t    trigger will be the same as any other instance having the\n"
4892         "\t    same name.  The default format used to display a given field\n"
4893         "\t    can be modified by appending any of the following modifiers\n"
4894         "\t    to the field name, as applicable:\n\n"
4895         "\t            .hex        display a number as a hex value\n"
4896         "\t            .sym        display an address as a symbol\n"
4897         "\t            .sym-offset display an address as a symbol and offset\n"
4898         "\t            .execname   display a common_pid as a program name\n"
4899         "\t            .syscall    display a syscall id as a syscall name\n"
4900         "\t            .log2       display log2 value rather than raw number\n"
4901         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4902         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4903         "\t    trigger or to start a hist trigger but not log any events\n"
4904         "\t    until told to do so.  'continue' can be used to start or\n"
4905         "\t    restart a paused hist trigger.\n\n"
4906         "\t    The 'clear' parameter will clear the contents of a running\n"
4907         "\t    hist trigger and leave its current paused/active state\n"
4908         "\t    unchanged.\n\n"
4909         "\t    The enable_hist and disable_hist triggers can be used to\n"
4910         "\t    have one event conditionally start and stop another event's\n"
4911         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4912         "\t    the enable_event and disable_event triggers.\n\n"
4913         "\t    Hist trigger handlers and actions are executed whenever a\n"
4914         "\t    histogram entry is added or updated.  They take the form:\n\n"
4915         "\t        <handler>.<action>\n\n"
4916         "\t    The available handlers are:\n\n"
4917         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4918         "\t        onmax(var)               - invoke if var exceeds current max\n"
4919         "\t        onchange(var)            - invoke action if var changes\n\n"
4920         "\t    The available actions are:\n\n"
4921         "\t        <synthetic_event>(param list)        - generate synthetic event\n"
4922         "\t        save(field,...)                      - save current event fields\n"
4923 #ifdef CONFIG_TRACER_SNAPSHOT
4924         "\t        snapshot()                           - snapshot the trace buffer\n"
4925 #endif
4926 #endif
4927 ;
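/*
 * Illustrative use of the hist trigger interface documented in the help
 * text above (a sketch, not part of the original source; the kmem:kmalloc
 * event and its call_site/bytes_req fields are assumed to be available,
 * and tracefs is assumed mounted at /sys/kernel/tracing):
 *
 *   echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *        > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 *
 * Handler.action clauses such as onchange($var).save(field,...) can be
 * appended to the same trigger string, per the syntax listed above.
 */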
4928
4929 static ssize_t
4930 tracing_readme_read(struct file *filp, char __user *ubuf,
4931                        size_t cnt, loff_t *ppos)
4932 {
4933         return simple_read_from_buffer(ubuf, cnt, ppos,
4934                                         readme_msg, strlen(readme_msg));
4935 }
4936
4937 static const struct file_operations tracing_readme_fops = {
4938         .open           = tracing_open_generic,
4939         .read           = tracing_readme_read,
4940         .llseek         = generic_file_llseek,
4941 };
4942
4943 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4944 {
4945         int *ptr = v;
4946
4947         if (*pos || m->count)
4948                 ptr++;
4949
4950         (*pos)++;
4951
4952         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4953                 if (trace_find_tgid(*ptr))
4954                         return ptr;
4955         }
4956
4957         return NULL;
4958 }
4959
4960 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4961 {
4962         void *v;
4963         loff_t l = 0;
4964
4965         if (!tgid_map)
4966                 return NULL;
4967
4968         v = &tgid_map[0];
4969         while (l <= *pos) {
4970                 v = saved_tgids_next(m, v, &l);
4971                 if (!v)
4972                         return NULL;
4973         }
4974
4975         return v;
4976 }
4977
4978 static void saved_tgids_stop(struct seq_file *m, void *v)
4979 {
4980 }
4981
4982 static int saved_tgids_show(struct seq_file *m, void *v)
4983 {
4984         int pid = (int *)v - tgid_map;
4985
4986         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4987         return 0;
4988 }
4989
4990 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4991         .start          = saved_tgids_start,
4992         .stop           = saved_tgids_stop,
4993         .next           = saved_tgids_next,
4994         .show           = saved_tgids_show,
4995 };
4996
4997 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4998 {
4999         if (tracing_disabled)
5000                 return -ENODEV;
5001
5002         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5003 }
5004
5005
5006 static const struct file_operations tracing_saved_tgids_fops = {
5007         .open           = tracing_saved_tgids_open,
5008         .read           = seq_read,
5009         .llseek         = seq_lseek,
5010         .release        = seq_release,
5011 };
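/*
 * The saved_tgids file produced by the seq_ops above emits one
 * "<pid> <tgid>" pair per line, e.g. "1234 1230".  The map is only
 * populated while PID-to-TGID recording is enabled (the "record-tgid"
 * trace option, if that is indeed its current name).
 */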
5012
5013 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5014 {
5015         unsigned int *ptr = v;
5016
5017         if (*pos || m->count)
5018                 ptr++;
5019
5020         (*pos)++;
5021
5022         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5023              ptr++) {
5024                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5025                         continue;
5026
5027                 return ptr;
5028         }
5029
5030         return NULL;
5031 }
5032
5033 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5034 {
5035         void *v;
5036         loff_t l = 0;
5037
5038         preempt_disable();
5039         arch_spin_lock(&trace_cmdline_lock);
5040
5041         v = &savedcmd->map_cmdline_to_pid[0];
5042         while (l <= *pos) {
5043                 v = saved_cmdlines_next(m, v, &l);
5044                 if (!v)
5045                         return NULL;
5046         }
5047
5048         return v;
5049 }
5050
5051 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5052 {
5053         arch_spin_unlock(&trace_cmdline_lock);
5054         preempt_enable();
5055 }
5056
5057 static int saved_cmdlines_show(struct seq_file *m, void *v)
5058 {
5059         char buf[TASK_COMM_LEN];
5060         unsigned int *pid = v;
5061
5062         __trace_find_cmdline(*pid, buf);
5063         seq_printf(m, "%d %s\n", *pid, buf);
5064         return 0;
5065 }
5066
5067 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5068         .start          = saved_cmdlines_start,
5069         .next           = saved_cmdlines_next,
5070         .stop           = saved_cmdlines_stop,
5071         .show           = saved_cmdlines_show,
5072 };
5073
5074 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5075 {
5076         if (tracing_disabled)
5077                 return -ENODEV;
5078
5079         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5080 }
5081
5082 static const struct file_operations tracing_saved_cmdlines_fops = {
5083         .open           = tracing_saved_cmdlines_open,
5084         .read           = seq_read,
5085         .llseek         = seq_lseek,
5086         .release        = seq_release,
5087 };
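/*
 * The saved_cmdlines file emits one "<pid> <comm>" pair per line,
 * e.g. "1234 bash", for every cached pid-to-comm mapping, as produced
 * by saved_cmdlines_show() above.
 */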
5088
5089 static ssize_t
5090 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5091                                  size_t cnt, loff_t *ppos)
5092 {
5093         char buf[64];
5094         int r;
5095
5096         arch_spin_lock(&trace_cmdline_lock);
5097         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5098         arch_spin_unlock(&trace_cmdline_lock);
5099
5100         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5101 }
5102
5103 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5104 {
5105         kfree(s->saved_cmdlines);
5106         kfree(s->map_cmdline_to_pid);
5107         kfree(s);
5108 }
5109
5110 static int tracing_resize_saved_cmdlines(unsigned int val)
5111 {
5112         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5113
5114         s = kmalloc(sizeof(*s), GFP_KERNEL);
5115         if (!s)
5116                 return -ENOMEM;
5117
5118         if (allocate_cmdlines_buffer(val, s) < 0) {
5119                 kfree(s);
5120                 return -ENOMEM;
5121         }
5122
5123         arch_spin_lock(&trace_cmdline_lock);
5124         savedcmd_temp = savedcmd;
5125         savedcmd = s;
5126         arch_spin_unlock(&trace_cmdline_lock);
5127         free_saved_cmdlines_buffer(savedcmd_temp);
5128
5129         return 0;
5130 }
5131
5132 static ssize_t
5133 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5134                                   size_t cnt, loff_t *ppos)
5135 {
5136         unsigned long val;
5137         int ret;
5138
5139         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5140         if (ret)
5141                 return ret;
5142
5143         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5144         if (!val || val > PID_MAX_DEFAULT)
5145                 return -EINVAL;
5146
5147         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5148         if (ret < 0)
5149                 return ret;
5150
5151         *ppos += cnt;
5152
5153         return cnt;
5154 }
5155
5156 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5157         .open           = tracing_open_generic,
5158         .read           = tracing_saved_cmdlines_size_read,
5159         .write          = tracing_saved_cmdlines_size_write,
5160 };
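/*
 * Illustrative use of saved_cmdlines_size (a sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *   cat saved_cmdlines_size         # show the current number of cached entries
 *   echo 4096 > saved_cmdlines_size # grow the cmdline cache to 4096 entries
 *
 * Writes must be between 1 and PID_MAX_DEFAULT, as enforced above.
 */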
5161
5162 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5163 static union trace_eval_map_item *
5164 update_eval_map(union trace_eval_map_item *ptr)
5165 {
5166         if (!ptr->map.eval_string) {
5167                 if (ptr->tail.next) {
5168                         ptr = ptr->tail.next;
5169                         /* Set ptr to the next real item (skip head) */
5170                         ptr++;
5171                 } else
5172                         return NULL;
5173         }
5174         return ptr;
5175 }
5176
5177 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5178 {
5179         union trace_eval_map_item *ptr = v;
5180
5181         /*
5182          * Paranoid! If ptr points to end, we don't want to increment past it.
5183          * This really should never happen.
5184          */
5185         ptr = update_eval_map(ptr);
5186         if (WARN_ON_ONCE(!ptr))
5187                 return NULL;
5188
5189         ptr++;
5190
5191         (*pos)++;
5192
5193         ptr = update_eval_map(ptr);
5194
5195         return ptr;
5196 }
5197
5198 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5199 {
5200         union trace_eval_map_item *v;
5201         loff_t l = 0;
5202
5203         mutex_lock(&trace_eval_mutex);
5204
5205         v = trace_eval_maps;
5206         if (v)
5207                 v++;
5208
5209         while (v && l < *pos) {
5210                 v = eval_map_next(m, v, &l);
5211         }
5212
5213         return v;
5214 }
5215
5216 static void eval_map_stop(struct seq_file *m, void *v)
5217 {
5218         mutex_unlock(&trace_eval_mutex);
5219 }
5220
5221 static int eval_map_show(struct seq_file *m, void *v)
5222 {
5223         union trace_eval_map_item *ptr = v;
5224
5225         seq_printf(m, "%s %ld (%s)\n",
5226                    ptr->map.eval_string, ptr->map.eval_value,
5227                    ptr->map.system);
5228
5229         return 0;
5230 }
5231
5232 static const struct seq_operations tracing_eval_map_seq_ops = {
5233         .start          = eval_map_start,
5234         .next           = eval_map_next,
5235         .stop           = eval_map_stop,
5236         .show           = eval_map_show,
5237 };
5238
5239 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5240 {
5241         if (tracing_disabled)
5242                 return -ENODEV;
5243
5244         return seq_open(filp, &tracing_eval_map_seq_ops);
5245 }
5246
5247 static const struct file_operations tracing_eval_map_fops = {
5248         .open           = tracing_eval_map_open,
5249         .read           = seq_read,
5250         .llseek         = seq_lseek,
5251         .release        = seq_release,
5252 };
5253
5254 static inline union trace_eval_map_item *
5255 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5256 {
5257         /* Return tail of array given the head */
5258         return ptr + ptr->head.length + 1;
5259 }
5260
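/*
 * Layout built by trace_insert_eval_map_file() below (illustrative):
 *
 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next chunk or NULL ]
 *
 * which is why trace_eval_jmp_to_tail() above skips head.length + 1 items
 * to reach the tail entry.
 */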
5261 static void
5262 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5263                            int len)
5264 {
5265         struct trace_eval_map **stop;
5266         struct trace_eval_map **map;
5267         union trace_eval_map_item *map_array;
5268         union trace_eval_map_item *ptr;
5269
5270         stop = start + len;
5271
5272         /*
5273          * The trace_eval_maps contains the map plus a head and tail item,
5274          * where the head holds the module and length of array, and the
5275          * tail holds a pointer to the next list.
5276          */
5277         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5278         if (!map_array) {
5279                 pr_warn("Unable to allocate trace eval mapping\n");
5280                 return;
5281         }
5282
5283         mutex_lock(&trace_eval_mutex);
5284
5285         if (!trace_eval_maps)
5286                 trace_eval_maps = map_array;
5287         else {
5288                 ptr = trace_eval_maps;
5289                 for (;;) {
5290                         ptr = trace_eval_jmp_to_tail(ptr);
5291                         if (!ptr->tail.next)
5292                                 break;
5293                         ptr = ptr->tail.next;
5294
5295                 }
5296                 ptr->tail.next = map_array;
5297         }
5298         map_array->head.mod = mod;
5299         map_array->head.length = len;
5300         map_array++;
5301
5302         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5303                 map_array->map = **map;
5304                 map_array++;
5305         }
5306         memset(map_array, 0, sizeof(*map_array));
5307
5308         mutex_unlock(&trace_eval_mutex);
5309 }
5310
5311 static void trace_create_eval_file(struct dentry *d_tracer)
5312 {
5313         trace_create_file("eval_map", 0444, d_tracer,
5314                           NULL, &tracing_eval_map_fops);
5315 }
5316
5317 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5318 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5319 static inline void trace_insert_eval_map_file(struct module *mod,
5320                               struct trace_eval_map **start, int len) { }
5321 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5322
5323 static void trace_insert_eval_map(struct module *mod,
5324                                   struct trace_eval_map **start, int len)
5325 {
5326         struct trace_eval_map **map;
5327
5328         if (len <= 0)
5329                 return;
5330
5331         map = start;
5332
5333         trace_event_eval_update(map, len);
5334
5335         trace_insert_eval_map_file(mod, start, len);
5336 }
5337
5338 static ssize_t
5339 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5340                        size_t cnt, loff_t *ppos)
5341 {
5342         struct trace_array *tr = filp->private_data;
5343         char buf[MAX_TRACER_SIZE+2];
5344         int r;
5345
5346         mutex_lock(&trace_types_lock);
5347         r = sprintf(buf, "%s\n", tr->current_trace->name);
5348         mutex_unlock(&trace_types_lock);
5349
5350         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5351 }
5352
5353 int tracer_init(struct tracer *t, struct trace_array *tr)
5354 {
5355         tracing_reset_online_cpus(&tr->trace_buffer);
5356         return t->init(tr);
5357 }
5358
5359 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5360 {
5361         int cpu;
5362
5363         for_each_tracing_cpu(cpu)
5364                 per_cpu_ptr(buf->data, cpu)->entries = val;
5365 }
5366
5367 #ifdef CONFIG_TRACER_MAX_TRACE
5368 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5369 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5370                                         struct trace_buffer *size_buf, int cpu_id)
5371 {
5372         int cpu, ret = 0;
5373
5374         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5375                 for_each_tracing_cpu(cpu) {
5376                         ret = ring_buffer_resize(trace_buf->buffer,
5377                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5378                         if (ret < 0)
5379                                 break;
5380                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5381                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5382                 }
5383         } else {
5384                 ret = ring_buffer_resize(trace_buf->buffer,
5385                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5386                 if (ret == 0)
5387                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5388                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5389         }
5390
5391         return ret;
5392 }
5393 #endif /* CONFIG_TRACER_MAX_TRACE */
5394
5395 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5396                                         unsigned long size, int cpu)
5397 {
5398         int ret;
5399
5400         /*
5401          * If kernel or user changes the size of the ring buffer
5402          * we use the size that was given, and we can forget about
5403          * expanding it later.
5404          */
5405         ring_buffer_expanded = true;
5406
5407         /* May be called before buffers are initialized */
5408         if (!tr->trace_buffer.buffer)
5409                 return 0;
5410
5411         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5412         if (ret < 0)
5413                 return ret;
5414
5415 #ifdef CONFIG_TRACER_MAX_TRACE
5416         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5417             !tr->current_trace->use_max_tr)
5418                 goto out;
5419
5420         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5421         if (ret < 0) {
5422                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5423                                                      &tr->trace_buffer, cpu);
5424                 if (r < 0) {
5425                         /*
5426                          * AARGH! We are left with different
5427                          * size max buffer!!!!
5428                          * The max buffer is our "snapshot" buffer.
5429                          * When a tracer needs a snapshot (one of the
5430                          * latency tracers), it swaps the max buffer
5431                          * with the saved snapshot. We succeeded in
5432                          * updating the size of the main buffer, but failed to
5433                          * update the size of the max buffer. But when we tried
5434                          * to reset the main buffer to the original size, we
5435                          * failed there too. This is very unlikely to
5436                          * happen, but if it does, warn and kill all
5437                          * tracing.
5438                          */
5439                         WARN_ON(1);
5440                         tracing_disabled = 1;
5441                 }
5442                 return ret;
5443         }
5444
5445         if (cpu == RING_BUFFER_ALL_CPUS)
5446                 set_buffer_entries(&tr->max_buffer, size);
5447         else
5448                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5449
5450  out:
5451 #endif /* CONFIG_TRACER_MAX_TRACE */
5452
5453         if (cpu == RING_BUFFER_ALL_CPUS)
5454                 set_buffer_entries(&tr->trace_buffer, size);
5455         else
5456                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5457
5458         return ret;
5459 }
5460
5461 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5462                                           unsigned long size, int cpu_id)
5463 {
5464         int ret = size;
5465
5466         mutex_lock(&trace_types_lock);
5467
5468         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5469                 /* make sure this cpu is enabled in the mask */
5470                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5471                         ret = -EINVAL;
5472                         goto out;
5473                 }
5474         }
5475
5476         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5477         if (ret < 0)
5478                 ret = -ENOMEM;
5479
5480 out:
5481         mutex_unlock(&trace_types_lock);
5482
5483         return ret;
5484 }
5485
5486
5487 /**
5488  * tracing_update_buffers - used by tracing facility to expand ring buffers
5489  *
5490  * To save memory when tracing is never used on a system that has it
5491  * configured in, the ring buffers are set to a minimum size.  Once a
5492  * user starts to use the tracing facility, the buffers are expanded to
5493  * their default size.
5494  *
5495  * This function is to be called when a tracer is about to be used.
5496  */
5497 int tracing_update_buffers(void)
5498 {
5499         int ret = 0;
5500
5501         mutex_lock(&trace_types_lock);
5502         if (!ring_buffer_expanded)
5503                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5504                                                 RING_BUFFER_ALL_CPUS);
5505         mutex_unlock(&trace_types_lock);
5506
5507         return ret;
5508 }
5509
5510 struct trace_option_dentry;
5511
5512 static void
5513 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5514
5515 /*
5516  * Used to clear out the tracer before deletion of an instance.
5517  * Must have trace_types_lock held.
5518  */
5519 static void tracing_set_nop(struct trace_array *tr)
5520 {
5521         if (tr->current_trace == &nop_trace)
5522                 return;
5523
5524         tr->current_trace->enabled--;
5525
5526         if (tr->current_trace->reset)
5527                 tr->current_trace->reset(tr);
5528
5529         tr->current_trace = &nop_trace;
5530 }
5531
5532 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5533 {
5534         /* Only enable if the directory has been created already. */
5535         if (!tr->dir)
5536                 return;
5537
5538         create_trace_option_files(tr, t);
5539 }
5540
5541 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5542 {
5543         struct tracer *t;
5544 #ifdef CONFIG_TRACER_MAX_TRACE
5545         bool had_max_tr;
5546 #endif
5547         int ret = 0;
5548
5549         mutex_lock(&trace_types_lock);
5550
5551         if (!ring_buffer_expanded) {
5552                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5553                                                 RING_BUFFER_ALL_CPUS);
5554                 if (ret < 0)
5555                         goto out;
5556                 ret = 0;
5557         }
5558
5559         for (t = trace_types; t; t = t->next) {
5560                 if (strcmp(t->name, buf) == 0)
5561                         break;
5562         }
5563         if (!t) {
5564                 ret = -EINVAL;
5565                 goto out;
5566         }
5567         if (t == tr->current_trace)
5568                 goto out;
5569
5570 #ifdef CONFIG_TRACER_SNAPSHOT
5571         if (t->use_max_tr) {
5572                 arch_spin_lock(&tr->max_lock);
5573                 if (tr->cond_snapshot)
5574                         ret = -EBUSY;
5575                 arch_spin_unlock(&tr->max_lock);
5576                 if (ret)
5577                         goto out;
5578         }
5579 #endif
5580         /* Some tracers won't work on kernel command line */
5581         if (system_state < SYSTEM_RUNNING && t->noboot) {
5582                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5583                         t->name);
5584                 goto out;
5585         }
5586
5587         /* Some tracers are only allowed for the top level buffer */
5588         if (!trace_ok_for_array(t, tr)) {
5589                 ret = -EINVAL;
5590                 goto out;
5591         }
5592
5593         /* If trace pipe files are being read, we can't change the tracer */
5594         if (tr->current_trace->ref) {
5595                 ret = -EBUSY;
5596                 goto out;
5597         }
5598
5599         trace_branch_disable();
5600
5601         tr->current_trace->enabled--;
5602
5603         if (tr->current_trace->reset)
5604                 tr->current_trace->reset(tr);
5605
5606         /* Current trace needs to be nop_trace before synchronize_rcu */
5607         tr->current_trace = &nop_trace;
5608
5609 #ifdef CONFIG_TRACER_MAX_TRACE
5610         had_max_tr = tr->allocated_snapshot;
5611
5612         if (had_max_tr && !t->use_max_tr) {
5613                 /*
5614                  * We need to make sure that the update_max_tr sees that
5615                  * current_trace changed to nop_trace to keep it from
5616                  * swapping the buffers after we resize it.
5617                  * The update_max_tr is called with interrupts disabled,
5618                  * so a synchronize_rcu() is sufficient.
5619                  */
5620                 synchronize_rcu();
5621                 free_snapshot(tr);
5622         }
5623 #endif
5624
5625 #ifdef CONFIG_TRACER_MAX_TRACE
5626         if (t->use_max_tr && !had_max_tr) {
5627                 ret = tracing_alloc_snapshot_instance(tr);
5628                 if (ret < 0)
5629                         goto out;
5630         }
5631 #endif
5632
5633         if (t->init) {
5634                 ret = tracer_init(t, tr);
5635                 if (ret)
5636                         goto out;
5637         }
5638
5639         tr->current_trace = t;
5640         tr->current_trace->enabled++;
5641         trace_branch_enable(tr);
5642  out:
5643         mutex_unlock(&trace_types_lock);
5644
5645         return ret;
5646 }
5647
5648 static ssize_t
5649 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5650                         size_t cnt, loff_t *ppos)
5651 {
5652         struct trace_array *tr = filp->private_data;
5653         char buf[MAX_TRACER_SIZE+1];
5654         int i;
5655         size_t ret;
5656         int err;
5657
5658         ret = cnt;
5659
5660         if (cnt > MAX_TRACER_SIZE)
5661                 cnt = MAX_TRACER_SIZE;
5662
5663         if (copy_from_user(buf, ubuf, cnt))
5664                 return -EFAULT;
5665
5666         buf[cnt] = 0;
5667
5668         /* strip ending whitespace. */
5669         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5670                 buf[i] = 0;
5671
5672         err = tracing_set_tracer(tr, buf);
5673         if (err)
5674                 return err;
5675
5676         *ppos += ret;
5677
5678         return ret;
5679 }
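/*
 * Illustrative use of the current_tracer file handled above (a sketch;
 * the available tracer names depend on the kernel configuration):
 *
 *   cat available_tracers          # e.g. "function_graph function nop"
 *   echo function > current_tracer # switch the tracer for this instance
 *   cat current_tracer             # -> "function"
 */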
5680
5681 static ssize_t
5682 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5683                    size_t cnt, loff_t *ppos)
5684 {
5685         char buf[64];
5686         int r;
5687
5688         r = snprintf(buf, sizeof(buf), "%ld\n",
5689                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5690         if (r > sizeof(buf))
5691                 r = sizeof(buf);
5692         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5693 }
5694
5695 static ssize_t
5696 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5697                     size_t cnt, loff_t *ppos)
5698 {
5699         unsigned long val;
5700         int ret;
5701
5702         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5703         if (ret)
5704                 return ret;
5705
5706         *ptr = val * 1000;
5707
5708         return cnt;
5709 }
5710
5711 static ssize_t
5712 tracing_thresh_read(struct file *filp, char __user *ubuf,
5713                     size_t cnt, loff_t *ppos)
5714 {
5715         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5716 }
5717
5718 static ssize_t
5719 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5720                      size_t cnt, loff_t *ppos)
5721 {
5722         struct trace_array *tr = filp->private_data;
5723         int ret;
5724
5725         mutex_lock(&trace_types_lock);
5726         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5727         if (ret < 0)
5728                 goto out;
5729
5730         if (tr->current_trace->update_thresh) {
5731                 ret = tr->current_trace->update_thresh(tr);
5732                 if (ret < 0)
5733                         goto out;
5734         }
5735
5736         ret = cnt;
5737 out:
5738         mutex_unlock(&trace_types_lock);
5739
5740         return ret;
5741 }
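/*
 * tracing_thresh is exposed in microseconds: the write path above
 * multiplies the value by 1000 before storing it as nanoseconds.
 * For example (illustrative):
 *
 *   echo 100 > tracing_thresh   # only record latencies above 100 usecs
 */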
5742
5743 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5744
5745 static ssize_t
5746 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5747                      size_t cnt, loff_t *ppos)
5748 {
5749         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5750 }
5751
5752 static ssize_t
5753 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5754                       size_t cnt, loff_t *ppos)
5755 {
5756         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5757 }
5758
5759 #endif
5760
5761 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5762 {
5763         struct trace_array *tr = inode->i_private;
5764         struct trace_iterator *iter;
5765         int ret = 0;
5766
5767         if (tracing_disabled)
5768                 return -ENODEV;
5769
5770         if (trace_array_get(tr) < 0)
5771                 return -ENODEV;
5772
5773         mutex_lock(&trace_types_lock);
5774
5775         /* create a buffer to store the information to pass to userspace */
5776         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5777         if (!iter) {
5778                 ret = -ENOMEM;
5779                 __trace_array_put(tr);
5780                 goto out;
5781         }
5782
5783         trace_seq_init(&iter->seq);
5784         iter->trace = tr->current_trace;
5785
5786         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5787                 ret = -ENOMEM;
5788                 goto fail;
5789         }
5790
5791         /* trace pipe does not show start of buffer */
5792         cpumask_setall(iter->started);
5793
5794         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5795                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5796
5797         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5798         if (trace_clocks[tr->clock_id].in_ns)
5799                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5800
5801         iter->tr = tr;
5802         iter->trace_buffer = &tr->trace_buffer;
5803         iter->cpu_file = tracing_get_cpu(inode);
5804         mutex_init(&iter->mutex);
5805         filp->private_data = iter;
5806
5807         if (iter->trace->pipe_open)
5808                 iter->trace->pipe_open(iter);
5809
5810         nonseekable_open(inode, filp);
5811
5812         tr->current_trace->ref++;
5813 out:
5814         mutex_unlock(&trace_types_lock);
5815         return ret;
5816
5817 fail:
5818         kfree(iter);
5819         __trace_array_put(tr);
5820         mutex_unlock(&trace_types_lock);
5821         return ret;
5822 }
5823
5824 static int tracing_release_pipe(struct inode *inode, struct file *file)
5825 {
5826         struct trace_iterator *iter = file->private_data;
5827         struct trace_array *tr = inode->i_private;
5828
5829         mutex_lock(&trace_types_lock);
5830
5831         tr->current_trace->ref--;
5832
5833         if (iter->trace->pipe_close)
5834                 iter->trace->pipe_close(iter);
5835
5836         mutex_unlock(&trace_types_lock);
5837
5838         free_cpumask_var(iter->started);
5839         mutex_destroy(&iter->mutex);
5840         kfree(iter);
5841
5842         trace_array_put(tr);
5843
5844         return 0;
5845 }
5846
5847 static __poll_t
5848 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5849 {
5850         struct trace_array *tr = iter->tr;
5851
5852         /* Iterators are static, they should be filled or empty */
5853         if (trace_buffer_iter(iter, iter->cpu_file))
5854                 return EPOLLIN | EPOLLRDNORM;
5855
5856         if (tr->trace_flags & TRACE_ITER_BLOCK)
5857                 /*
5858                  * Always select as readable when in blocking mode
5859                  */
5860                 return EPOLLIN | EPOLLRDNORM;
5861         else
5862                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5863                                              filp, poll_table);
5864 }
5865
5866 static __poll_t
5867 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5868 {
5869         struct trace_iterator *iter = filp->private_data;
5870
5871         return trace_poll(iter, filp, poll_table);
5872 }
5873
5874 /* Must be called with iter->mutex held. */
5875 static int tracing_wait_pipe(struct file *filp)
5876 {
5877         struct trace_iterator *iter = filp->private_data;
5878         int ret;
5879
5880         while (trace_empty(iter)) {
5881
5882                 if ((filp->f_flags & O_NONBLOCK)) {
5883                         return -EAGAIN;
5884                 }
5885
5886                 /*
5887                  * We only return (with an EOF) once we have read something and
5888                  * tracing has been disabled.  If tracing is disabled but we have
5889                  * never read anything, we keep blocking.  This allows a user to
5890                  * cat this file, and then enable tracing.  Once we have read
5891                  * something, we give an EOF when tracing is disabled again.
5892                  *
5893                  * iter->pos will be 0 if we haven't read anything.
5894                  */
5895                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5896                         break;
5897
5898                 mutex_unlock(&iter->mutex);
5899
5900                 ret = wait_on_pipe(iter, 0);
5901
5902                 mutex_lock(&iter->mutex);
5903
5904                 if (ret)
5905                         return ret;
5906         }
5907
5908         return 1;
5909 }
5910
5911 /*
5912  * Consumer reader.
5913  */
5914 static ssize_t
5915 tracing_read_pipe(struct file *filp, char __user *ubuf,
5916                   size_t cnt, loff_t *ppos)
5917 {
5918         struct trace_iterator *iter = filp->private_data;
5919         ssize_t sret;
5920
5921         /*
5922          * Avoid more than one consumer on a single file descriptor
5923          * This is just a matter of trace coherency; the ring buffer itself
5924          * is protected.
5925          */
5926         mutex_lock(&iter->mutex);
5927
5928         /* return any leftover data */
5929         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5930         if (sret != -EBUSY)
5931                 goto out;
5932
5933         trace_seq_init(&iter->seq);
5934
5935         if (iter->trace->read) {
5936                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5937                 if (sret)
5938                         goto out;
5939         }
5940
5941 waitagain:
5942         sret = tracing_wait_pipe(filp);
5943         if (sret <= 0)
5944                 goto out;
5945
5946         /* stop when tracing is finished */
5947         if (trace_empty(iter)) {
5948                 sret = 0;
5949                 goto out;
5950         }
5951
5952         if (cnt >= PAGE_SIZE)
5953                 cnt = PAGE_SIZE - 1;
5954
5955         /* reset all but tr, trace, and overruns */
5956         memset(&iter->seq, 0,
5957                sizeof(struct trace_iterator) -
5958                offsetof(struct trace_iterator, seq));
5959         cpumask_clear(iter->started);
5960         iter->pos = -1;
5961
5962         trace_event_read_lock();
5963         trace_access_lock(iter->cpu_file);
5964         while (trace_find_next_entry_inc(iter) != NULL) {
5965                 enum print_line_t ret;
5966                 int save_len = iter->seq.seq.len;
5967
5968                 ret = print_trace_line(iter);
5969                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5970                         /* don't print partial lines */
5971                         iter->seq.seq.len = save_len;
5972                         break;
5973                 }
5974                 if (ret != TRACE_TYPE_NO_CONSUME)
5975                         trace_consume(iter);
5976
5977                 if (trace_seq_used(&iter->seq) >= cnt)
5978                         break;
5979
5980                 /*
5981                  * Setting the full flag means we reached the trace_seq buffer
5982                  * size, and we should have left via the partial-output condition
5983                  * above.  One of the trace_seq_* functions is not being used properly.
5984                  */
5985                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5986                           iter->ent->type);
5987         }
5988         trace_access_unlock(iter->cpu_file);
5989         trace_event_read_unlock();
5990
5991         /* Now copy what we have to the user */
5992         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5993         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5994                 trace_seq_init(&iter->seq);
5995
5996         /*
5997          * If there was nothing to send to user, in spite of consuming trace
5998          * entries, go back to wait for more entries.
5999          */
6000         if (sret == -EBUSY)
6001                 goto waitagain;
6002
6003 out:
6004         mutex_unlock(&iter->mutex);
6005
6006         return sret;
6007 }
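/*
 * trace_pipe, implemented by the consumer reader above, is a consuming
 * read: entries returned to userspace are removed from the ring buffer.
 * Typical use (illustrative):
 *
 *   cat trace_pipe      # blocks until entries arrive, unless opened O_NONBLOCK
 */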
6008
6009 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6010                                      unsigned int idx)
6011 {
6012         __free_page(spd->pages[idx]);
6013 }
6014
6015 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6016         .can_merge              = 0,
6017         .confirm                = generic_pipe_buf_confirm,
6018         .release                = generic_pipe_buf_release,
6019         .steal                  = generic_pipe_buf_steal,
6020         .get                    = generic_pipe_buf_get,
6021 };
6022
6023 static size_t
6024 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6025 {
6026         size_t count;
6027         int save_len;
6028         int ret;
6029
6030         /* Seq buffer is page-sized, exactly what we need. */
6031         for (;;) {
6032                 save_len = iter->seq.seq.len;
6033                 ret = print_trace_line(iter);
6034
6035                 if (trace_seq_has_overflowed(&iter->seq)) {
6036                         iter->seq.seq.len = save_len;
6037                         break;
6038                 }
6039
6040                 /*
6041                  * This should not be hit, because it should only
6042                  * be set if the iter->seq overflowed. But check it
6043                  * anyway to be safe.
6044                  */
6045                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6046                         iter->seq.seq.len = save_len;
6047                         break;
6048                 }
6049
6050                 count = trace_seq_used(&iter->seq) - save_len;
6051                 if (rem < count) {
6052                         rem = 0;
6053                         iter->seq.seq.len = save_len;
6054                         break;
6055                 }
6056
6057                 if (ret != TRACE_TYPE_NO_CONSUME)
6058                         trace_consume(iter);
6059                 rem -= count;
6060                 if (!trace_find_next_entry_inc(iter))   {
6061                         rem = 0;
6062                         iter->ent = NULL;
6063                         break;
6064                 }
6065         }
6066
6067         return rem;
6068 }
6069
6070 static ssize_t tracing_splice_read_pipe(struct file *filp,
6071                                         loff_t *ppos,
6072                                         struct pipe_inode_info *pipe,
6073                                         size_t len,
6074                                         unsigned int flags)
6075 {
6076         struct page *pages_def[PIPE_DEF_BUFFERS];
6077         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6078         struct trace_iterator *iter = filp->private_data;
6079         struct splice_pipe_desc spd = {
6080                 .pages          = pages_def,
6081                 .partial        = partial_def,
6082                 .nr_pages       = 0, /* This gets updated below. */
6083                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6084                 .ops            = &tracing_pipe_buf_ops,
6085                 .spd_release    = tracing_spd_release_pipe,
6086         };
6087         ssize_t ret;
6088         size_t rem;
6089         unsigned int i;
6090
6091         if (splice_grow_spd(pipe, &spd))
6092                 return -ENOMEM;
6093
6094         mutex_lock(&iter->mutex);
6095
6096         if (iter->trace->splice_read) {
6097                 ret = iter->trace->splice_read(iter, filp,
6098                                                ppos, pipe, len, flags);
6099                 if (ret)
6100                         goto out_err;
6101         }
6102
6103         ret = tracing_wait_pipe(filp);
6104         if (ret <= 0)
6105                 goto out_err;
6106
6107         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6108                 ret = -EFAULT;
6109                 goto out_err;
6110         }
6111
6112         trace_event_read_lock();
6113         trace_access_lock(iter->cpu_file);
6114
6115         /* Fill as many pages as possible. */
6116         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6117                 spd.pages[i] = alloc_page(GFP_KERNEL);
6118                 if (!spd.pages[i])
6119                         break;
6120
6121                 rem = tracing_fill_pipe_page(rem, iter);
6122
6123                 /* Copy the data into the page, so we can start over. */
6124                 ret = trace_seq_to_buffer(&iter->seq,
6125                                           page_address(spd.pages[i]),
6126                                           trace_seq_used(&iter->seq));
6127                 if (ret < 0) {
6128                         __free_page(spd.pages[i]);
6129                         break;
6130                 }
6131                 spd.partial[i].offset = 0;
6132                 spd.partial[i].len = trace_seq_used(&iter->seq);
6133
6134                 trace_seq_init(&iter->seq);
6135         }
6136
6137         trace_access_unlock(iter->cpu_file);
6138         trace_event_read_unlock();
6139         mutex_unlock(&iter->mutex);
6140
6141         spd.nr_pages = i;
6142
6143         if (i)
6144                 ret = splice_to_pipe(pipe, &spd);
6145         else
6146                 ret = 0;
6147 out:
6148         splice_shrink_spd(&spd);
6149         return ret;
6150
6151 out_err:
6152         mutex_unlock(&iter->mutex);
6153         goto out;
6154 }
6155
6156 static ssize_t
6157 tracing_entries_read(struct file *filp, char __user *ubuf,
6158                      size_t cnt, loff_t *ppos)
6159 {
6160         struct inode *inode = file_inode(filp);
6161         struct trace_array *tr = inode->i_private;
6162         int cpu = tracing_get_cpu(inode);
6163         char buf[64];
6164         int r = 0;
6165         ssize_t ret;
6166
6167         mutex_lock(&trace_types_lock);
6168
6169         if (cpu == RING_BUFFER_ALL_CPUS) {
6170                 int cpu, buf_size_same;
6171                 unsigned long size;
6172
6173                 size = 0;
6174                 buf_size_same = 1;
6175                 /* check if all cpu sizes are same */
6176                 for_each_tracing_cpu(cpu) {
6177                         /* fill in the size from first enabled cpu */
6178                         if (size == 0)
6179                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6180                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6181                                 buf_size_same = 0;
6182                                 break;
6183                         }
6184                 }
6185
6186                 if (buf_size_same) {
6187                         if (!ring_buffer_expanded)
6188                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6189                                             size >> 10,
6190                                             trace_buf_size >> 10);
6191                         else
6192                                 r = sprintf(buf, "%lu\n", size >> 10);
6193                 } else
6194                         r = sprintf(buf, "X\n");
6195         } else
6196                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6197
6198         mutex_unlock(&trace_types_lock);
6199
6200         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6201         return ret;
6202 }
6203
6204 static ssize_t
6205 tracing_entries_write(struct file *filp, const char __user *ubuf,
6206                       size_t cnt, loff_t *ppos)
6207 {
6208         struct inode *inode = file_inode(filp);
6209         struct trace_array *tr = inode->i_private;
6210         unsigned long val;
6211         int ret;
6212
6213         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6214         if (ret)
6215                 return ret;
6216
6217         /* must have at least 1 entry */
6218         if (!val)
6219                 return -EINVAL;
6220
6221         /* value is in KB */
6222         val <<= 10;
6223         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6224         if (ret < 0)
6225                 return ret;
6226
6227         *ppos += cnt;
6228
6229         return cnt;
6230 }
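/*
 * buffer_size_kb example (a sketch; the per_cpu path is an assumption):
 *
 *   echo 2048 > buffer_size_kb              # 2 MiB of ring buffer per CPU
 *   echo 4096 > per_cpu/cpu0/buffer_size_kb # resize only CPU 0
 *
 * Values are interpreted in KiB, as the "val <<= 10" above shows.
 */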
6231
6232 static ssize_t
6233 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6234                                 size_t cnt, loff_t *ppos)
6235 {
6236         struct trace_array *tr = filp->private_data;
6237         char buf[64];
6238         int r, cpu;
6239         unsigned long size = 0, expanded_size = 0;
6240
6241         mutex_lock(&trace_types_lock);
6242         for_each_tracing_cpu(cpu) {
6243                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6244                 if (!ring_buffer_expanded)
6245                         expanded_size += trace_buf_size >> 10;
6246         }
6247         if (ring_buffer_expanded)
6248                 r = sprintf(buf, "%lu\n", size);
6249         else
6250                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6251         mutex_unlock(&trace_types_lock);
6252
6253         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6254 }
6255
6256 static ssize_t
6257 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6258                           size_t cnt, loff_t *ppos)
6259 {
6260         /*
6261          * There is no need to read what the user has written; this function
6262          * exists just to make sure that there is no error when "echo" is used.
6263          */
6264
6265         *ppos += cnt;
6266
6267         return cnt;
6268 }
6269
6270 static int
6271 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6272 {
6273         struct trace_array *tr = inode->i_private;
6274
6275         /* disable tracing ? */
6276         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6277                 tracer_tracing_off(tr);
6278         /* resize the ring buffer to 0 */
6279         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6280
6281         trace_array_put(tr);
6282
6283         return 0;
6284 }
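/*
 * The free_buffer file: when the file is released, the ring buffer is
 * shrunk to zero, and tracing is turned off first if TRACE_ITER_STOP_ON_FREE
 * (the "disable_on_free" option, if that is its current name) is set.
 * Illustrative use:
 *
 *   echo > free_buffer   # on close, frees the ring buffer memory
 */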
6285
6286 static ssize_t
6287 tracing_mark_write(struct file *filp, const char __user *ubuf,
6288                                         size_t cnt, loff_t *fpos)
6289 {
6290         struct trace_array *tr = filp->private_data;
6291         struct ring_buffer_event *event;
6292         enum event_trigger_type tt = ETT_NONE;
6293         struct ring_buffer *buffer;
6294         struct print_entry *entry;
6295         unsigned long irq_flags;
6296         const char faulted[] = "<faulted>";
6297         ssize_t written;
6298         int size;
6299         int len;
6300
6301 /* Used in tracing_mark_raw_write() as well */
6302 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6303
6304         if (tracing_disabled)
6305                 return -EINVAL;
6306
6307         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6308                 return -EINVAL;
6309
6310         if (cnt > TRACE_BUF_SIZE)
6311                 cnt = TRACE_BUF_SIZE;
6312
6313         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6314
6315         local_save_flags(irq_flags);
6316         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6317
6318         /* If less than "<faulted>", then make sure we can still add that */
6319         if (cnt < FAULTED_SIZE)
6320                 size += FAULTED_SIZE - cnt;
6321
6322         buffer = tr->trace_buffer.buffer;
6323         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6324                                             irq_flags, preempt_count());
6325         if (unlikely(!event))
6326                 /* Ring buffer disabled, return as if not open for write */
6327                 return -EBADF;
6328
6329         entry = ring_buffer_event_data(event);
6330         entry->ip = _THIS_IP_;
6331
6332         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6333         if (len) {
6334                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6335                 cnt = FAULTED_SIZE;
6336                 written = -EFAULT;
6337         } else
6338                 written = cnt;
6339         len = cnt;
6340
6341         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6342                 /* do not add \n before testing triggers, but add \0 */
6343                 entry->buf[cnt] = '\0';
6344                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6345         }
6346
6347         if (entry->buf[cnt - 1] != '\n') {
6348                 entry->buf[cnt] = '\n';
6349                 entry->buf[cnt + 1] = '\0';
6350         } else
6351                 entry->buf[cnt] = '\0';
6352
6353         __buffer_unlock_commit(buffer, event);
6354
6355         if (tt)
6356                 event_triggers_post_call(tr->trace_marker_file, tt);
6357
6358         if (written > 0)
6359                 *fpos += written;
6360
6361         return written;
6362 }
6363
6364 /* Limit it for now to 3K (including tag) */
6365 #define RAW_DATA_MAX_SIZE (1024*3)
6366
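/*
 * tracing_mark_raw_write - write handler for the tracefs "trace_marker_raw"
 * file.  The payload is binary: the first sizeof(int) bytes are a tag id
 * and the remainder is opaque data, recorded as a TRACE_RAW_DATA event.
 * Writes smaller than the tag id or larger than RAW_DATA_MAX_SIZE are
 * rejected with -EINVAL.
 */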
6367 static ssize_t
6368 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6369                                         size_t cnt, loff_t *fpos)
6370 {
6371         struct trace_array *tr = filp->private_data;
6372         struct ring_buffer_event *event;
6373         struct ring_buffer *buffer;
6374         struct raw_data_entry *entry;
6375         const char faulted[] = "<faulted>";
6376         unsigned long irq_flags;
6377         ssize_t written;
6378         int size;
6379         int len;
6380
6381 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6382
6383         if (tracing_disabled)
6384                 return -EINVAL;
6385
6386         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6387                 return -EINVAL;
6388
6389         /* The marker must at least have a tag id */
6390         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6391                 return -EINVAL;
6392
6393         if (cnt > TRACE_BUF_SIZE)
6394                 cnt = TRACE_BUF_SIZE;
6395
6396         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6397
6398         local_save_flags(irq_flags);
6399         size = sizeof(*entry) + cnt;
6400         if (cnt < FAULT_SIZE_ID)
6401                 size += FAULT_SIZE_ID - cnt;
6402
6403         buffer = tr->trace_buffer.buffer;
6404         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6405                                             irq_flags, preempt_count());
6406         if (!event)
6407                 /* Ring buffer disabled, return as if not open for write */
6408                 return -EBADF;
6409
6410         entry = ring_buffer_event_data(event);
6411
6412         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6413         if (len) {
6414                 entry->id = -1;
6415                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6416                 written = -EFAULT;
6417         } else
6418                 written = cnt;
6419
6420         __buffer_unlock_commit(buffer, event);
6421
6422         if (written > 0)
6423                 *fpos += written;
6424
6425         return written;
6426 }
6427
6428 static int tracing_clock_show(struct seq_file *m, void *v)
6429 {
6430         struct trace_array *tr = m->private;
6431         int i;
6432
6433         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6434                 seq_printf(m,
6435                         "%s%s%s%s", i ? " " : "",
6436                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6437                         i == tr->clock_id ? "]" : "");
6438         seq_putc(m, '\n');
6439
6440         return 0;
6441 }
6442
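/*
 * tracing_set_clock - switch the trace clock of a trace array
 *
 * Looks @clockstr up in trace_clocks[] and installs the matching clock on
 * the ring buffer (and on the max/snapshot buffer when it exists).  Because
 * timestamps taken with different clocks are not comparable, the buffers
 * are reset as part of the switch.
 */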
6443 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6444 {
6445         int i;
6446
6447         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6448                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6449                         break;
6450         }
6451         if (i == ARRAY_SIZE(trace_clocks))
6452                 return -EINVAL;
6453
6454         mutex_lock(&trace_types_lock);
6455
6456         tr->clock_id = i;
6457
6458         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6459
6460         /*
6461          * New clock may not be consistent with the previous clock.
6462          * Reset the buffer so that it doesn't have incomparable timestamps.
6463          */
6464         tracing_reset_online_cpus(&tr->trace_buffer);
6465
6466 #ifdef CONFIG_TRACER_MAX_TRACE
6467         if (tr->max_buffer.buffer)
6468                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6469         tracing_reset_online_cpus(&tr->max_buffer);
6470 #endif
6471
6472         mutex_unlock(&trace_types_lock);
6473
6474         return 0;
6475 }
6476
6477 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6478                                    size_t cnt, loff_t *fpos)
6479 {
6480         struct seq_file *m = filp->private_data;
6481         struct trace_array *tr = m->private;
6482         char buf[64];
6483         const char *clockstr;
6484         int ret;
6485
6486         if (cnt >= sizeof(buf))
6487                 return -EINVAL;
6488
6489         if (copy_from_user(buf, ubuf, cnt))
6490                 return -EFAULT;
6491
6492         buf[cnt] = 0;
6493
6494         clockstr = strstrip(buf);
6495
6496         ret = tracing_set_clock(tr, clockstr);
6497         if (ret)
6498                 return ret;
6499
6500         *fpos += cnt;
6501
6502         return cnt;
6503 }
6504
6505 static int tracing_clock_open(struct inode *inode, struct file *file)
6506 {
6507         struct trace_array *tr = inode->i_private;
6508         int ret;
6509
6510         if (tracing_disabled)
6511                 return -ENODEV;
6512
6513         if (trace_array_get(tr))
6514                 return -ENODEV;
6515
6516         ret = single_open(file, tracing_clock_show, inode->i_private);
6517         if (ret < 0)
6518                 trace_array_put(tr);
6519
6520         return ret;
6521 }
6522
6523 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6524 {
6525         struct trace_array *tr = m->private;
6526
6527         mutex_lock(&trace_types_lock);
6528
6529         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6530                 seq_puts(m, "delta [absolute]\n");
6531         else
6532                 seq_puts(m, "[delta] absolute\n");
6533
6534         mutex_unlock(&trace_types_lock);
6535
6536         return 0;
6537 }
6538
6539 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6540 {
6541         struct trace_array *tr = inode->i_private;
6542         int ret;
6543
6544         if (tracing_disabled)
6545                 return -ENODEV;
6546
6547         if (trace_array_get(tr))
6548                 return -ENODEV;
6549
6550         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6551         if (ret < 0)
6552                 trace_array_put(tr);
6553
6554         return ret;
6555 }
6556
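/*
 * tracing_set_time_stamp_abs - enable/disable absolute timestamps
 *
 * Reference counted: the ring buffer is switched to absolute timestamps on
 * the first enable and back to deltas only when the last user drops its
 * reference.  A disable without a matching enable is a bug and returns
 * -EINVAL.
 */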
6557 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6558 {
6559         int ret = 0;
6560
6561         mutex_lock(&trace_types_lock);
6562
6563         if (abs && tr->time_stamp_abs_ref++)
6564                 goto out;
6565
6566         if (!abs) {
6567                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6568                         ret = -EINVAL;
6569                         goto out;
6570                 }
6571
6572                 if (--tr->time_stamp_abs_ref)
6573                         goto out;
6574         }
6575
6576         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6577
6578 #ifdef CONFIG_TRACER_MAX_TRACE
6579         if (tr->max_buffer.buffer)
6580                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6581 #endif
6582  out:
6583         mutex_unlock(&trace_types_lock);
6584
6585         return ret;
6586 }
6587
6588 struct ftrace_buffer_info {
6589         struct trace_iterator   iter;
6590         void                    *spare;
6591         unsigned int            spare_cpu;
6592         unsigned int            read;
6593 };
6594
6595 #ifdef CONFIG_TRACER_SNAPSHOT
6596 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6597 {
6598         struct trace_array *tr = inode->i_private;
6599         struct trace_iterator *iter;
6600         struct seq_file *m;
6601         int ret = 0;
6602
6603         if (trace_array_get(tr) < 0)
6604                 return -ENODEV;
6605
6606         if (file->f_mode & FMODE_READ) {
6607                 iter = __tracing_open(inode, file, true);
6608                 if (IS_ERR(iter))
6609                         ret = PTR_ERR(iter);
6610         } else {
6611                 /* Writes still need the seq_file to hold the private data */
6612                 ret = -ENOMEM;
6613                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6614                 if (!m)
6615                         goto out;
6616                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6617                 if (!iter) {
6618                         kfree(m);
6619                         goto out;
6620                 }
6621                 ret = 0;
6622
6623                 iter->tr = tr;
6624                 iter->trace_buffer = &tr->max_buffer;
6625                 iter->cpu_file = tracing_get_cpu(inode);
6626                 m->private = iter;
6627                 file->private_data = m;
6628         }
6629 out:
6630         if (ret < 0)
6631                 trace_array_put(tr);
6632
6633         return ret;
6634 }
6635
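/*
 * tracing_snapshot_write - write handler for the tracefs "snapshot" file
 *
 * The value written selects the action: 0 frees the snapshot buffer,
 * 1 allocates it (if needed) and swaps it with the live buffer, and any
 * other value simply clears the snapshot buffer.  The per-cpu snapshot
 * files only allow the swap when CONFIG_RING_BUFFER_ALLOW_SWAP is set,
 * and the request is refused with -EBUSY while the current tracer already
 * uses the max buffer or a conditional snapshot is in place.
 */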
6636 static ssize_t
6637 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6638                        loff_t *ppos)
6639 {
6640         struct seq_file *m = filp->private_data;
6641         struct trace_iterator *iter = m->private;
6642         struct trace_array *tr = iter->tr;
6643         unsigned long val;
6644         int ret;
6645
6646         ret = tracing_update_buffers();
6647         if (ret < 0)
6648                 return ret;
6649
6650         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6651         if (ret)
6652                 return ret;
6653
6654         mutex_lock(&trace_types_lock);
6655
6656         if (tr->current_trace->use_max_tr) {
6657                 ret = -EBUSY;
6658                 goto out;
6659         }
6660
6661         arch_spin_lock(&tr->max_lock);
6662         if (tr->cond_snapshot)
6663                 ret = -EBUSY;
6664         arch_spin_unlock(&tr->max_lock);
6665         if (ret)
6666                 goto out;
6667
6668         switch (val) {
6669         case 0:
6670                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6671                         ret = -EINVAL;
6672                         break;
6673                 }
6674                 if (tr->allocated_snapshot)
6675                         free_snapshot(tr);
6676                 break;
6677         case 1:
6678 /* Only allow per-cpu swap if the ring buffer supports it */
6679 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6680                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6681                         ret = -EINVAL;
6682                         break;
6683                 }
6684 #endif
6685                 if (!tr->allocated_snapshot) {
6686                         ret = tracing_alloc_snapshot_instance(tr);
6687                         if (ret < 0)
6688                                 break;
6689                 }
6690                 local_irq_disable();
6691                 /* Now, we're going to swap */
6692                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6693                         update_max_tr(tr, current, smp_processor_id(), NULL);
6694                 else
6695                         update_max_tr_single(tr, current, iter->cpu_file);
6696                 local_irq_enable();
6697                 break;
6698         default:
6699                 if (tr->allocated_snapshot) {
6700                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6701                                 tracing_reset_online_cpus(&tr->max_buffer);
6702                         else
6703                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6704                 }
6705                 break;
6706         }
6707
6708         if (ret >= 0) {
6709                 *ppos += cnt;
6710                 ret = cnt;
6711         }
6712 out:
6713         mutex_unlock(&trace_types_lock);
6714         return ret;
6715 }
6716
6717 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6718 {
6719         struct seq_file *m = file->private_data;
6720         int ret;
6721
6722         ret = tracing_release(inode, file);
6723
6724         if (file->f_mode & FMODE_READ)
6725                 return ret;
6726
6727         /* If write only, the seq_file is just a stub */
6728         if (m)
6729                 kfree(m->private);
6730         kfree(m);
6731
6732         return 0;
6733 }
6734
6735 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6736 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6737                                     size_t count, loff_t *ppos);
6738 static int tracing_buffers_release(struct inode *inode, struct file *file);
6739 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6740                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6741
6742 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6743 {
6744         struct ftrace_buffer_info *info;
6745         int ret;
6746
6747         ret = tracing_buffers_open(inode, filp);
6748         if (ret < 0)
6749                 return ret;
6750
6751         info = filp->private_data;
6752
6753         if (info->iter.trace->use_max_tr) {
6754                 tracing_buffers_release(inode, filp);
6755                 return -EBUSY;
6756         }
6757
6758         info->iter.snapshot = true;
6759         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6760
6761         return ret;
6762 }
6763
6764 #endif /* CONFIG_TRACER_SNAPSHOT */
6765
6766
6767 static const struct file_operations tracing_thresh_fops = {
6768         .open           = tracing_open_generic,
6769         .read           = tracing_thresh_read,
6770         .write          = tracing_thresh_write,
6771         .llseek         = generic_file_llseek,
6772 };
6773
6774 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6775 static const struct file_operations tracing_max_lat_fops = {
6776         .open           = tracing_open_generic,
6777         .read           = tracing_max_lat_read,
6778         .write          = tracing_max_lat_write,
6779         .llseek         = generic_file_llseek,
6780 };
6781 #endif
6782
6783 static const struct file_operations set_tracer_fops = {
6784         .open           = tracing_open_generic,
6785         .read           = tracing_set_trace_read,
6786         .write          = tracing_set_trace_write,
6787         .llseek         = generic_file_llseek,
6788 };
6789
6790 static const struct file_operations tracing_pipe_fops = {
6791         .open           = tracing_open_pipe,
6792         .poll           = tracing_poll_pipe,
6793         .read           = tracing_read_pipe,
6794         .splice_read    = tracing_splice_read_pipe,
6795         .release        = tracing_release_pipe,
6796         .llseek         = no_llseek,
6797 };
6798
6799 static const struct file_operations tracing_entries_fops = {
6800         .open           = tracing_open_generic_tr,
6801         .read           = tracing_entries_read,
6802         .write          = tracing_entries_write,
6803         .llseek         = generic_file_llseek,
6804         .release        = tracing_release_generic_tr,
6805 };
6806
6807 static const struct file_operations tracing_total_entries_fops = {
6808         .open           = tracing_open_generic_tr,
6809         .read           = tracing_total_entries_read,
6810         .llseek         = generic_file_llseek,
6811         .release        = tracing_release_generic_tr,
6812 };
6813
6814 static const struct file_operations tracing_free_buffer_fops = {
6815         .open           = tracing_open_generic_tr,
6816         .write          = tracing_free_buffer_write,
6817         .release        = tracing_free_buffer_release,
6818 };
6819
6820 static const struct file_operations tracing_mark_fops = {
6821         .open           = tracing_open_generic_tr,
6822         .write          = tracing_mark_write,
6823         .llseek         = generic_file_llseek,
6824         .release        = tracing_release_generic_tr,
6825 };
6826
6827 static const struct file_operations tracing_mark_raw_fops = {
6828         .open           = tracing_open_generic_tr,
6829         .write          = tracing_mark_raw_write,
6830         .llseek         = generic_file_llseek,
6831         .release        = tracing_release_generic_tr,
6832 };
6833
6834 static const struct file_operations trace_clock_fops = {
6835         .open           = tracing_clock_open,
6836         .read           = seq_read,
6837         .llseek         = seq_lseek,
6838         .release        = tracing_single_release_tr,
6839         .write          = tracing_clock_write,
6840 };
6841
6842 static const struct file_operations trace_time_stamp_mode_fops = {
6843         .open           = tracing_time_stamp_mode_open,
6844         .read           = seq_read,
6845         .llseek         = seq_lseek,
6846         .release        = tracing_single_release_tr,
6847 };
6848
6849 #ifdef CONFIG_TRACER_SNAPSHOT
6850 static const struct file_operations snapshot_fops = {
6851         .open           = tracing_snapshot_open,
6852         .read           = seq_read,
6853         .write          = tracing_snapshot_write,
6854         .llseek         = tracing_lseek,
6855         .release        = tracing_snapshot_release,
6856 };
6857
6858 static const struct file_operations snapshot_raw_fops = {
6859         .open           = snapshot_raw_open,
6860         .read           = tracing_buffers_read,
6861         .release        = tracing_buffers_release,
6862         .splice_read    = tracing_buffers_splice_read,
6863         .llseek         = no_llseek,
6864 };
6865
6866 #endif /* CONFIG_TRACER_SNAPSHOT */
6867
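/*
 * tracing_buffers_open - open handler for the per-cpu "trace_pipe_raw" files
 *
 * Allocates a struct ftrace_buffer_info carrying the iterator state for
 * binary, page-at-a-time reads of the ring buffer, and pins both the trace
 * array and the current tracer for the lifetime of the file.
 */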
6868 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6869 {
6870         struct trace_array *tr = inode->i_private;
6871         struct ftrace_buffer_info *info;
6872         int ret;
6873
6874         if (tracing_disabled)
6875                 return -ENODEV;
6876
6877         if (trace_array_get(tr) < 0)
6878                 return -ENODEV;
6879
6880         info = kzalloc(sizeof(*info), GFP_KERNEL);
6881         if (!info) {
6882                 trace_array_put(tr);
6883                 return -ENOMEM;
6884         }
6885
6886         mutex_lock(&trace_types_lock);
6887
6888         info->iter.tr           = tr;
6889         info->iter.cpu_file     = tracing_get_cpu(inode);
6890         info->iter.trace        = tr->current_trace;
6891         info->iter.trace_buffer = &tr->trace_buffer;
6892         info->spare             = NULL;
6893         /* Force reading ring buffer for first read */
6894         info->read              = (unsigned int)-1;
6895
6896         filp->private_data = info;
6897
6898         tr->current_trace->ref++;
6899
6900         mutex_unlock(&trace_types_lock);
6901
6902         ret = nonseekable_open(inode, filp);
6903         if (ret < 0)
6904                 trace_array_put(tr);
6905
6906         return ret;
6907 }
6908
6909 static __poll_t
6910 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6911 {
6912         struct ftrace_buffer_info *info = filp->private_data;
6913         struct trace_iterator *iter = &info->iter;
6914
6915         return trace_poll(iter, filp, poll_table);
6916 }
6917
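/*
 * tracing_buffers_read - read whole pages of binary ring buffer data
 *
 * A spare page is allocated on first use and refilled from the ring
 * buffer; any partial page left over from a previous read is handed out
 * first.  With O_NONBLOCK the read returns -EAGAIN instead of waiting for
 * new data when the buffer is empty.
 */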
6918 static ssize_t
6919 tracing_buffers_read(struct file *filp, char __user *ubuf,
6920                      size_t count, loff_t *ppos)
6921 {
6922         struct ftrace_buffer_info *info = filp->private_data;
6923         struct trace_iterator *iter = &info->iter;
6924         ssize_t ret = 0;
6925         ssize_t size;
6926
6927         if (!count)
6928                 return 0;
6929
6930 #ifdef CONFIG_TRACER_MAX_TRACE
6931         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6932                 return -EBUSY;
6933 #endif
6934
6935         if (!info->spare) {
6936                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6937                                                           iter->cpu_file);
6938                 if (IS_ERR(info->spare)) {
6939                         ret = PTR_ERR(info->spare);
6940                         info->spare = NULL;
6941                 } else {
6942                         info->spare_cpu = iter->cpu_file;
6943                 }
6944         }
6945         if (!info->spare)
6946                 return ret;
6947
6948         /* Do we have previous read data to read? */
6949         if (info->read < PAGE_SIZE)
6950                 goto read;
6951
6952  again:
6953         trace_access_lock(iter->cpu_file);
6954         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6955                                     &info->spare,
6956                                     count,
6957                                     iter->cpu_file, 0);
6958         trace_access_unlock(iter->cpu_file);
6959
6960         if (ret < 0) {
6961                 if (trace_empty(iter)) {
6962                         if ((filp->f_flags & O_NONBLOCK))
6963                                 return -EAGAIN;
6964
6965                         ret = wait_on_pipe(iter, 0);
6966                         if (ret)
6967                                 return ret;
6968
6969                         goto again;
6970                 }
6971                 return 0;
6972         }
6973
6974         info->read = 0;
6975  read:
6976         size = PAGE_SIZE - info->read;
6977         if (size > count)
6978                 size = count;
6979
6980         ret = copy_to_user(ubuf, info->spare + info->read, size);
6981         if (ret == size)
6982                 return -EFAULT;
6983
6984         size -= ret;
6985
6986         *ppos += size;
6987         info->read += size;
6988
6989         return size;
6990 }
6991
6992 static int tracing_buffers_release(struct inode *inode, struct file *file)
6993 {
6994         struct ftrace_buffer_info *info = file->private_data;
6995         struct trace_iterator *iter = &info->iter;
6996
6997         mutex_lock(&trace_types_lock);
6998
6999         iter->tr->current_trace->ref--;
7000
7001         __trace_array_put(iter->tr);
7002
7003         if (info->spare)
7004                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7005                                            info->spare_cpu, info->spare);
7006         kfree(info);
7007
7008         mutex_unlock(&trace_types_lock);
7009
7010         return 0;
7011 }
7012
7013 struct buffer_ref {
7014         struct ring_buffer      *buffer;
7015         void                    *page;
7016         int                     cpu;
7017         int                     ref;
7018 };
7019
7020 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7021                                     struct pipe_buffer *buf)
7022 {
7023         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7024
7025         if (--ref->ref)
7026                 return;
7027
7028         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7029         kfree(ref);
7030         buf->private = 0;
7031 }
7032
7033 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7034                                 struct pipe_buffer *buf)
7035 {
7036         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7037
7038         ref->ref++;
7039 }
7040
7041 /* Pipe buffer operations for a buffer. */
7042 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7043         .can_merge              = 0,
7044         .confirm                = generic_pipe_buf_confirm,
7045         .release                = buffer_pipe_buf_release,
7046         .steal                  = generic_pipe_buf_steal,
7047         .get                    = buffer_pipe_buf_get,
7048 };
7049
7050 /*
7051  * Callback from splice_to_pipe(); releases the pages still attached to
7052  * the spd in case we errored out while filling the pipe.
7053  */
7054 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7055 {
7056         struct buffer_ref *ref =
7057                 (struct buffer_ref *)spd->partial[i].private;
7058
7059         if (--ref->ref)
7060                 return;
7061
7062         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7063         kfree(ref);
7064         spd->partial[i].private = 0;
7065 }
7066
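/*
 * tracing_buffers_splice_read - splice ring buffer pages into a pipe
 *
 * Zero-copy path: whole ring buffer pages are wrapped in buffer_ref
 * descriptors and handed to the pipe, and each page is returned to the
 * ring buffer once the last pipe reference to it is released.  The offset
 * must be page aligned and the length is rounded down to whole pages
 * (at least one).
 */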
7067 static ssize_t
7068 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7069                             struct pipe_inode_info *pipe, size_t len,
7070                             unsigned int flags)
7071 {
7072         struct ftrace_buffer_info *info = file->private_data;
7073         struct trace_iterator *iter = &info->iter;
7074         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7075         struct page *pages_def[PIPE_DEF_BUFFERS];
7076         struct splice_pipe_desc spd = {
7077                 .pages          = pages_def,
7078                 .partial        = partial_def,
7079                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7080                 .ops            = &buffer_pipe_buf_ops,
7081                 .spd_release    = buffer_spd_release,
7082         };
7083         struct buffer_ref *ref;
7084         int entries, i;
7085         ssize_t ret = 0;
7086
7087 #ifdef CONFIG_TRACER_MAX_TRACE
7088         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7089                 return -EBUSY;
7090 #endif
7091
7092         if (*ppos & (PAGE_SIZE - 1))
7093                 return -EINVAL;
7094
7095         if (len & (PAGE_SIZE - 1)) {
7096                 if (len < PAGE_SIZE)
7097                         return -EINVAL;
7098                 len &= PAGE_MASK;
7099         }
7100
7101         if (splice_grow_spd(pipe, &spd))
7102                 return -ENOMEM;
7103
7104  again:
7105         trace_access_lock(iter->cpu_file);
7106         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7107
7108         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7109                 struct page *page;
7110                 int r;
7111
7112                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7113                 if (!ref) {
7114                         ret = -ENOMEM;
7115                         break;
7116                 }
7117
7118                 ref->ref = 1;
7119                 ref->buffer = iter->trace_buffer->buffer;
7120                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7121                 if (IS_ERR(ref->page)) {
7122                         ret = PTR_ERR(ref->page);
7123                         ref->page = NULL;
7124                         kfree(ref);
7125                         break;
7126                 }
7127                 ref->cpu = iter->cpu_file;
7128
7129                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7130                                           len, iter->cpu_file, 1);
7131                 if (r < 0) {
7132                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7133                                                    ref->page);
7134                         kfree(ref);
7135                         break;
7136                 }
7137
7138                 page = virt_to_page(ref->page);
7139
7140                 spd.pages[i] = page;
7141                 spd.partial[i].len = PAGE_SIZE;
7142                 spd.partial[i].offset = 0;
7143                 spd.partial[i].private = (unsigned long)ref;
7144                 spd.nr_pages++;
7145                 *ppos += PAGE_SIZE;
7146
7147                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7148         }
7149
7150         trace_access_unlock(iter->cpu_file);
7151         spd.nr_pages = i;
7152
7153         /* did we read anything? */
7154         if (!spd.nr_pages) {
7155                 if (ret)
7156                         goto out;
7157
7158                 ret = -EAGAIN;
7159                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7160                         goto out;
7161
7162                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7163                 if (ret)
7164                         goto out;
7165
7166                 goto again;
7167         }
7168
7169         ret = splice_to_pipe(pipe, &spd);
7170 out:
7171         splice_shrink_spd(&spd);
7172
7173         return ret;
7174 }
7175
7176 static const struct file_operations tracing_buffers_fops = {
7177         .open           = tracing_buffers_open,
7178         .read           = tracing_buffers_read,
7179         .poll           = tracing_buffers_poll,
7180         .release        = tracing_buffers_release,
7181         .splice_read    = tracing_buffers_splice_read,
7182         .llseek         = no_llseek,
7183 };
7184
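/*
 * tracing_stats_read - read handler for the per-cpu "stats" files
 *
 * Reports per-cpu ring buffer statistics: entries, overruns, commit
 * overruns, bytes, oldest/current timestamps, dropped events and read
 * events.  Timestamps are printed in seconds for nanosecond-based clocks
 * and raw for counter/TSC style clocks.
 */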
7185 static ssize_t
7186 tracing_stats_read(struct file *filp, char __user *ubuf,
7187                    size_t count, loff_t *ppos)
7188 {
7189         struct inode *inode = file_inode(filp);
7190         struct trace_array *tr = inode->i_private;
7191         struct trace_buffer *trace_buf = &tr->trace_buffer;
7192         int cpu = tracing_get_cpu(inode);
7193         struct trace_seq *s;
7194         unsigned long cnt;
7195         unsigned long long t;
7196         unsigned long usec_rem;
7197
7198         s = kmalloc(sizeof(*s), GFP_KERNEL);
7199         if (!s)
7200                 return -ENOMEM;
7201
7202         trace_seq_init(s);
7203
7204         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7205         trace_seq_printf(s, "entries: %ld\n", cnt);
7206
7207         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7208         trace_seq_printf(s, "overrun: %ld\n", cnt);
7209
7210         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7211         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7212
7213         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7214         trace_seq_printf(s, "bytes: %ld\n", cnt);
7215
7216         if (trace_clocks[tr->clock_id].in_ns) {
7217                 /* local or global for trace_clock */
7218                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7219                 usec_rem = do_div(t, USEC_PER_SEC);
7220                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7221                                                                 t, usec_rem);
7222
7223                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7224                 usec_rem = do_div(t, USEC_PER_SEC);
7225                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7226         } else {
7227                 /* counter or tsc mode for trace_clock */
7228                 trace_seq_printf(s, "oldest event ts: %llu\n",
7229                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7230
7231                 trace_seq_printf(s, "now ts: %llu\n",
7232                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7233         }
7234
7235         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7236         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7237
7238         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7239         trace_seq_printf(s, "read events: %ld\n", cnt);
7240
7241         count = simple_read_from_buffer(ubuf, count, ppos,
7242                                         s->buffer, trace_seq_used(s));
7243
7244         kfree(s);
7245
7246         return count;
7247 }
7248
7249 static const struct file_operations tracing_stats_fops = {
7250         .open           = tracing_open_generic_tr,
7251         .read           = tracing_stats_read,
7252         .llseek         = generic_file_llseek,
7253         .release        = tracing_release_generic_tr,
7254 };
7255
7256 #ifdef CONFIG_DYNAMIC_FTRACE
7257
7258 static ssize_t
7259 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7260                   size_t cnt, loff_t *ppos)
7261 {
7262         unsigned long *p = filp->private_data;
7263         char buf[64]; /* Not too big for a shallow stack */
7264         int r;
7265
7266         r = scnprintf(buf, 63, "%ld", *p);
7267         buf[r++] = '\n';
7268
7269         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7270 }
7271
7272 static const struct file_operations tracing_dyn_info_fops = {
7273         .open           = tracing_open_generic,
7274         .read           = tracing_read_dyn_info,
7275         .llseek         = generic_file_llseek,
7276 };
7277 #endif /* CONFIG_DYNAMIC_FTRACE */
7278
7279 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7280 static void
7281 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7282                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7283                 void *data)
7284 {
7285         tracing_snapshot_instance(tr);
7286 }
7287
7288 static void
7289 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7290                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7291                       void *data)
7292 {
7293         struct ftrace_func_mapper *mapper = data;
7294         long *count = NULL;
7295
7296         if (mapper)
7297                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7298
7299         if (count) {
7300
7301                 if (*count <= 0)
7302                         return;
7303
7304                 (*count)--;
7305         }
7306
7307         tracing_snapshot_instance(tr);
7308 }
7309
7310 static int
7311 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7312                       struct ftrace_probe_ops *ops, void *data)
7313 {
7314         struct ftrace_func_mapper *mapper = data;
7315         long *count = NULL;
7316
7317         seq_printf(m, "%ps:", (void *)ip);
7318
7319         seq_puts(m, "snapshot");
7320
7321         if (mapper)
7322                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7323
7324         if (count)
7325                 seq_printf(m, ":count=%ld\n", *count);
7326         else
7327                 seq_puts(m, ":unlimited\n");
7328
7329         return 0;
7330 }
7331
7332 static int
7333 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7334                      unsigned long ip, void *init_data, void **data)
7335 {
7336         struct ftrace_func_mapper *mapper = *data;
7337
7338         if (!mapper) {
7339                 mapper = allocate_ftrace_func_mapper();
7340                 if (!mapper)
7341                         return -ENOMEM;
7342                 *data = mapper;
7343         }
7344
7345         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7346 }
7347
7348 static void
7349 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7350                      unsigned long ip, void *data)
7351 {
7352         struct ftrace_func_mapper *mapper = data;
7353
7354         if (!ip) {
7355                 if (!mapper)
7356                         return;
7357                 free_ftrace_func_mapper(mapper, NULL);
7358                 return;
7359         }
7360
7361         ftrace_func_mapper_remove_ip(mapper, ip);
7362 }
7363
7364 static struct ftrace_probe_ops snapshot_probe_ops = {
7365         .func                   = ftrace_snapshot,
7366         .print                  = ftrace_snapshot_print,
7367 };
7368
7369 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7370         .func                   = ftrace_count_snapshot,
7371         .print                  = ftrace_snapshot_print,
7372         .init                   = ftrace_snapshot_init,
7373         .free                   = ftrace_snapshot_free,
7374 };
7375
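/*
 * ftrace_trace_snapshot_callback - handle the "snapshot" function command
 *
 * Parses commands of the form (written to set_ftrace_filter):
 *
 *	<function>:snapshot[:count]
 *
 * and registers a function probe that takes a snapshot each time the
 * function is hit, optionally limited to @count hits.  A leading '!'
 * unregisters the probe instead.
 */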
7376 static int
7377 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7378                                char *glob, char *cmd, char *param, int enable)
7379 {
7380         struct ftrace_probe_ops *ops;
7381         void *count = (void *)-1;
7382         char *number;
7383         int ret;
7384
7385         if (!tr)
7386                 return -ENODEV;
7387
7388         /* hash funcs only work with set_ftrace_filter */
7389         if (!enable)
7390                 return -EINVAL;
7391
7392         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7393
7394         if (glob[0] == '!')
7395                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7396
7397         if (!param)
7398                 goto out_reg;
7399
7400         number = strsep(&param, ":");
7401
7402         if (!strlen(number))
7403                 goto out_reg;
7404
7405         /*
7406          * We use the callback data field (which is a pointer)
7407          * as our counter.
7408          */
7409         ret = kstrtoul(number, 0, (unsigned long *)&count);
7410         if (ret)
7411                 return ret;
7412
7413  out_reg:
7414         ret = tracing_alloc_snapshot_instance(tr);
7415         if (ret < 0)
7416                 goto out;
7417
7418         ret = register_ftrace_function_probe(glob, tr, ops, count);
7419
7420  out:
7421         return ret < 0 ? ret : 0;
7422 }
7423
7424 static struct ftrace_func_command ftrace_snapshot_cmd = {
7425         .name                   = "snapshot",
7426         .func                   = ftrace_trace_snapshot_callback,
7427 };
7428
7429 static __init int register_snapshot_cmd(void)
7430 {
7431         return register_ftrace_command(&ftrace_snapshot_cmd);
7432 }
7433 #else
7434 static inline __init int register_snapshot_cmd(void) { return 0; }
7435 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7436
7437 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7438 {
7439         if (WARN_ON(!tr->dir))
7440                 return ERR_PTR(-ENODEV);
7441
7442         /* Top directory uses NULL as the parent */
7443         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7444                 return NULL;
7445
7446         /* All sub buffers have a descriptor */
7447         return tr->dir;
7448 }
7449
7450 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7451 {
7452         struct dentry *d_tracer;
7453
7454         if (tr->percpu_dir)
7455                 return tr->percpu_dir;
7456
7457         d_tracer = tracing_get_dentry(tr);
7458         if (IS_ERR(d_tracer))
7459                 return NULL;
7460
7461         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7462
7463         WARN_ONCE(!tr->percpu_dir,
7464                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7465
7466         return tr->percpu_dir;
7467 }
7468
7469 static struct dentry *
7470 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7471                       void *data, long cpu, const struct file_operations *fops)
7472 {
7473         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7474
7475         if (ret) /* See tracing_get_cpu() */
7476                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7477         return ret;
7478 }
7479
7480 static void
7481 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7482 {
7483         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7484         struct dentry *d_cpu;
7485         char cpu_dir[30]; /* 30 characters should be more than enough */
7486
7487         if (!d_percpu)
7488                 return;
7489
7490         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7491         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7492         if (!d_cpu) {
7493                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7494                 return;
7495         }
7496
7497         /* per cpu trace_pipe */
7498         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7499                                 tr, cpu, &tracing_pipe_fops);
7500
7501         /* per cpu trace */
7502         trace_create_cpu_file("trace", 0644, d_cpu,
7503                                 tr, cpu, &tracing_fops);
7504
7505         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7506                                 tr, cpu, &tracing_buffers_fops);
7507
7508         trace_create_cpu_file("stats", 0444, d_cpu,
7509                                 tr, cpu, &tracing_stats_fops);
7510
7511         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7512                                 tr, cpu, &tracing_entries_fops);
7513
7514 #ifdef CONFIG_TRACER_SNAPSHOT
7515         trace_create_cpu_file("snapshot", 0644, d_cpu,
7516                                 tr, cpu, &snapshot_fops);
7517
7518         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7519                                 tr, cpu, &snapshot_raw_fops);
7520 #endif
7521 }
7522
7523 #ifdef CONFIG_FTRACE_SELFTEST
7524 /* Let selftest have access to static functions in this file */
7525 #include "trace_selftest.c"
7526 #endif
7527
7528 static ssize_t
7529 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7530                         loff_t *ppos)
7531 {
7532         struct trace_option_dentry *topt = filp->private_data;
7533         char *buf;
7534
7535         if (topt->flags->val & topt->opt->bit)
7536                 buf = "1\n";
7537         else
7538                 buf = "0\n";
7539
7540         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7541 }
7542
7543 static ssize_t
7544 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7545                          loff_t *ppos)
7546 {
7547         struct trace_option_dentry *topt = filp->private_data;
7548         unsigned long val;
7549         int ret;
7550
7551         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7552         if (ret)
7553                 return ret;
7554
7555         if (val != 0 && val != 1)
7556                 return -EINVAL;
7557
7558         if (!!(topt->flags->val & topt->opt->bit) != val) {
7559                 mutex_lock(&trace_types_lock);
7560                 ret = __set_tracer_option(topt->tr, topt->flags,
7561                                           topt->opt, !val);
7562                 mutex_unlock(&trace_types_lock);
7563                 if (ret)
7564                         return ret;
7565         }
7566
7567         *ppos += cnt;
7568
7569         return cnt;
7570 }
7571
7572
7573 static const struct file_operations trace_options_fops = {
7574         .open = tracing_open_generic,
7575         .read = trace_options_read,
7576         .write = trace_options_write,
7577         .llseek = generic_file_llseek,
7578 };
7579
7580 /*
7581  * In order to pass in both the trace_array descriptor as well as the index
7582  * to the flag that the trace option file represents, the trace_array
7583  * has a character array of trace_flags_index[], which holds the index
7584  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7585  * The address of this character array is passed to the flag option file
7586  * read/write callbacks.
7587  *
7588  * In order to extract both the index and the trace_array descriptor,
7589  * get_tr_index() uses the following algorithm.
7590  *
7591  *   idx = *ptr;
7592  *
7593  * The pointer passed in is the address of one element of that index
7594  * array, and since index[i] == i, dereferencing it yields the index.
7595  *
7596  * Then, to get the trace_array descriptor, subtracting that index from
7597  * the pointer gets us back to the start of the index array.
7598  *
7599  *   ptr - idx == &index[0]
7600  *
7601  * Then a simple container_of() from that pointer gets us to the
7602  * trace_array descriptor.
7603  */
7604 static void get_tr_index(void *data, struct trace_array **ptr,
7605                          unsigned int *pindex)
7606 {
7607         *pindex = *(unsigned char *)data;
7608
7609         *ptr = container_of(data - *pindex, struct trace_array,
7610                             trace_flags_index);
7611 }
7612
7613 static ssize_t
7614 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7615                         loff_t *ppos)
7616 {
7617         void *tr_index = filp->private_data;
7618         struct trace_array *tr;
7619         unsigned int index;
7620         char *buf;
7621
7622         get_tr_index(tr_index, &tr, &index);
7623
7624         if (tr->trace_flags & (1 << index))
7625                 buf = "1\n";
7626         else
7627                 buf = "0\n";
7628
7629         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7630 }
7631
7632 static ssize_t
7633 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7634                          loff_t *ppos)
7635 {
7636         void *tr_index = filp->private_data;
7637         struct trace_array *tr;
7638         unsigned int index;
7639         unsigned long val;
7640         int ret;
7641
7642         get_tr_index(tr_index, &tr, &index);
7643
7644         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7645         if (ret)
7646                 return ret;
7647
7648         if (val != 0 && val != 1)
7649                 return -EINVAL;
7650
7651         mutex_lock(&trace_types_lock);
7652         ret = set_tracer_flag(tr, 1 << index, val);
7653         mutex_unlock(&trace_types_lock);
7654
7655         if (ret < 0)
7656                 return ret;
7657
7658         *ppos += cnt;
7659
7660         return cnt;
7661 }
7662
7663 static const struct file_operations trace_options_core_fops = {
7664         .open = tracing_open_generic,
7665         .read = trace_options_core_read,
7666         .write = trace_options_core_write,
7667         .llseek = generic_file_llseek,
7668 };
7669
7670 struct dentry *trace_create_file(const char *name,
7671                                  umode_t mode,
7672                                  struct dentry *parent,
7673                                  void *data,
7674                                  const struct file_operations *fops)
7675 {
7676         struct dentry *ret;
7677
7678         ret = tracefs_create_file(name, mode, parent, data, fops);
7679         if (!ret)
7680                 pr_warn("Could not create tracefs '%s' entry\n", name);
7681
7682         return ret;
7683 }
7684
7685
7686 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7687 {
7688         struct dentry *d_tracer;
7689
7690         if (tr->options)
7691                 return tr->options;
7692
7693         d_tracer = tracing_get_dentry(tr);
7694         if (IS_ERR(d_tracer))
7695                 return NULL;
7696
7697         tr->options = tracefs_create_dir("options", d_tracer);
7698         if (!tr->options) {
7699                 pr_warn("Could not create tracefs directory 'options'\n");
7700                 return NULL;
7701         }
7702
7703         return tr->options;
7704 }
7705
7706 static void
7707 create_trace_option_file(struct trace_array *tr,
7708                          struct trace_option_dentry *topt,
7709                          struct tracer_flags *flags,
7710                          struct tracer_opt *opt)
7711 {
7712         struct dentry *t_options;
7713
7714         t_options = trace_options_init_dentry(tr);
7715         if (!t_options)
7716                 return;
7717
7718         topt->flags = flags;
7719         topt->opt = opt;
7720         topt->tr = tr;
7721
7722         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7723                                     &trace_options_fops);
7724
7725 }
7726
7727 static void
7728 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7729 {
7730         struct trace_option_dentry *topts;
7731         struct trace_options *tr_topts;
7732         struct tracer_flags *flags;
7733         struct tracer_opt *opts;
7734         int cnt;
7735         int i;
7736
7737         if (!tracer)
7738                 return;
7739
7740         flags = tracer->flags;
7741
7742         if (!flags || !flags->opts)
7743                 return;
7744
7745         /*
7746          * If this is an instance, only create flags for tracers
7747          * the instance may have.
7748          */
7749         if (!trace_ok_for_array(tracer, tr))
7750                 return;
7751
7752         for (i = 0; i < tr->nr_topts; i++) {
7753                 /* Make sure there are no duplicate flags. */
7754                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7755                         return;
7756         }
7757
7758         opts = flags->opts;
7759
7760         for (cnt = 0; opts[cnt].name; cnt++)
7761                 ;
7762
7763         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7764         if (!topts)
7765                 return;
7766
7767         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7768                             GFP_KERNEL);
7769         if (!tr_topts) {
7770                 kfree(topts);
7771                 return;
7772         }
7773
7774         tr->topts = tr_topts;
7775         tr->topts[tr->nr_topts].tracer = tracer;
7776         tr->topts[tr->nr_topts].topts = topts;
7777         tr->nr_topts++;
7778
7779         for (cnt = 0; opts[cnt].name; cnt++) {
7780                 create_trace_option_file(tr, &topts[cnt], flags,
7781                                          &opts[cnt]);
7782                 WARN_ONCE(topts[cnt].entry == NULL,
7783                           "Failed to create trace option: %s",
7784                           opts[cnt].name);
7785         }
7786 }
7787
7788 static struct dentry *
7789 create_trace_option_core_file(struct trace_array *tr,
7790                               const char *option, long index)
7791 {
7792         struct dentry *t_options;
7793
7794         t_options = trace_options_init_dentry(tr);
7795         if (!t_options)
7796                 return NULL;
7797
7798         return trace_create_file(option, 0644, t_options,
7799                                  (void *)&tr->trace_flags_index[index],
7800                                  &trace_options_core_fops);
7801 }
7802
7803 static void create_trace_options_dir(struct trace_array *tr)
7804 {
7805         struct dentry *t_options;
7806         bool top_level = tr == &global_trace;
7807         int i;
7808
7809         t_options = trace_options_init_dentry(tr);
7810         if (!t_options)
7811                 return;
7812
7813         for (i = 0; trace_options[i]; i++) {
7814                 if (top_level ||
7815                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7816                         create_trace_option_core_file(tr, trace_options[i], i);
7817         }
7818 }
7819
7820 static ssize_t
7821 rb_simple_read(struct file *filp, char __user *ubuf,
7822                size_t cnt, loff_t *ppos)
7823 {
7824         struct trace_array *tr = filp->private_data;
7825         char buf[64];
7826         int r;
7827
7828         r = tracer_tracing_is_on(tr);
7829         r = sprintf(buf, "%d\n", r);
7830
7831         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7832 }
7833
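/*
 * rb_simple_write - write handler for the tracefs "tracing_on" file
 *
 * Writing a non-zero value turns the ring buffer back on and calls the
 * current tracer's start hook; writing zero turns it off and calls the
 * stop hook.  Writing the current state again is a no-op.
 */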
7834 static ssize_t
7835 rb_simple_write(struct file *filp, const char __user *ubuf,
7836                 size_t cnt, loff_t *ppos)
7837 {
7838         struct trace_array *tr = filp->private_data;
7839         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7840         unsigned long val;
7841         int ret;
7842
7843         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7844         if (ret)
7845                 return ret;
7846
7847         if (buffer) {
7848                 mutex_lock(&trace_types_lock);
7849                 if (!!val == tracer_tracing_is_on(tr)) {
7850                         val = 0; /* do nothing */
7851                 } else if (val) {
7852                         tracer_tracing_on(tr);
7853                         if (tr->current_trace->start)
7854                                 tr->current_trace->start(tr);
7855                 } else {
7856                         tracer_tracing_off(tr);
7857                         if (tr->current_trace->stop)
7858                                 tr->current_trace->stop(tr);
7859                 }
7860                 mutex_unlock(&trace_types_lock);
7861         }
7862
7863         (*ppos)++;
7864
7865         return cnt;
7866 }
7867
7868 static const struct file_operations rb_simple_fops = {
7869         .open           = tracing_open_generic_tr,
7870         .read           = rb_simple_read,
7871         .write          = rb_simple_write,
7872         .release        = tracing_release_generic_tr,
7873         .llseek         = default_llseek,
7874 };
7875
7876 static ssize_t
7877 buffer_percent_read(struct file *filp, char __user *ubuf,
7878                     size_t cnt, loff_t *ppos)
7879 {
7880         struct trace_array *tr = filp->private_data;
7881         char buf[64];
7882         int r;
7883
7884         r = tr->buffer_percent;
7885         r = sprintf(buf, "%d\n", r);
7886
7887         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7888 }
7889
7890 static ssize_t
7891 buffer_percent_write(struct file *filp, const char __user *ubuf,
7892                      size_t cnt, loff_t *ppos)
7893 {
7894         struct trace_array *tr = filp->private_data;
7895         unsigned long val;
7896         int ret;
7897
7898         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7899         if (ret)
7900                 return ret;
7901
7902         if (val > 100)
7903                 return -EINVAL;
7904
7905         if (!val)
7906                 val = 1;
7907
7908         tr->buffer_percent = val;
7909
7910         (*ppos)++;
7911
7912         return cnt;
7913 }
7914
7915 static const struct file_operations buffer_percent_fops = {
7916         .open           = tracing_open_generic_tr,
7917         .read           = buffer_percent_read,
7918         .write          = buffer_percent_write,
7919         .release        = tracing_release_generic_tr,
7920         .llseek         = default_llseek,
7921 };
7922
7923 struct dentry *trace_instance_dir;
7924
7925 static void
7926 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7927
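/*
 * allocate_trace_buffer - allocate one ring buffer plus its per-cpu data
 *
 * Used for both the live trace buffer and, with CONFIG_TRACER_MAX_TRACE,
 * the snapshot/max buffer; the snapshot buffer is kept at a minimal size
 * unless a boot-time snapshot was requested.
 */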
7928 static int
7929 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7930 {
7931         enum ring_buffer_flags rb_flags;
7932
7933         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7934
7935         buf->tr = tr;
7936
7937         buf->buffer = ring_buffer_alloc(size, rb_flags);
7938         if (!buf->buffer)
7939                 return -ENOMEM;
7940
7941         buf->data = alloc_percpu(struct trace_array_cpu);
7942         if (!buf->data) {
7943                 ring_buffer_free(buf->buffer);
7944                 buf->buffer = NULL;
7945                 return -ENOMEM;
7946         }
7947
7948         /* Allocate the first page for all buffers */
7949         set_buffer_entries(&tr->trace_buffer,
7950                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7951
7952         return 0;
7953 }
7954
7955 static int allocate_trace_buffers(struct trace_array *tr, int size)
7956 {
7957         int ret;
7958
7959         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7960         if (ret)
7961                 return ret;
7962
7963 #ifdef CONFIG_TRACER_MAX_TRACE
7964         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7965                                     allocate_snapshot ? size : 1);
7966         if (WARN_ON(ret)) {
7967                 ring_buffer_free(tr->trace_buffer.buffer);
7968                 tr->trace_buffer.buffer = NULL;
7969                 free_percpu(tr->trace_buffer.data);
7970                 tr->trace_buffer.data = NULL;
7971                 return -ENOMEM;
7972         }
7973         tr->allocated_snapshot = allocate_snapshot;
7974
7975         /*
7976          * Only the top level trace array gets its snapshot allocated
7977          * from the kernel command line.
7978          */
7979         allocate_snapshot = false;
7980 #endif
7981         return 0;
7982 }
7983
7984 static void free_trace_buffer(struct trace_buffer *buf)
7985 {
7986         if (buf->buffer) {
7987                 ring_buffer_free(buf->buffer);
7988                 buf->buffer = NULL;
7989                 free_percpu(buf->data);
7990                 buf->data = NULL;
7991         }
7992 }
7993
7994 static void free_trace_buffers(struct trace_array *tr)
7995 {
7996         if (!tr)
7997                 return;
7998
7999         free_trace_buffer(&tr->trace_buffer);
8000
8001 #ifdef CONFIG_TRACER_MAX_TRACE
8002         free_trace_buffer(&tr->max_buffer);
8003 #endif
8004 }
8005
8006 static void init_trace_flags_index(struct trace_array *tr)
8007 {
8008         int i;
8009
8010         /* Used by the trace options files */
8011         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8012                 tr->trace_flags_index[i] = i;
8013 }
8014
8015 static void __update_tracer_options(struct trace_array *tr)
8016 {
8017         struct tracer *t;
8018
8019         for (t = trace_types; t; t = t->next)
8020                 add_tracer_options(tr, t);
8021 }
8022
8023 static void update_tracer_options(struct trace_array *tr)
8024 {
8025         mutex_lock(&trace_types_lock);
8026         __update_tracer_options(tr);
8027         mutex_unlock(&trace_types_lock);
8028 }
8029
8030 static int instance_mkdir(const char *name)
8031 {
8032         struct trace_array *tr;
8033         int ret;
8034
8035         mutex_lock(&event_mutex);
8036         mutex_lock(&trace_types_lock);
8037
8038         ret = -EEXIST;
8039         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8040                 if (tr->name && strcmp(tr->name, name) == 0)
8041                         goto out_unlock;
8042         }
8043
8044         ret = -ENOMEM;
8045         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8046         if (!tr)
8047                 goto out_unlock;
8048
8049         tr->name = kstrdup(name, GFP_KERNEL);
8050         if (!tr->name)
8051                 goto out_free_tr;
8052
8053         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8054                 goto out_free_tr;
8055
8056         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8057
8058         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8059
8060         raw_spin_lock_init(&tr->start_lock);
8061
8062         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8063
8064         tr->current_trace = &nop_trace;
8065
8066         INIT_LIST_HEAD(&tr->systems);
8067         INIT_LIST_HEAD(&tr->events);
8068         INIT_LIST_HEAD(&tr->hist_vars);
8069
8070         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8071                 goto out_free_tr;
8072
8073         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8074         if (!tr->dir)
8075                 goto out_free_tr;
8076
8077         ret = event_trace_add_tracer(tr->dir, tr);
8078         if (ret) {
8079                 tracefs_remove_recursive(tr->dir);
8080                 goto out_free_tr;
8081         }
8082
8083         ftrace_init_trace_array(tr);
8084
8085         init_tracer_tracefs(tr, tr->dir);
8086         init_trace_flags_index(tr);
8087         __update_tracer_options(tr);
8088
8089         list_add(&tr->list, &ftrace_trace_arrays);
8090
8091         mutex_unlock(&trace_types_lock);
8092         mutex_unlock(&event_mutex);
8093
8094         return 0;
8095
8096  out_free_tr:
8097         free_trace_buffers(tr);
8098         free_cpumask_var(tr->tracing_cpumask);
8099         kfree(tr->name);
8100         kfree(tr);
8101
8102  out_unlock:
8103         mutex_unlock(&trace_types_lock);
8104         mutex_unlock(&event_mutex);
8105
8106         return ret;
8107
8108 }
8109
8110 static int instance_rmdir(const char *name)
8111 {
8112         struct trace_array *tr;
8113         int found = 0;
8114         int ret;
8115         int i;
8116
8117         mutex_lock(&event_mutex);
8118         mutex_lock(&trace_types_lock);
8119
8120         ret = -ENODEV;
8121         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8122                 if (tr->name && strcmp(tr->name, name) == 0) {
8123                         found = 1;
8124                         break;
8125                 }
8126         }
8127         if (!found)
8128                 goto out_unlock;
8129
8130         ret = -EBUSY;
8131         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8132                 goto out_unlock;
8133
8134         list_del(&tr->list);
8135
8136         /* Disable all the flags that were enabled coming in */
8137         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8138                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8139                         set_tracer_flag(tr, 1 << i, 0);
8140         }
8141
8142         tracing_set_nop(tr);
8143         clear_ftrace_function_probes(tr);
8144         event_trace_del_tracer(tr);
8145         ftrace_clear_pids(tr);
8146         ftrace_destroy_function_files(tr);
8147         tracefs_remove_recursive(tr->dir);
8148         free_trace_buffers(tr);
8149
8150         for (i = 0; i < tr->nr_topts; i++) {
8151                 kfree(tr->topts[i].topts);
8152         }
8153         kfree(tr->topts);
8154
8155         free_cpumask_var(tr->tracing_cpumask);
8156         kfree(tr->name);
8157         kfree(tr);
8158
8159         ret = 0;
8160
8161  out_unlock:
8162         mutex_unlock(&trace_types_lock);
8163         mutex_unlock(&event_mutex);
8164
8165         return ret;
8166 }
8167
8168 static __init void create_trace_instances(struct dentry *d_tracer)
8169 {
8170         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8171                                                          instance_mkdir,
8172                                                          instance_rmdir);
8173         if (WARN_ON(!trace_instance_dir))
8174                 return;
8175 }
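
/*
 * Instances are created and removed from user space; a usage sketch
 * (assuming the usual tracefs mount point):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo     calls instance_mkdir("foo")
 *   # rmdir /sys/kernel/tracing/instances/foo     calls instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance still has references.
 */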
8176
8177 static void
8178 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8179 {
8180         struct trace_event_file *file;
8181         int cpu;
8182
8183         trace_create_file("available_tracers", 0444, d_tracer,
8184                         tr, &show_traces_fops);
8185
8186         trace_create_file("current_tracer", 0644, d_tracer,
8187                         tr, &set_tracer_fops);
8188
8189         trace_create_file("tracing_cpumask", 0644, d_tracer,
8190                           tr, &tracing_cpumask_fops);
8191
8192         trace_create_file("trace_options", 0644, d_tracer,
8193                           tr, &tracing_iter_fops);
8194
8195         trace_create_file("trace", 0644, d_tracer,
8196                           tr, &tracing_fops);
8197
8198         trace_create_file("trace_pipe", 0444, d_tracer,
8199                           tr, &tracing_pipe_fops);
8200
8201         trace_create_file("buffer_size_kb", 0644, d_tracer,
8202                           tr, &tracing_entries_fops);
8203
8204         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8205                           tr, &tracing_total_entries_fops);
8206
8207         trace_create_file("free_buffer", 0200, d_tracer,
8208                           tr, &tracing_free_buffer_fops);
8209
8210         trace_create_file("trace_marker", 0220, d_tracer,
8211                           tr, &tracing_mark_fops);
8212
8213         file = __find_event_file(tr, "ftrace", "print");
8214         if (file && file->dir)
8215                 trace_create_file("trigger", 0644, file->dir, file,
8216                                   &event_trigger_fops);
8217         tr->trace_marker_file = file;
8218
8219         trace_create_file("trace_marker_raw", 0220, d_tracer,
8220                           tr, &tracing_mark_raw_fops);
8221
8222         trace_create_file("trace_clock", 0644, d_tracer, tr,
8223                           &trace_clock_fops);
8224
8225         trace_create_file("tracing_on", 0644, d_tracer,
8226                           tr, &rb_simple_fops);
8227
8228         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8229                           &trace_time_stamp_mode_fops);
8230
8231         tr->buffer_percent = 50;
8232
8233         trace_create_file("buffer_percent", 0444, d_tracer,
8234                         tr, &buffer_percent_fops);
8235
8236         create_trace_options_dir(tr);
8237
8238 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8239         trace_create_file("tracing_max_latency", 0644, d_tracer,
8240                         &tr->max_latency, &tracing_max_lat_fops);
8241 #endif
8242
8243         if (ftrace_create_function_files(tr, d_tracer))
8244                 WARN(1, "Could not allocate function filter files");
8245
8246 #ifdef CONFIG_TRACER_SNAPSHOT
8247         trace_create_file("snapshot", 0644, d_tracer,
8248                           tr, &snapshot_fops);
8249 #endif
8250
8251         for_each_tracing_cpu(cpu)
8252                 tracing_init_tracefs_percpu(tr, cpu);
8253
8254         ftrace_init_tracefs(tr, d_tracer);
8255 }
8256
8257 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8258 {
8259         struct vfsmount *mnt;
8260         struct file_system_type *type;
8261
8262         /*
8263          * To maintain backward compatibility for tools that mount
8264          * debugfs to get to the tracing facility, tracefs is automatically
8265          * mounted to the debugfs/tracing directory.
8266          */
8267         type = get_fs_type("tracefs");
8268         if (!type)
8269                 return NULL;
8270         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8271         put_filesystem(type);
8272         if (IS_ERR(mnt))
8273                 return NULL;
8274         mntget(mnt);
8275
8276         return mnt;
8277 }
8278
8279 /**
8280  * tracing_init_dentry - initialize top level trace array
8281  *
8282  * This is called when creating files or directories in the tracing
8283  * directory. It is called via fs_initcall() by any of the boot up code
8284  * directory. It is called via fs_initcall() by the boot-up code
8285  */
8286 struct dentry *tracing_init_dentry(void)
8287 {
8288         struct trace_array *tr = &global_trace;
8289
8290         /* The top level trace array uses NULL as parent */
8291         if (tr->dir)
8292                 return NULL;
8293
8294         if (WARN_ON(!tracefs_initialized()) ||
8295                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8296                  WARN_ON(!debugfs_initialized())))
8297                 return ERR_PTR(-ENODEV);
8298
8299         /*
8300          * As there may still be users that expect the tracing
8301          * files to exist in debugfs/tracing, we must automount
8302          * the tracefs file system there, so older tools still
8303          * work with the newer kernel.
8304          */
8305         tr->dir = debugfs_create_automount("tracing", NULL,
8306                                            trace_automount, NULL);
8307         if (!tr->dir) {
8308                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8309                 return ERR_PTR(-ENOMEM);
8310         }
8311
8312         return NULL;
8313 }
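
/*
 * Illustrative note: with the automount set up above, a legacy
 * "cd /sys/kernel/debug/tracing" transparently mounts tracefs at that
 * point, while newer tooling can mount tracefs directly, e.g.:
 *
 *   # mount -t tracefs nodev /sys/kernel/tracing
 */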
8314
8315 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8316 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8317
8318 static void __init trace_eval_init(void)
8319 {
8320         int len;
8321
8322         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8323         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8324 }
8325
8326 #ifdef CONFIG_MODULES
8327 static void trace_module_add_evals(struct module *mod)
8328 {
8329         if (!mod->num_trace_evals)
8330                 return;
8331
8332         /*
8333          * Modules with bad taint do not have events created, do
8334          * not bother with enums either.
8335          */
8336         if (trace_module_has_bad_taint(mod))
8337                 return;
8338
8339         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8340 }
8341
8342 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8343 static void trace_module_remove_evals(struct module *mod)
8344 {
8345         union trace_eval_map_item *map;
8346         union trace_eval_map_item **last = &trace_eval_maps;
8347
8348         if (!mod->num_trace_evals)
8349                 return;
8350
8351         mutex_lock(&trace_eval_mutex);
8352
8353         map = trace_eval_maps;
8354
8355         while (map) {
8356                 if (map->head.mod == mod)
8357                         break;
8358                 map = trace_eval_jmp_to_tail(map);
8359                 last = &map->tail.next;
8360                 map = map->tail.next;
8361         }
8362         if (!map)
8363                 goto out;
8364
8365         *last = trace_eval_jmp_to_tail(map)->tail.next;
8366         kfree(map);
8367  out:
8368         mutex_unlock(&trace_eval_mutex);
8369 }
8370 #else
8371 static inline void trace_module_remove_evals(struct module *mod) { }
8372 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8373
8374 static int trace_module_notify(struct notifier_block *self,
8375                                unsigned long val, void *data)
8376 {
8377         struct module *mod = data;
8378
8379         switch (val) {
8380         case MODULE_STATE_COMING:
8381                 trace_module_add_evals(mod);
8382                 break;
8383         case MODULE_STATE_GOING:
8384                 trace_module_remove_evals(mod);
8385                 break;
8386         }
8387
8388         return 0;
8389 }
8390
8391 static struct notifier_block trace_module_nb = {
8392         .notifier_call = trace_module_notify,
8393         .priority = 0,
8394 };
8395 #endif /* CONFIG_MODULES */
8396
8397 static __init int tracer_init_tracefs(void)
8398 {
8399         struct dentry *d_tracer;
8400
8401         trace_access_lock_init();
8402
8403         d_tracer = tracing_init_dentry();
8404         if (IS_ERR(d_tracer))
8405                 return 0;
8406
8407         event_trace_init();
8408
8409         init_tracer_tracefs(&global_trace, d_tracer);
8410         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8411
8412         trace_create_file("tracing_thresh", 0644, d_tracer,
8413                         &global_trace, &tracing_thresh_fops);
8414
8415         trace_create_file("README", 0444, d_tracer,
8416                         NULL, &tracing_readme_fops);
8417
8418         trace_create_file("saved_cmdlines", 0444, d_tracer,
8419                         NULL, &tracing_saved_cmdlines_fops);
8420
8421         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8422                           NULL, &tracing_saved_cmdlines_size_fops);
8423
8424         trace_create_file("saved_tgids", 0444, d_tracer,
8425                         NULL, &tracing_saved_tgids_fops);
8426
8427         trace_eval_init();
8428
8429         trace_create_eval_file(d_tracer);
8430
8431 #ifdef CONFIG_MODULES
8432         register_module_notifier(&trace_module_nb);
8433 #endif
8434
8435 #ifdef CONFIG_DYNAMIC_FTRACE
8436         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8437                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8438 #endif
8439
8440         create_trace_instances(d_tracer);
8441
8442         update_tracer_options(&global_trace);
8443
8444         return 0;
8445 }
8446
8447 static int trace_panic_handler(struct notifier_block *this,
8448                                unsigned long event, void *unused)
8449 {
8450         if (ftrace_dump_on_oops)
8451                 ftrace_dump(ftrace_dump_on_oops);
8452         return NOTIFY_OK;
8453 }
8454
8455 static struct notifier_block trace_panic_notifier = {
8456         .notifier_call  = trace_panic_handler,
8457         .next           = NULL,
8458         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8459 };
8460
8461 static int trace_die_handler(struct notifier_block *self,
8462                              unsigned long val,
8463                              void *data)
8464 {
8465         switch (val) {
8466         case DIE_OOPS:
8467                 if (ftrace_dump_on_oops)
8468                         ftrace_dump(ftrace_dump_on_oops);
8469                 break;
8470         default:
8471                 break;
8472         }
8473         return NOTIFY_OK;
8474 }
8475
8476 static struct notifier_block trace_die_notifier = {
8477         .notifier_call = trace_die_handler,
8478         .priority = 200
8479 };
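
/*
 * Both notifiers above only dump the buffers when ftrace_dump_on_oops
 * is set; a usage sketch for enabling that at run time or boot time:
 *
 *   # echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * or pass "ftrace_dump_on_oops" on the kernel command line.
 */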
8480
8481 /*
8482  * printk is set to a max of 1024; we really don't need it that big.
8483  * Nothing should be printing 1000 characters anyway.
8484  */
8485 #define TRACE_MAX_PRINT         1000
8486
8487 /*
8488  * Define here KERN_TRACE so that we have one place to modify
8489  * it if we decide to change what log level the ftrace dump
8490  * should be at.
8491  */
8492 #define KERN_TRACE              KERN_EMERG
8493
8494 void
8495 trace_printk_seq(struct trace_seq *s)
8496 {
8497         /* Probably should print a warning here. */
8498         if (s->seq.len >= TRACE_MAX_PRINT)
8499                 s->seq.len = TRACE_MAX_PRINT;
8500
8501         /*
8502          * More paranoid code. Although the buffer size is set to
8503          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8504          * an extra layer of protection.
8505          */
8506         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8507                 s->seq.len = s->seq.size - 1;
8508
8509         /* should be zero-terminated, but we are paranoid. */
8510         s->buffer[s->seq.len] = 0;
8511
8512         printk(KERN_TRACE "%s", s->buffer);
8513
8514         trace_seq_init(s);
8515 }
8516
8517 void trace_init_global_iter(struct trace_iterator *iter)
8518 {
8519         iter->tr = &global_trace;
8520         iter->trace = iter->tr->current_trace;
8521         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8522         iter->trace_buffer = &global_trace.trace_buffer;
8523
8524         if (iter->trace && iter->trace->open)
8525                 iter->trace->open(iter);
8526
8527         /* Annotate start of buffers if we had overruns */
8528         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8529                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8530
8531         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8532         if (trace_clocks[iter->tr->clock_id].in_ns)
8533                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8534 }
8535
8536 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8537 {
8538         /* use static because iter can be a bit big for the stack */
8539         static struct trace_iterator iter;
8540         static atomic_t dump_running;
8541         struct trace_array *tr = &global_trace;
8542         unsigned int old_userobj;
8543         unsigned long flags;
8544         int cnt = 0, cpu;
8545
8546         /* Only allow one dump user at a time. */
8547         if (atomic_inc_return(&dump_running) != 1) {
8548                 atomic_dec(&dump_running);
8549                 return;
8550         }
8551
8552         /*
8553          * Always turn off tracing when we dump.
8554          * We don't need to show trace output of what happens
8555          * between multiple crashes.
8556          *
8557          * If the user does a sysrq-z, then they can re-enable
8558          * tracing with echo 1 > tracing_on.
8559          */
8560         tracing_off();
8561
8562         local_irq_save(flags);
8563         printk_nmi_direct_enter();
8564
8565         /* Simulate the iterator */
8566         trace_init_global_iter(&iter);
8567
8568         for_each_tracing_cpu(cpu) {
8569                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8570         }
8571
8572         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8573
8574         /* don't look at user memory in panic mode */
8575         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8576
8577         switch (oops_dump_mode) {
8578         case DUMP_ALL:
8579                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8580                 break;
8581         case DUMP_ORIG:
8582                 iter.cpu_file = raw_smp_processor_id();
8583                 break;
8584         case DUMP_NONE:
8585                 goto out_enable;
8586         default:
8587                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8588                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8589         }
8590
8591         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8592
8593         /* Did function tracer already get disabled? */
8594         if (ftrace_is_dead()) {
8595                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8596                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8597         }
8598
8599         /*
8600          * We need to stop all tracing on all CPUs to read
8601          * the next buffer. This is a bit expensive, but is
8602          * not done often. We read everything we can,
8603          * and then release the locks again.
8604          */
8605
8606         while (!trace_empty(&iter)) {
8607
8608                 if (!cnt)
8609                         printk(KERN_TRACE "---------------------------------\n");
8610
8611                 cnt++;
8612
8613                 /* reset all but tr, trace, and overruns */
8614                 memset(&iter.seq, 0,
8615                        sizeof(struct trace_iterator) -
8616                        offsetof(struct trace_iterator, seq));
8617                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8618                 iter.pos = -1;
8619
8620                 if (trace_find_next_entry_inc(&iter) != NULL) {
8621                         int ret;
8622
8623                         ret = print_trace_line(&iter);
8624                         if (ret != TRACE_TYPE_NO_CONSUME)
8625                                 trace_consume(&iter);
8626                 }
8627                 touch_nmi_watchdog();
8628
8629                 trace_printk_seq(&iter.seq);
8630         }
8631
8632         if (!cnt)
8633                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8634         else
8635                 printk(KERN_TRACE "---------------------------------\n");
8636
8637  out_enable:
8638         tr->trace_flags |= old_userobj;
8639
8640         for_each_tracing_cpu(cpu) {
8641                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8642         }
8643         atomic_dec(&dump_running);
8644         printk_nmi_direct_exit();
8645         local_irq_restore(flags);
8646 }
8647 EXPORT_SYMBOL_GPL(ftrace_dump);
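
/*
 * Illustrative in-kernel use of the export above: a debugging patch
 * could dump every CPU's buffer at a suspicious point (bad_state is a
 * placeholder condition, not a real symbol), e.g.
 *
 *	if (WARN_ON_ONCE(bad_state))
 *		ftrace_dump(DUMP_ALL);
 */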
8648
8649 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8650 {
8651         char **argv;
8652         int argc, ret;
8653
8654         argc = 0;
8655         ret = 0;
8656         argv = argv_split(GFP_KERNEL, buf, &argc);
8657         if (!argv)
8658                 return -ENOMEM;
8659
8660         if (argc)
8661                 ret = createfn(argc, argv);
8662
8663         argv_free(argv);
8664
8665         return ret;
8666 }
8667
8668 #define WRITE_BUFSIZE  4096
8669
8670 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8671                                 size_t count, loff_t *ppos,
8672                                 int (*createfn)(int, char **))
8673 {
8674         char *kbuf, *buf, *tmp;
8675         int ret = 0;
8676         size_t done = 0;
8677         size_t size;
8678
8679         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8680         if (!kbuf)
8681                 return -ENOMEM;
8682
8683         while (done < count) {
8684                 size = count - done;
8685
8686                 if (size >= WRITE_BUFSIZE)
8687                         size = WRITE_BUFSIZE - 1;
8688
8689                 if (copy_from_user(kbuf, buffer + done, size)) {
8690                         ret = -EFAULT;
8691                         goto out;
8692                 }
8693                 kbuf[size] = '\0';
8694                 buf = kbuf;
8695                 do {
8696                         tmp = strchr(buf, '\n');
8697                         if (tmp) {
8698                                 *tmp = '\0';
8699                                 size = tmp - buf + 1;
8700                         } else {
8701                                 size = strlen(buf);
8702                                 if (done + size < count) {
8703                                         if (buf != kbuf)
8704                                                 break;
8705                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8706                                         pr_warn("Line length is too long: Should be less than %d\n",
8707                                                 WRITE_BUFSIZE - 2);
8708                                         ret = -EINVAL;
8709                                         goto out;
8710                                 }
8711                         }
8712                         done += size;
8713
8714                         /* Remove comments */
8715                         tmp = strchr(buf, '#');
8716
8717                         if (tmp)
8718                                 *tmp = '\0';
8719
8720                         ret = trace_run_command(buf, createfn);
8721                         if (ret)
8722                                 goto out;
8723                         buf += size;
8724
8725                 } while (done < count);
8726         }
8727         ret = done;
8728
8729 out:
8730         kfree(kbuf);
8731
8732         return ret;
8733 }
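
/*
 * Usage sketch: trace_parse_run_command() backs line-oriented control
 * files such as the kprobe_events interface, which hands each parsed
 * line to its createfn() callback, e.g. (assuming tracefs is mounted
 * at /sys/kernel/tracing):
 *
 *   # echo 'p:myprobe do_sys_open' > /sys/kernel/tracing/kprobe_events
 */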
8734
8735 __init static int tracer_alloc_buffers(void)
8736 {
8737         int ring_buf_size;
8738         int ret = -ENOMEM;
8739
8740         /*
8741          * Make sure we don't accidentally add more trace options
8742          * than we have bits for.
8743          */
8744         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8745
8746         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8747                 goto out;
8748
8749         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8750                 goto out_free_buffer_mask;
8751
8752         /* Only allocate trace_printk buffers if a trace_printk exists */
8753         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8754                 /* Must be called before global_trace.buffer is allocated */
8755                 trace_printk_init_buffers();
8756
8757         /* To save memory, keep the ring buffer size to its minimum */
8758         if (ring_buffer_expanded)
8759                 ring_buf_size = trace_buf_size;
8760         else
8761                 ring_buf_size = 1;
8762
8763         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8764         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8765
8766         raw_spin_lock_init(&global_trace.start_lock);
8767
8768         /*
8769          * The prepare callback allocates some memory for the ring buffer. We
8770          * don't free the buffer if the CPU goes down. If we were to free
8771          * the buffer, then the user would lose any trace that was in the
8772          * buffer. The memory will be removed once the "instance" is removed.
8773          */
8774         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8775                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8776                                       NULL);
8777         if (ret < 0)
8778                 goto out_free_cpumask;
8779         /* Used for event triggers */
8780         ret = -ENOMEM;
8781         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8782         if (!temp_buffer)
8783                 goto out_rm_hp_state;
8784
8785         if (trace_create_savedcmd() < 0)
8786                 goto out_free_temp_buffer;
8787
8788         /* TODO: make the number of buffers hot pluggable with CPUS */
8789         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8790                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8791                 WARN_ON(1);
8792                 goto out_free_savedcmd;
8793         }
8794
8795         if (global_trace.buffer_disabled)
8796                 tracing_off();
8797
8798         if (trace_boot_clock) {
8799                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8800                 if (ret < 0)
8801                         pr_warn("Trace clock %s not defined, going back to default\n",
8802                                 trace_boot_clock);
8803         }
8804
8805         /*
8806          * register_tracer() might reference current_trace, so it
8807          * needs to be set before we register anything. This is
8808          * just a bootstrap of current_trace anyway.
8809          */
8810         global_trace.current_trace = &nop_trace;
8811
8812         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8813
8814         ftrace_init_global_array_ops(&global_trace);
8815
8816         init_trace_flags_index(&global_trace);
8817
8818         register_tracer(&nop_trace);
8819
8820         /* Function tracing may start here (via kernel command line) */
8821         init_function_trace();
8822
8823         /* All seems OK, enable tracing */
8824         tracing_disabled = 0;
8825
8826         atomic_notifier_chain_register(&panic_notifier_list,
8827                                        &trace_panic_notifier);
8828
8829         register_die_notifier(&trace_die_notifier);
8830
8831         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8832
8833         INIT_LIST_HEAD(&global_trace.systems);
8834         INIT_LIST_HEAD(&global_trace.events);
8835         INIT_LIST_HEAD(&global_trace.hist_vars);
8836         list_add(&global_trace.list, &ftrace_trace_arrays);
8837
8838         apply_trace_boot_options();
8839
8840         register_snapshot_cmd();
8841
8842         return 0;
8843
8844 out_free_savedcmd:
8845         free_saved_cmdlines_buffer(savedcmd);
8846 out_free_temp_buffer:
8847         ring_buffer_free(temp_buffer);
8848 out_rm_hp_state:
8849         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8850 out_free_cpumask:
8851         free_cpumask_var(global_trace.tracing_cpumask);
8852 out_free_buffer_mask:
8853         free_cpumask_var(tracing_buffer_mask);
8854 out:
8855         return ret;
8856 }
8857
8858 void __init early_trace_init(void)
8859 {
8860         if (tracepoint_printk) {
8861                 tracepoint_print_iter =
8862                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8863                 if (WARN_ON(!tracepoint_print_iter))
8864                         tracepoint_printk = 0;
8865                 else
8866                         static_key_enable(&tracepoint_printk_key.key);
8867         }
8868         tracer_alloc_buffers();
8869 }
8870
8871 void __init trace_init(void)
8872 {
8873         trace_event_init();
8874 }
8875
8876 __init static int clear_boot_tracer(void)
8877 {
8878         /*
8879          * The default bootup tracer is placed in an init section.
8880          * This function is called at late_initcall time. If we did
8881          * not find the boot tracer, then clear it out, to prevent a
8882          * later registration from accessing the buffer that is
8883          * about to be freed.
8884          */
8885         if (!default_bootup_tracer)
8886                 return 0;
8887
8888         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8889                default_bootup_tracer);
8890         default_bootup_tracer = NULL;
8891
8892         return 0;
8893 }
8894
8895 fs_initcall(tracer_init_tracefs);
8896 late_initcall_sync(clear_boot_tracer);
8897
8898 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8899 __init static int tracing_set_default_clock(void)
8900 {
8901         /* sched_clock_stable() is determined in late_initcall */
8902         if (!trace_boot_clock && !sched_clock_stable()) {
8903                 printk(KERN_WARNING
8904                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8905                        "If you want to keep using the local clock, then add:\n"
8906                        "  \"trace_clock=local\"\n"
8907                        "on the kernel command line\n");
8908                 tracing_set_clock(&global_trace, "global");
8909         }
8910
8911         return 0;
8912 }
8913 late_initcall_sync(tracing_set_default_clock);
8914 #endif