[linux.git] kernel/trace/trace.c
(blob at commit "tracing: Add trace_total_entries() / trace_total_entries_cpu()")
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring-buffer to count the
60  * entries inserted during the selftest, although some concurrent
61  * insertions into the ring-buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
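/*
 * Fallback ->set_flag() callback for tracers that don't implement
 * custom flags: it accepts any flag change and simply reports success.
 */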
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1, and is set back to zero only when the
97  * initialization of the tracer succeeds; that is the only place
98  * that clears it.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set it to 1 to dump the buffers of all CPUs.
117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
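/*
 * For example (illustrative command lines), booting with
 * "ftrace_dump_on_oops" selects DUMP_ALL, while
 * "ftrace_dump_on_oops=orig_cpu" selects DUMP_ORIG; see
 * set_ftrace_dump_on_oops() below.
 */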
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * from "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
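/*
 * Illustrative layout (not actual code) of one saved block of eval maps:
 *
 *   [ head: mod, length ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail: next ]
 *
 * where tail.next chains to the head of the next saved block, if any.
 */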
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
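/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */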
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
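/*
 * trace_array_get - take a reference on a trace array
 *
 * Looks up @this_tr in ftrace_trace_arrays under trace_types_lock and
 * bumps its reference count if it is still registered. Returns 0 on
 * success, or -ENODEV if the trace array is not (or no longer) listed.
 */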
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
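/*
 * If the event has filtering enabled and @rec does not match the filter
 * predicates, discard the reserved ring buffer @event and return 1 so
 * the caller skips the commit; otherwise return 0.
 */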
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365
366 /**
367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid is already +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
472
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
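/*
 * trace_pid_write - parse user-supplied pids into a new trace_pid_list
 *
 * Builds a fresh pid list from @ubuf, seeded with the bits of
 * @filtered_pids if one already exists. The result is stored in
 * *new_pid_list (NULL if the write cleared all pids). Returns the
 * number of bytes read, or a negative errno on failure.
 */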
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always recreate a new array. The write is an all-or-nothing
494          * operation: a new array is always created when the user adds
495          * pids, and if the operation fails, the current list is left
496          * unmodified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list)
500                 return -ENOMEM;
501
502         pid_list->pid_max = READ_ONCE(pid_max);
503
504         /* Only truncating will shrink pid_max */
505         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506                 pid_list->pid_max = filtered_pids->pid_max;
507
508         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509         if (!pid_list->pids) {
510                 kfree(pid_list);
511                 return -ENOMEM;
512         }
513
514         if (filtered_pids) {
515                 /* copy the current bits to the new max */
516                 for_each_set_bit(pid, filtered_pids->pids,
517                                  filtered_pids->pid_max) {
518                         set_bit(pid, pid_list->pids);
519                         nr_pids++;
520                 }
521         }
522
523         while (cnt > 0) {
524
525                 pos = 0;
526
527                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528                 if (ret < 0 || !trace_parser_loaded(&parser))
529                         break;
530
531                 read += ret;
532                 ubuf += ret;
533                 cnt -= ret;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
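/*
 * Return the normalized time stamp of @buf's ring buffer for @cpu,
 * falling back to trace_clock_local() early in boot, before the ring
 * buffer has been allocated.
 */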
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so it can be used in fast paths such as
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to the low value of 16384 entries.
613  * If a dump on oops happens, it is much appreciated not to have to
614  * wait for all that output. Anyway, this is configurable at both
615  * boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-CPU
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
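/*
 * Typical pairing (illustrative only), e.g. around reading one CPU's buffer:
 *
 *     trace_access_lock(cpu);
 *     ... consume events from that CPU's ring buffer ...
 *     trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the access lock exclusively
 * for the whole ring buffer.
 */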
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
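/*
 * Fill in the common trace_entry header (flags and preempt count via
 * tracing_generic_entry_update()) and the event type of a freshly
 * reserved ring buffer event.
 */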
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races of where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
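/*
 * Commit a reserved event. If @event is this CPU's temporary buffered
 * event (trace_buffered_event), copy it into the ring buffer with
 * ring_buffer_write() and release the temp buffer; otherwise commit it
 * in place with ring_buffer_unlock_commit(). Either way, note that a
 * trace event occurred so that task info (comm) gets saved.
 */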
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_taskinfo_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
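/*
 * Core snapshot implementation: swap the live buffer of @tr with its
 * spare (max) buffer. Refuses to run from NMI context, when no snapshot
 * buffer has been allocated, or when the current tracer already uses
 * the max buffer for latency tracking.
 */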
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id(), cond_data);
924         local_irq_restore(flags);
925 }
926
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929         tracing_snapshot_instance_cond(tr, NULL);
930 }
931
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot with either
940  * a tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948         struct trace_array *tr = &global_trace;
949
950         tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:         The tracing instance to snapshot
957  * @cond_data:  The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969         tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:         The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989         void *cond_data = NULL;
990
991         arch_spin_lock(&tr->max_lock);
992
993         if (tr->cond_snapshot)
994                 cond_data = tr->cond_snapshot->cond_data;
995
996         arch_spin_unlock(&tr->max_lock);
997
998         return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003                                         struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008         int ret;
1009
1010         if (!tr->allocated_snapshot) {
1011
1012                 /* allocate spare buffer */
1013                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015                 if (ret < 0)
1016                         return ret;
1017
1018                 tr->allocated_snapshot = true;
1019         }
1020
1021         return 0;
1022 }
1023
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026         /*
1027          * We don't free the ring buffer; instead, we resize it because
1028          * the max_tr ring buffer has some state (e.g. ring->clock) and
1029          * we want to preserve it.
1030          */
1031         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032         set_buffer_entries(&tr->max_buffer, 1);
1033         tracing_reset_online_cpus(&tr->max_buffer);
1034         tr->allocated_snapshot = false;
1035 }
1036
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049         struct trace_array *tr = &global_trace;
1050         int ret;
1051
1052         ret = tracing_alloc_snapshot_instance(tr);
1053         WARN_ON(ret < 0);
1054
1055         return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072         int ret;
1073
1074         ret = tracing_alloc_snapshot();
1075         if (ret < 0)
1076                 return;
1077
1078         tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:         The tracing instance
1085  * @cond_data:  User data to associate with the snapshot
1086  * @update:     Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096                                  cond_update_fn_t update)
1097 {
1098         struct cond_snapshot *cond_snapshot;
1099         int ret = 0;
1100
1101         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102         if (!cond_snapshot)
1103                 return -ENOMEM;
1104
1105         cond_snapshot->cond_data = cond_data;
1106         cond_snapshot->update = update;
1107
1108         mutex_lock(&trace_types_lock);
1109
1110         ret = tracing_alloc_snapshot_instance(tr);
1111         if (ret)
1112                 goto fail_unlock;
1113
1114         if (tr->current_trace->use_max_tr) {
1115                 ret = -EBUSY;
1116                 goto fail_unlock;
1117         }
1118
1119         /*
1120          * The cond_snapshot can only change to NULL without the
1121          * trace_types_lock. We don't care if we race with it going
1122          * to NULL, but we want to make sure that it's not set to
1123          * something other than NULL when we get here, which we can
1124          * do safely with only holding the trace_types_lock and not
1125          * having to take the max_lock.
1126          */
1127         if (tr->cond_snapshot) {
1128                 ret = -EBUSY;
1129                 goto fail_unlock;
1130         }
1131
1132         arch_spin_lock(&tr->max_lock);
1133         tr->cond_snapshot = cond_snapshot;
1134         arch_spin_unlock(&tr->max_lock);
1135
1136         mutex_unlock(&trace_types_lock);
1137
1138         return ret;
1139
1140  fail_unlock:
1141         mutex_unlock(&trace_types_lock);
1142         kfree(cond_snapshot);
1143         return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1146
1147 /**
1148  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149  * @tr:         The tracing instance
1150  *
1151  * Check whether the conditional snapshot for the given instance is
1152  * enabled; if so, free the cond_snapshot associated with it,
1153  * otherwise return -EINVAL.
1154  *
1155  * Returns 0 if successful, error otherwise.
1156  */
1157 int tracing_snapshot_cond_disable(struct trace_array *tr)
1158 {
1159         int ret = 0;
1160
1161         arch_spin_lock(&tr->max_lock);
1162
1163         if (!tr->cond_snapshot)
1164                 ret = -EINVAL;
1165         else {
1166                 kfree(tr->cond_snapshot);
1167                 tr->cond_snapshot = NULL;
1168         }
1169
1170         arch_spin_unlock(&tr->max_lock);
1171
1172         return ret;
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175 #else
1176 void tracing_snapshot(void)
1177 {
1178         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182 {
1183         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186 int tracing_alloc_snapshot(void)
1187 {
1188         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189         return -ENODEV;
1190 }
1191 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192 void tracing_snapshot_alloc(void)
1193 {
1194         /* Give warning */
1195         tracing_snapshot();
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         return NULL;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204 {
1205         return -ENODEV;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208 int tracing_snapshot_cond_disable(struct trace_array *tr)
1209 {
1210         return false;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213 #endif /* CONFIG_TRACER_SNAPSHOT */
1214
1215 void tracer_tracing_off(struct trace_array *tr)
1216 {
1217         if (tr->trace_buffer.buffer)
1218                 ring_buffer_record_off(tr->trace_buffer.buffer);
1219         /*
1220          * This flag is looked at when buffers haven't been allocated
1221          * yet, or by some tracers (like irqsoff), that just want to
1222          * know if the ring buffer has been disabled, but it can handle
1223          * races of where it gets disabled but we still do a record.
1224          * As the check is in the fast path of the tracers, it is more
1225          * important to be fast than accurate.
1226          */
1227         tr->buffer_disabled = 1;
1228         /* Make the flag seen by readers */
1229         smp_wmb();
1230 }
1231
1232 /**
1233  * tracing_off - turn off tracing buffers
1234  *
1235  * This function stops the tracing buffers from recording data.
1236  * It does not disable any overhead the tracers themselves may
1237  * be causing. This function simply causes all recording to
1238  * the ring buffers to fail.
1239  */
1240 void tracing_off(void)
1241 {
1242         tracer_tracing_off(&global_trace);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_off);
1245
1246 void disable_trace_on_warning(void)
1247 {
1248         if (__disable_trace_on_warning)
1249                 tracing_off();
1250 }
1251
1252 /**
1253  * tracer_tracing_is_on - show the real state of the ring buffer
1254  * @tr: the trace array whose ring buffer state is reported
1255  *
1256  * Shows the real state of the ring buffer: whether it is enabled or not.
1257  */
1258 bool tracer_tracing_is_on(struct trace_array *tr)
1259 {
1260         if (tr->trace_buffer.buffer)
1261                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262         return !tr->buffer_disabled;
1263 }
1264
1265 /**
1266  * tracing_is_on - show state of ring buffers enabled
1267  */
1268 int tracing_is_on(void)
1269 {
1270         return tracer_tracing_is_on(&global_trace);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_is_on);
1273
1274 static int __init set_buf_size(char *str)
1275 {
1276         unsigned long buf_size;
1277
1278         if (!str)
1279                 return 0;
1280         buf_size = memparse(str, &str);
1281         /* nr_entries can not be zero */
1282         if (buf_size == 0)
1283                 return 0;
1284         trace_buf_size = buf_size;
1285         return 1;
1286 }
1287 __setup("trace_buf_size=", set_buf_size);
1288
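/*
 * Example boot parameters (illustrative values) handled by the
 * __setup() hooks above and below:
 *
 *     trace_buf_size=4M        pre-size each trace buffer to 4 MB
 *     tracing_thresh=100       set the tracing latency threshold to
 *                              100 usecs (stored internally in nanoseconds)
 */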
1289 static int __init set_tracing_thresh(char *str)
1290 {
1291         unsigned long threshold;
1292         int ret;
1293
1294         if (!str)
1295                 return 0;
1296         ret = kstrtoul(str, 0, &threshold);
1297         if (ret < 0)
1298                 return 0;
1299         tracing_thresh = threshold * 1000;
1300         return 1;
1301 }
1302 __setup("tracing_thresh=", set_tracing_thresh);
1303
1304 unsigned long nsecs_to_usecs(unsigned long nsecs)
1305 {
1306         return nsecs / 1000;
1307 }
1308
1309 /*
1310  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313  * of strings in the order that the evals (enum) were defined.
1314  */
1315 #undef C
1316 #define C(a, b) b
1317
1318 /* These must match the bit positions in trace_iterator_flags */
1319 static const char *trace_options[] = {
1320         TRACE_FLAGS
1321         NULL
1322 };
1323
1324 static struct {
1325         u64 (*func)(void);
1326         const char *name;
1327         int in_ns;              /* is this clock in nanoseconds? */
1328 } trace_clocks[] = {
1329         { trace_clock_local,            "local",        1 },
1330         { trace_clock_global,           "global",       1 },
1331         { trace_clock_counter,          "counter",      0 },
1332         { trace_clock_jiffies,          "uptime",       0 },
1333         { trace_clock,                  "perf",         1 },
1334         { ktime_get_mono_fast_ns,       "mono",         1 },
1335         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1336         { ktime_get_boot_fast_ns,       "boot",         1 },
1337         ARCH_TRACE_CLOCKS
1338 };
1339
1340 bool trace_clock_in_ns(struct trace_array *tr)
1341 {
1342         if (trace_clocks[tr->clock_id].in_ns)
1343                 return true;
1344
1345         return false;
1346 }
1347
1348 /*
1349  * trace_parser_get_init - gets the buffer for trace parser
1350  */
1351 int trace_parser_get_init(struct trace_parser *parser, int size)
1352 {
1353         memset(parser, 0, sizeof(*parser));
1354
1355         parser->buffer = kmalloc(size, GFP_KERNEL);
1356         if (!parser->buffer)
1357                 return 1;
1358
1359         parser->size = size;
1360         return 0;
1361 }
1362
1363 /*
1364  * trace_parser_put - frees the buffer for trace parser
1365  */
1366 void trace_parser_put(struct trace_parser *parser)
1367 {
1368         kfree(parser->buffer);
1369         parser->buffer = NULL;
1370 }
1371
1372 /*
1373  * trace_get_user - reads the user input string separated by space
1374  * (matched by isspace(ch))
1375  *
1376  * For each string found the 'struct trace_parser' is updated,
1377  * and the function returns.
1378  *
1379  * Returns number of bytes read.
1380  *
1381  * See kernel/trace/trace.h for 'struct trace_parser' details.
1382  */
1383 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384         size_t cnt, loff_t *ppos)
1385 {
1386         char ch;
1387         size_t read = 0;
1388         ssize_t ret;
1389
1390         if (!*ppos)
1391                 trace_parser_clear(parser);
1392
1393         ret = get_user(ch, ubuf++);
1394         if (ret)
1395                 goto out;
1396
1397         read++;
1398         cnt--;
1399
1400         /*
1401          * The parser is not finished with the last write,
1402          * continue reading the user input without skipping spaces.
1403          */
1404         if (!parser->cont) {
1405                 /* skip white space */
1406                 while (cnt && isspace(ch)) {
1407                         ret = get_user(ch, ubuf++);
1408                         if (ret)
1409                                 goto out;
1410                         read++;
1411                         cnt--;
1412                 }
1413
1414                 parser->idx = 0;
1415
1416                 /* only spaces were written */
1417                 if (isspace(ch) || !ch) {
1418                         *ppos += read;
1419                         ret = read;
1420                         goto out;
1421                 }
1422         }
1423
1424         /* read the non-space input */
1425         while (cnt && !isspace(ch) && ch) {
1426                 if (parser->idx < parser->size - 1)
1427                         parser->buffer[parser->idx++] = ch;
1428                 else {
1429                         ret = -EINVAL;
1430                         goto out;
1431                 }
1432                 ret = get_user(ch, ubuf++);
1433                 if (ret)
1434                         goto out;
1435                 read++;
1436                 cnt--;
1437         }
1438
1439         /* We either got finished input or we have to wait for another call. */
1440         if (isspace(ch) || !ch) {
1441                 parser->buffer[parser->idx] = 0;
1442                 parser->cont = false;
1443         } else if (parser->idx < parser->size - 1) {
1444                 parser->cont = true;
1445                 parser->buffer[parser->idx++] = ch;
1446                 /* Make sure the parsed string always terminates with '\0'. */
1447                 parser->buffer[parser->idx] = 0;
1448         } else {
1449                 ret = -EINVAL;
1450                 goto out;
1451         }
1452
1453         *ppos += read;
1454         ret = read;
1455
1456 out:
1457         return ret;
1458 }
1459
1460 /* TODO add a seq_buf_to_buffer() */
1461 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462 {
1463         int len;
1464
1465         if (trace_seq_used(s) <= s->seq.readpos)
1466                 return -EBUSY;
1467
1468         len = trace_seq_used(s) - s->seq.readpos;
1469         if (cnt > len)
1470                 cnt = len;
1471         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472
1473         s->seq.readpos += cnt;
1474         return cnt;
1475 }
1476
1477 unsigned long __read_mostly     tracing_thresh;
1478
1479 #ifdef CONFIG_TRACER_MAX_TRACE
1480 /*
1481  * Copy the new maximum trace into the separate maximum-trace
1482  * structure. (this way the maximum trace is permanently saved,
1483  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484  */
1485 static void
1486 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487 {
1488         struct trace_buffer *trace_buf = &tr->trace_buffer;
1489         struct trace_buffer *max_buf = &tr->max_buffer;
1490         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492
1493         max_buf->cpu = cpu;
1494         max_buf->time_start = data->preempt_timestamp;
1495
1496         max_data->saved_latency = tr->max_latency;
1497         max_data->critical_start = data->critical_start;
1498         max_data->critical_end = data->critical_end;
1499
1500         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501         max_data->pid = tsk->pid;
1502         /*
1503          * If tsk == current, then use current_uid(), as that does not use
1504          * RCU. The irq tracer can be called out of RCU scope.
1505          */
1506         if (tsk == current)
1507                 max_data->uid = current_uid();
1508         else
1509                 max_data->uid = task_uid(tsk);
1510
1511         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512         max_data->policy = tsk->policy;
1513         max_data->rt_priority = tsk->rt_priority;
1514
1515         /* record this task's comm */
1516         tracing_record_cmdline(tsk);
1517 }
1518
1519 /**
1520  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521  * @tr: tracer
1522  * @tsk: the task with the latency
1523  * @cpu: The cpu that initiated the trace.
1524  * @cond_data: User data associated with a conditional snapshot
1525  *
1526  * Flip the buffers between the @tr and the max_tr and record information
1527  * about which task was the cause of this latency.
1528  */
1529 void
1530 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531               void *cond_data)
1532 {
1533         if (tr->stop_count)
1534                 return;
1535
1536         WARN_ON_ONCE(!irqs_disabled());
1537
1538         if (!tr->allocated_snapshot) {
1539                 /* Only the nop tracer should hit this when disabling */
1540                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541                 return;
1542         }
1543
1544         arch_spin_lock(&tr->max_lock);
1545
1546         /* Inherit the recordable setting from trace_buffer */
1547         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548                 ring_buffer_record_on(tr->max_buffer.buffer);
1549         else
1550                 ring_buffer_record_off(tr->max_buffer.buffer);
1551
1552 #ifdef CONFIG_TRACER_SNAPSHOT
1553         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554                 goto out_unlock;
1555 #endif
1556         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557
1558         __update_max_tr(tr, tsk, cpu);
1559
1560  out_unlock:
1561         arch_spin_unlock(&tr->max_lock);
1562 }
1563
1564 /**
1565  * update_max_tr_single - only copy one trace over, and reset the rest
1566  * @tr: tracer
1567  * @tsk: task with the latency
1568  * @cpu: the cpu of the buffer to copy.
1569  *
1570  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571  */
1572 void
1573 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574 {
1575         int ret;
1576
1577         if (tr->stop_count)
1578                 return;
1579
1580         WARN_ON_ONCE(!irqs_disabled());
1581         if (!tr->allocated_snapshot) {
1582                 /* Only the nop tracer should hit this when disabling */
1583                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584                 return;
1585         }
1586
1587         arch_spin_lock(&tr->max_lock);
1588
1589         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590
1591         if (ret == -EBUSY) {
1592                 /*
1593                  * We failed to swap the buffer due to a commit taking
1594                  * place on this CPU. We fail to record, but we reset
1595                  * the max trace buffer (no one writes directly to it)
1596                  * and flag that it failed.
1597                  */
1598                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599                         "Failed to swap buffers due to commit in progress\n");
1600         }
1601
1602         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603
1604         __update_max_tr(tr, tsk, cpu);
1605         arch_spin_unlock(&tr->max_lock);
1606 }
1607 #endif /* CONFIG_TRACER_MAX_TRACE */
1608
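/*
 * Wait until the per-CPU buffer selected by iter->cpu_file has data to
 * read (the @full argument is passed through to ring_buffer_wait()).
 * If the iterator has its own static buffer for this CPU, there is
 * nothing to wait for.
 */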
1609 static int wait_on_pipe(struct trace_iterator *iter, int full)
1610 {
1611         /* Iterators are static, they should be filled or empty */
1612         if (trace_buffer_iter(iter, iter->cpu_file))
1613                 return 0;
1614
1615         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616                                 full);
1617 }
1618
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static bool selftests_can_run;
1621
1622 struct trace_selftests {
1623         struct list_head                list;
1624         struct tracer                   *type;
1625 };
1626
1627 static LIST_HEAD(postponed_selftests);
1628
1629 static int save_selftest(struct tracer *type)
1630 {
1631         struct trace_selftests *selftest;
1632
1633         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634         if (!selftest)
1635                 return -ENOMEM;
1636
1637         selftest->type = type;
1638         list_add(&selftest->list, &postponed_selftests);
1639         return 0;
1640 }
1641
1642 static int run_tracer_selftest(struct tracer *type)
1643 {
1644         struct trace_array *tr = &global_trace;
1645         struct tracer *saved_tracer = tr->current_trace;
1646         int ret;
1647
1648         if (!type->selftest || tracing_selftest_disabled)
1649                 return 0;
1650
1651         /*
1652          * If a tracer registers early in boot up (before scheduling is
1653          * initialized and such), then do not run its selftests yet.
1654          * Instead, run it a little later in the boot process.
1655          */
1656         if (!selftests_can_run)
1657                 return save_selftest(type);
1658
1659         /*
1660          * Run a selftest on this tracer.
1661          * Here we reset the trace buffer, and set the current
1662          * tracer to be this tracer. The tracer can then run some
1663          * internal tracing to verify that everything is in order.
1664          * If we fail, we do not register this tracer.
1665          */
1666         tracing_reset_online_cpus(&tr->trace_buffer);
1667
1668         tr->current_trace = type;
1669
1670 #ifdef CONFIG_TRACER_MAX_TRACE
1671         if (type->use_max_tr) {
1672                 /* If we expanded the buffers, make sure the max is expanded too */
1673                 if (ring_buffer_expanded)
1674                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675                                            RING_BUFFER_ALL_CPUS);
1676                 tr->allocated_snapshot = true;
1677         }
1678 #endif
1679
1680         /* the test is responsible for initializing and enabling */
1681         pr_info("Testing tracer %s: ", type->name);
1682         ret = type->selftest(type, tr);
1683         /* the test is responsible for resetting too */
1684         tr->current_trace = saved_tracer;
1685         if (ret) {
1686                 printk(KERN_CONT "FAILED!\n");
1687                 /* Add the warning after printing 'FAILED' */
1688                 WARN_ON(1);
1689                 return -1;
1690         }
1691         /* Only reset on passing, to avoid touching corrupted buffers */
1692         tracing_reset_online_cpus(&tr->trace_buffer);
1693
1694 #ifdef CONFIG_TRACER_MAX_TRACE
1695         if (type->use_max_tr) {
1696                 tr->allocated_snapshot = false;
1697
1698                 /* Shrink the max buffer again */
1699                 if (ring_buffer_expanded)
1700                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1701                                            RING_BUFFER_ALL_CPUS);
1702         }
1703 #endif
1704
1705         printk(KERN_CONT "PASSED\n");
1706         return 0;
1707 }
1708
1709 static __init int init_trace_selftests(void)
1710 {
1711         struct trace_selftests *p, *n;
1712         struct tracer *t, **last;
1713         int ret;
1714
1715         selftests_can_run = true;
1716
1717         mutex_lock(&trace_types_lock);
1718
1719         if (list_empty(&postponed_selftests))
1720                 goto out;
1721
1722         pr_info("Running postponed tracer tests:\n");
1723
1724         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725                 ret = run_tracer_selftest(p->type);
1726                 /* If the test fails, then warn and remove from available_tracers */
1727                 if (ret < 0) {
1728                         WARN(1, "tracer: %s failed selftest, disabling\n",
1729                              p->type->name);
1730                         last = &trace_types;
1731                         for (t = trace_types; t; t = t->next) {
1732                                 if (t == p->type) {
1733                                         *last = t->next;
1734                                         break;
1735                                 }
1736                                 last = &t->next;
1737                         }
1738                 }
1739                 list_del(&p->list);
1740                 kfree(p);
1741         }
1742
1743  out:
1744         mutex_unlock(&trace_types_lock);
1745
1746         return 0;
1747 }
1748 core_initcall(init_trace_selftests);
1749 #else
1750 static inline int run_tracer_selftest(struct tracer *type)
1751 {
1752         return 0;
1753 }
1754 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1755
1756 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757
1758 static void __init apply_trace_boot_options(void);
1759
1760 /**
1761  * register_tracer - register a tracer with the ftrace system.
1762  * @type - the plugin for the tracer
1763  *
1764  * Register a new plugin tracer.
1765  */
1766 int __init register_tracer(struct tracer *type)
1767 {
1768         struct tracer *t;
1769         int ret = 0;
1770
1771         if (!type->name) {
1772                 pr_info("Tracer must have a name\n");
1773                 return -1;
1774         }
1775
1776         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778                 return -1;
1779         }
1780
1781         mutex_lock(&trace_types_lock);
1782
1783         tracing_selftest_running = true;
1784
1785         for (t = trace_types; t; t = t->next) {
1786                 if (strcmp(type->name, t->name) == 0) {
1787                         /* already found */
1788                         pr_info("Tracer %s already registered\n",
1789                                 type->name);
1790                         ret = -1;
1791                         goto out;
1792                 }
1793         }
1794
1795         if (!type->set_flag)
1796                 type->set_flag = &dummy_set_flag;
1797         if (!type->flags) {
1798                 /* allocate a dummy tracer_flags */
1799                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800                 if (!type->flags) {
1801                         ret = -ENOMEM;
1802                         goto out;
1803                 }
1804                 type->flags->val = 0;
1805                 type->flags->opts = dummy_tracer_opt;
1806         } else
1807                 if (!type->flags->opts)
1808                         type->flags->opts = dummy_tracer_opt;
1809
1810         /* store the tracer for __set_tracer_option */
1811         type->flags->trace = type;
1812
1813         ret = run_tracer_selftest(type);
1814         if (ret < 0)
1815                 goto out;
1816
1817         type->next = trace_types;
1818         trace_types = type;
1819         add_tracer_options(&global_trace, type);
1820
1821  out:
1822         tracing_selftest_running = false;
1823         mutex_unlock(&trace_types_lock);
1824
1825         if (ret || !default_bootup_tracer)
1826                 goto out_unlock;
1827
1828         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829                 goto out_unlock;
1830
1831         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832         /* Do we want this tracer to start on bootup? */
1833         tracing_set_tracer(&global_trace, type->name);
1834         default_bootup_tracer = NULL;
1835
1836         apply_trace_boot_options();
1837
1838         /* Disable other selftests, since this tracer will break them. */
1839         tracing_selftest_disabled = true;
1840 #ifdef CONFIG_FTRACE_STARTUP_TEST
1841         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842                type->name);
1843 #endif
1844
1845  out_unlock:
1846         return ret;
1847 }
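/*
 * A minimal sketch of the registration pattern register_tracer() expects:
 * a statically allocated struct tracer registered from __init code, so the
 * selftest and boot-up handling above can run.  The "example" names below
 * are hypothetical; built-in tracers such as the nop tracer follow the
 * same shape.
 */
#if 0	/* illustrative sketch only, not built */
static int example_trace_init(struct trace_array *tr)
{
	/* Arm whatever this tracer needs; return 0 on success. */
	return 0;
}

static void example_trace_reset(struct trace_array *tr)
{
	/* Undo example_trace_init(). */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_trace_init,
	.reset	= example_trace_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif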
1848
1849 void tracing_reset(struct trace_buffer *buf, int cpu)
1850 {
1851         struct ring_buffer *buffer = buf->buffer;
1852
1853         if (!buffer)
1854                 return;
1855
1856         ring_buffer_record_disable(buffer);
1857
1858         /* Make sure all commits have finished */
1859         synchronize_rcu();
1860         ring_buffer_reset_cpu(buffer, cpu);
1861
1862         ring_buffer_record_enable(buffer);
1863 }
1864
1865 void tracing_reset_online_cpus(struct trace_buffer *buf)
1866 {
1867         struct ring_buffer *buffer = buf->buffer;
1868         int cpu;
1869
1870         if (!buffer)
1871                 return;
1872
1873         ring_buffer_record_disable(buffer);
1874
1875         /* Make sure all commits have finished */
1876         synchronize_rcu();
1877
1878         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879
1880         for_each_online_cpu(cpu)
1881                 ring_buffer_reset_cpu(buffer, cpu);
1882
1883         ring_buffer_record_enable(buffer);
1884 }
1885
1886 /* Must have trace_types_lock held */
1887 void tracing_reset_all_online_cpus(void)
1888 {
1889         struct trace_array *tr;
1890
1891         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892                 if (!tr->clear_trace)
1893                         continue;
1894                 tr->clear_trace = false;
1895                 tracing_reset_online_cpus(&tr->trace_buffer);
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897                 tracing_reset_online_cpus(&tr->max_buffer);
1898 #endif
1899         }
1900 }
1901
1902 static int *tgid_map;
1903
1904 #define SAVED_CMDLINES_DEFAULT 128
1905 #define NO_CMDLINE_MAP UINT_MAX
1906 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907 struct saved_cmdlines_buffer {
1908         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909         unsigned *map_cmdline_to_pid;
1910         unsigned cmdline_num;
1911         int cmdline_idx;
1912         char *saved_cmdlines;
1913 };
1914 static struct saved_cmdlines_buffer *savedcmd;
1915
1916 /* temporarily disable recording */
1917 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918
1919 static inline char *get_saved_cmdlines(int idx)
1920 {
1921         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922 }
1923
1924 static inline void set_cmdline(int idx, const char *cmdline)
1925 {
1926         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927 }
1928
1929 static int allocate_cmdlines_buffer(unsigned int val,
1930                                     struct saved_cmdlines_buffer *s)
1931 {
1932         s->map_cmdline_to_pid = kmalloc_array(val,
1933                                               sizeof(*s->map_cmdline_to_pid),
1934                                               GFP_KERNEL);
1935         if (!s->map_cmdline_to_pid)
1936                 return -ENOMEM;
1937
1938         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939         if (!s->saved_cmdlines) {
1940                 kfree(s->map_cmdline_to_pid);
1941                 return -ENOMEM;
1942         }
1943
1944         s->cmdline_idx = 0;
1945         s->cmdline_num = val;
1946         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947                sizeof(s->map_pid_to_cmdline));
1948         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949                val * sizeof(*s->map_cmdline_to_pid));
1950
1951         return 0;
1952 }
1953
1954 static int trace_create_savedcmd(void)
1955 {
1956         int ret;
1957
1958         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959         if (!savedcmd)
1960                 return -ENOMEM;
1961
1962         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963         if (ret < 0) {
1964                 kfree(savedcmd);
1965                 savedcmd = NULL;
1966                 return -ENOMEM;
1967         }
1968
1969         return 0;
1970 }
1971
1972 int is_tracing_stopped(void)
1973 {
1974         return global_trace.stop_count;
1975 }
1976
1977 /**
1978  * tracing_start - quick start of the tracer
1979  *
1980  * If tracing is enabled but was stopped by tracing_stop,
1981  * this will start the tracer back up.
1982  */
1983 void tracing_start(void)
1984 {
1985         struct ring_buffer *buffer;
1986         unsigned long flags;
1987
1988         if (tracing_disabled)
1989                 return;
1990
1991         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992         if (--global_trace.stop_count) {
1993                 if (global_trace.stop_count < 0) {
1994                         /* Someone screwed up their debugging */
1995                         WARN_ON_ONCE(1);
1996                         global_trace.stop_count = 0;
1997                 }
1998                 goto out;
1999         }
2000
2001         /* Prevent the buffers from switching */
2002         arch_spin_lock(&global_trace.max_lock);
2003
2004         buffer = global_trace.trace_buffer.buffer;
2005         if (buffer)
2006                 ring_buffer_record_enable(buffer);
2007
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009         buffer = global_trace.max_buffer.buffer;
2010         if (buffer)
2011                 ring_buffer_record_enable(buffer);
2012 #endif
2013
2014         arch_spin_unlock(&global_trace.max_lock);
2015
2016  out:
2017         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018 }
2019
2020 static void tracing_start_tr(struct trace_array *tr)
2021 {
2022         struct ring_buffer *buffer;
2023         unsigned long flags;
2024
2025         if (tracing_disabled)
2026                 return;
2027
2028         /* If global, we need to also start the max tracer */
2029         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030                 return tracing_start();
2031
2032         raw_spin_lock_irqsave(&tr->start_lock, flags);
2033
2034         if (--tr->stop_count) {
2035                 if (tr->stop_count < 0) {
2036                         /* Someone screwed up their debugging */
2037                         WARN_ON_ONCE(1);
2038                         tr->stop_count = 0;
2039                 }
2040                 goto out;
2041         }
2042
2043         buffer = tr->trace_buffer.buffer;
2044         if (buffer)
2045                 ring_buffer_record_enable(buffer);
2046
2047  out:
2048         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049 }
2050
2051 /**
2052  * tracing_stop - quick stop of the tracer
2053  *
2054  * Light weight way to stop tracing. Use in conjunction with
2055  * tracing_start.
2056  */
2057 void tracing_stop(void)
2058 {
2059         struct ring_buffer *buffer;
2060         unsigned long flags;
2061
2062         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063         if (global_trace.stop_count++)
2064                 goto out;
2065
2066         /* Prevent the buffers from switching */
2067         arch_spin_lock(&global_trace.max_lock);
2068
2069         buffer = global_trace.trace_buffer.buffer;
2070         if (buffer)
2071                 ring_buffer_record_disable(buffer);
2072
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074         buffer = global_trace.max_buffer.buffer;
2075         if (buffer)
2076                 ring_buffer_record_disable(buffer);
2077 #endif
2078
2079         arch_spin_unlock(&global_trace.max_lock);
2080
2081  out:
2082         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
2084
2085 static void tracing_stop_tr(struct trace_array *tr)
2086 {
2087         struct ring_buffer *buffer;
2088         unsigned long flags;
2089
2090         /* If global, we need to also stop the max tracer */
2091         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092                 return tracing_stop();
2093
2094         raw_spin_lock_irqsave(&tr->start_lock, flags);
2095         if (tr->stop_count++)
2096                 goto out;
2097
2098         buffer = tr->trace_buffer.buffer;
2099         if (buffer)
2100                 ring_buffer_record_disable(buffer);
2101
2102  out:
2103         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104 }
2105
2106 static int trace_save_cmdline(struct task_struct *tsk)
2107 {
2108         unsigned pid, idx;
2109
2110         /* treat recording of idle task as a success */
2111         if (!tsk->pid)
2112                 return 1;
2113
2114         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115                 return 0;
2116
2117         /*
2118          * It's not the end of the world if we don't get
2119          * the lock, but we also don't want to spin
2120          * nor do we want to disable interrupts,
2121          * so if we miss here, then better luck next time.
2122          */
2123         if (!arch_spin_trylock(&trace_cmdline_lock))
2124                 return 0;
2125
2126         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127         if (idx == NO_CMDLINE_MAP) {
2128                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129
2130                 /*
2131                  * Check whether the cmdline buffer at idx has a pid
2132                  * mapped. We are going to overwrite that entry so we
2133                  * need to clear the map_pid_to_cmdline. Otherwise we
2134                  * would read the new comm for the old pid.
2135                  */
2136                 pid = savedcmd->map_cmdline_to_pid[idx];
2137                 if (pid != NO_CMDLINE_MAP)
2138                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139
2140                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142
2143                 savedcmd->cmdline_idx = idx;
2144         }
2145
2146         set_cmdline(idx, tsk->comm);
2147
2148         arch_spin_unlock(&trace_cmdline_lock);
2149
2150         return 1;
2151 }
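/*
 * The comm cache above is a fixed ring of TASK_COMM_LEN slots plus a
 * two-way map (pid -> slot, slot -> pid).  A simplified sketch of the same
 * bookkeeping (hypothetical "ex_" names, maps assumed to start out filled
 * with EX_NO_SLOT just like the memset()s in allocate_cmdlines_buffer()):
 */
#if 0	/* illustrative sketch only, not built */
#define EX_NSLOTS	4
#define EX_NO_SLOT	(~0U)

static unsigned int ex_pid_to_slot[PID_MAX_DEFAULT + 1];
static unsigned int ex_slot_to_pid[EX_NSLOTS];
static char ex_comms[EX_NSLOTS][TASK_COMM_LEN];
static unsigned int ex_slot_idx;

static void ex_save_comm(unsigned int pid, const char *comm)
{
	unsigned int slot = ex_pid_to_slot[pid];

	if (slot == EX_NO_SLOT) {
		slot = ex_slot_idx = (ex_slot_idx + 1) % EX_NSLOTS;
		/*
		 * Evict the slot's previous owner first, so looking up the
		 * old pid does not return the new task's comm.
		 */
		if (ex_slot_to_pid[slot] != EX_NO_SLOT)
			ex_pid_to_slot[ex_slot_to_pid[slot]] = EX_NO_SLOT;
		ex_slot_to_pid[slot] = pid;
		ex_pid_to_slot[pid] = slot;
	}
	strncpy(ex_comms[slot], comm, TASK_COMM_LEN);
}
#endif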
2152
2153 static void __trace_find_cmdline(int pid, char comm[])
2154 {
2155         unsigned map;
2156
2157         if (!pid) {
2158                 strcpy(comm, "<idle>");
2159                 return;
2160         }
2161
2162         if (WARN_ON_ONCE(pid < 0)) {
2163                 strcpy(comm, "<XXX>");
2164                 return;
2165         }
2166
2167         if (pid > PID_MAX_DEFAULT) {
2168                 strcpy(comm, "<...>");
2169                 return;
2170         }
2171
2172         map = savedcmd->map_pid_to_cmdline[pid];
2173         if (map != NO_CMDLINE_MAP)
2174                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175         else
2176                 strcpy(comm, "<...>");
2177 }
2178
2179 void trace_find_cmdline(int pid, char comm[])
2180 {
2181         preempt_disable();
2182         arch_spin_lock(&trace_cmdline_lock);
2183
2184         __trace_find_cmdline(pid, comm);
2185
2186         arch_spin_unlock(&trace_cmdline_lock);
2187         preempt_enable();
2188 }
2189
2190 int trace_find_tgid(int pid)
2191 {
2192         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193                 return 0;
2194
2195         return tgid_map[pid];
2196 }
2197
2198 static int trace_save_tgid(struct task_struct *tsk)
2199 {
2200         /* treat recording of idle task as a success */
2201         if (!tsk->pid)
2202                 return 1;
2203
2204         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205                 return 0;
2206
2207         tgid_map[tsk->pid] = tsk->tgid;
2208         return 1;
2209 }
2210
2211 static bool tracing_record_taskinfo_skip(int flags)
2212 {
2213         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214                 return true;
2215         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216                 return true;
2217         if (!__this_cpu_read(trace_taskinfo_save))
2218                 return true;
2219         return false;
2220 }
2221
2222 /**
2223  * tracing_record_taskinfo - record the task info of a task
2224  *
2225  * @task  - task to record
2226  * @flags - TRACE_RECORD_CMDLINE for recording comm
2227  *        - TRACE_RECORD_TGID for recording tgid
2228  */
2229 void tracing_record_taskinfo(struct task_struct *task, int flags)
2230 {
2231         bool done;
2232
2233         if (tracing_record_taskinfo_skip(flags))
2234                 return;
2235
2236         /*
2237          * Record as much task information as possible. If some fail, continue
2238          * to try to record the others.
2239          */
2240         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242
2243         /* If recording any information failed, retry again soon. */
2244         if (!done)
2245                 return;
2246
2247         __this_cpu_write(trace_taskinfo_save, false);
2248 }
2249
2250 /**
2251  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252  *
2253  * @prev - previous task during sched_switch
2254  * @next - next task during sched_switch
2255  * @flags - TRACE_RECORD_CMDLINE for recording comm
2256  *          TRACE_RECORD_TGID for recording tgid
2257  */
2258 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259                                           struct task_struct *next, int flags)
2260 {
2261         bool done;
2262
2263         if (tracing_record_taskinfo_skip(flags))
2264                 return;
2265
2266         /*
2267          * Record as much task information as possible. If some fail, continue
2268          * to try to record the others.
2269          */
2270         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274
2275         /* If recording any information failed, retry again soon. */
2276         if (!done)
2277                 return;
2278
2279         __this_cpu_write(trace_taskinfo_save, false);
2280 }
2281
2282 /* Helpers to record a specific task information */
2283 void tracing_record_cmdline(struct task_struct *task)
2284 {
2285         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286 }
2287
2288 void tracing_record_tgid(struct task_struct *task)
2289 {
2290         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291 }
2292
2293 /*
2294  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296  * simplifies those functions and keeps them in sync.
2297  */
2298 enum print_line_t trace_handle_return(struct trace_seq *s)
2299 {
2300         return trace_seq_has_overflowed(s) ?
2301                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302 }
2303 EXPORT_SYMBOL_GPL(trace_handle_return);
2304
2305 void
2306 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307                              int pc)
2308 {
2309         struct task_struct *tsk = current;
2310
2311         entry->preempt_count            = pc & 0xff;
2312         entry->pid                      = (tsk) ? tsk->pid : 0;
2313         entry->flags =
2314 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316 #else
2317                 TRACE_FLAG_IRQS_NOSUPPORT |
2318 #endif
2319                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2320                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324 }
2325 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
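/*
 * The context packed above can be read back with the same TRACE_FLAG_*
 * masks.  A small sketch of decoding it (hypothetical helper name):
 */
#if 0	/* illustrative sketch only, not built */
static void example_describe_ctx(const struct trace_entry *entry)
{
	pr_info("pid=%d preempt=%d%s%s%s%s\n",
		entry->pid, entry->preempt_count,
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? " irqs-off" : "",
		(entry->flags & TRACE_FLAG_NMI) ? " nmi" : "",
		(entry->flags & TRACE_FLAG_HARDIRQ) ? " hardirq" : "",
		(entry->flags & TRACE_FLAG_SOFTIRQ) ? " softirq" : "");
}
#endif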
2326
2327 struct ring_buffer_event *
2328 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329                           int type,
2330                           unsigned long len,
2331                           unsigned long flags, int pc)
2332 {
2333         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334 }
2335
2336 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338 static int trace_buffered_event_ref;
2339
2340 /**
2341  * trace_buffered_event_enable - enable buffering events
2342  *
2343  * When events are being filtered, it is quicker to write the event
2344  * data into a temporary buffer when there is a good chance that the
2345  * event will not be committed.  Discarding a reserved ring-buffer
2346  * entry is not as fast as committing it, and is much slower than
2347  * copying an already completed event into the ring buffer in one shot.
2348  *
2349  * So when an event may be filtered, allocate per-CPU buffers to
2350  * write the event data into.  If the event is filtered and discarded
2351  * it is simply dropped; otherwise the entire data is committed
2352  * in one shot.
2353  */
2354 void trace_buffered_event_enable(void)
2355 {
2356         struct ring_buffer_event *event;
2357         struct page *page;
2358         int cpu;
2359
2360         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361
2362         if (trace_buffered_event_ref++)
2363                 return;
2364
2365         for_each_tracing_cpu(cpu) {
2366                 page = alloc_pages_node(cpu_to_node(cpu),
2367                                         GFP_KERNEL | __GFP_NORETRY, 0);
2368                 if (!page)
2369                         goto failed;
2370
2371                 event = page_address(page);
2372                 memset(event, 0, sizeof(*event));
2373
2374                 per_cpu(trace_buffered_event, cpu) = event;
2375
2376                 preempt_disable();
2377                 if (cpu == smp_processor_id() &&
2378                     this_cpu_read(trace_buffered_event) !=
2379                     per_cpu(trace_buffered_event, cpu))
2380                         WARN_ON_ONCE(1);
2381                 preempt_enable();
2382         }
2383
2384         return;
2385  failed:
2386         trace_buffered_event_disable();
2387 }
2388
2389 static void enable_trace_buffered_event(void *data)
2390 {
2391         /* Probably not needed, but do it anyway */
2392         smp_rmb();
2393         this_cpu_dec(trace_buffered_event_cnt);
2394 }
2395
2396 static void disable_trace_buffered_event(void *data)
2397 {
2398         this_cpu_inc(trace_buffered_event_cnt);
2399 }
2400
2401 /**
2402  * trace_buffered_event_disable - disable buffering events
2403  *
2404  * When a filter is removed, it is faster to not use the buffered
2405  * events, and to commit directly into the ring buffer. Free up
2406  * the temp buffers when there are no more users. This requires
2407  * special synchronization with current events.
2408  */
2409 void trace_buffered_event_disable(void)
2410 {
2411         int cpu;
2412
2413         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414
2415         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416                 return;
2417
2418         if (--trace_buffered_event_ref)
2419                 return;
2420
2421         preempt_disable();
2422         /* For each CPU, set the buffer as used. */
2423         smp_call_function_many(tracing_buffer_mask,
2424                                disable_trace_buffered_event, NULL, 1);
2425         preempt_enable();
2426
2427         /* Wait for all current users to finish */
2428         synchronize_rcu();
2429
2430         for_each_tracing_cpu(cpu) {
2431                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432                 per_cpu(trace_buffered_event, cpu) = NULL;
2433         }
2434         /*
2435          * Make sure trace_buffered_event is NULL before clearing
2436          * trace_buffered_event_cnt.
2437          */
2438         smp_wmb();
2439
2440         preempt_disable();
2441         /* Do the work on each cpu */
2442         smp_call_function_many(tracing_buffer_mask,
2443                                enable_trace_buffered_event, NULL, 1);
2444         preempt_enable();
2445 }
2446
2447 static struct ring_buffer *temp_buffer;
2448
2449 struct ring_buffer_event *
2450 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451                           struct trace_event_file *trace_file,
2452                           int type, unsigned long len,
2453                           unsigned long flags, int pc)
2454 {
2455         struct ring_buffer_event *entry;
2456         int val;
2457
2458         *current_rb = trace_file->tr->trace_buffer.buffer;
2459
2460         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462             (entry = this_cpu_read(trace_buffered_event))) {
2463                 /* Try to use the per cpu buffer first */
2464                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2465                 if (val == 1) {
2466                         trace_event_setup(entry, type, flags, pc);
2467                         entry->array[0] = len;
2468                         return entry;
2469                 }
2470                 this_cpu_dec(trace_buffered_event_cnt);
2471         }
2472
2473         entry = __trace_buffer_lock_reserve(*current_rb,
2474                                             type, len, flags, pc);
2475         /*
2476          * If tracing is off, but we have triggers enabled
2477          * we still need to look at the event data. Use the temp_buffer
2478          * to store the trace event for the trigger to use. It's recursion
2479          * safe and will not be recorded anywhere.
2480          */
2481         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482                 *current_rb = temp_buffer;
2483                 entry = __trace_buffer_lock_reserve(*current_rb,
2484                                                     type, len, flags, pc);
2485         }
2486         return entry;
2487 }
2488 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
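/*
 * The fast path above keys off trace_buffered_event_cnt: the first user on
 * a CPU (count goes 0 -> 1) owns the per-CPU scratch event, and any nested
 * user backs off and takes a real ring-buffer reservation instead.  A
 * stripped-down sketch of that ownership test (hypothetical names):
 */
#if 0	/* illustrative sketch only, not built */
static void *example_try_scratch(void *scratch, int *nest_cnt)
{
	if (!scratch)
		return NULL;		/* no per-CPU page was set up */
	if (++*nest_cnt == 1)
		return scratch;		/* first user on this CPU owns it */
	--*nest_cnt;			/* nested: fall back to the ring buffer */
	return NULL;
}
#endif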
2489
2490 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491 static DEFINE_MUTEX(tracepoint_printk_mutex);
2492
2493 static void output_printk(struct trace_event_buffer *fbuffer)
2494 {
2495         struct trace_event_call *event_call;
2496         struct trace_event *event;
2497         unsigned long flags;
2498         struct trace_iterator *iter = tracepoint_print_iter;
2499
2500         /* We should never get here if iter is NULL */
2501         if (WARN_ON_ONCE(!iter))
2502                 return;
2503
2504         event_call = fbuffer->trace_file->event_call;
2505         if (!event_call || !event_call->event.funcs ||
2506             !event_call->event.funcs->trace)
2507                 return;
2508
2509         event = &fbuffer->trace_file->event_call->event;
2510
2511         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512         trace_seq_init(&iter->seq);
2513         iter->ent = fbuffer->entry;
2514         event_call->event.funcs->trace(iter, 0, event);
2515         trace_seq_putc(&iter->seq, 0);
2516         printk("%s", iter->seq.buffer);
2517
2518         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519 }
2520
2521 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522                              void __user *buffer, size_t *lenp,
2523                              loff_t *ppos)
2524 {
2525         int save_tracepoint_printk;
2526         int ret;
2527
2528         mutex_lock(&tracepoint_printk_mutex);
2529         save_tracepoint_printk = tracepoint_printk;
2530
2531         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532
2533         /*
2534          * This will force exiting early, as tracepoint_printk
2535          * is always zero when tracepoint_print_iter is not allocated
2536          */
2537         if (!tracepoint_print_iter)
2538                 tracepoint_printk = 0;
2539
2540         if (save_tracepoint_printk == tracepoint_printk)
2541                 goto out;
2542
2543         if (tracepoint_printk)
2544                 static_key_enable(&tracepoint_printk_key.key);
2545         else
2546                 static_key_disable(&tracepoint_printk_key.key);
2547
2548  out:
2549         mutex_unlock(&tracepoint_printk_mutex);
2550
2551         return ret;
2552 }
2553
2554 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555 {
2556         if (static_key_false(&tracepoint_printk_key.key))
2557                 output_printk(fbuffer);
2558
2559         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560                                     fbuffer->event, fbuffer->entry,
2561                                     fbuffer->flags, fbuffer->pc);
2562 }
2563 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564
2565 /*
2566  * Skip 3:
2567  *
2568  *   trace_buffer_unlock_commit_regs()
2569  *   trace_event_buffer_commit()
2570  *   trace_event_raw_event_xxx()
2571  */
2572 # define STACK_SKIP 3
2573
2574 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575                                      struct ring_buffer *buffer,
2576                                      struct ring_buffer_event *event,
2577                                      unsigned long flags, int pc,
2578                                      struct pt_regs *regs)
2579 {
2580         __buffer_unlock_commit(buffer, event);
2581
2582         /*
2583          * If regs is not set, then skip the STACK_SKIP internal
2584          * functions above.  Note, we can still get here via blktrace,
2585          * the wakeup tracer and mmiotrace, but it's ok if they lose
2586          * a function or two.  Those frames are not that meaningful.
2587          */
2588         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589         ftrace_trace_userstack(buffer, flags, pc);
2590 }
2591
2592 /*
2593  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2594  */
2595 void
2596 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597                                    struct ring_buffer_event *event)
2598 {
2599         __buffer_unlock_commit(buffer, event);
2600 }
2601
2602 static void
2603 trace_process_export(struct trace_export *export,
2604                struct ring_buffer_event *event)
2605 {
2606         struct trace_entry *entry;
2607         unsigned int size = 0;
2608
2609         entry = ring_buffer_event_data(event);
2610         size = ring_buffer_event_length(event);
2611         export->write(export, entry, size);
2612 }
2613
2614 static DEFINE_MUTEX(ftrace_export_lock);
2615
2616 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617
2618 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619
2620 static inline void ftrace_exports_enable(void)
2621 {
2622         static_branch_enable(&ftrace_exports_enabled);
2623 }
2624
2625 static inline void ftrace_exports_disable(void)
2626 {
2627         static_branch_disable(&ftrace_exports_enabled);
2628 }
2629
2630 static void ftrace_exports(struct ring_buffer_event *event)
2631 {
2632         struct trace_export *export;
2633
2634         preempt_disable_notrace();
2635
2636         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637         while (export) {
2638                 trace_process_export(export, event);
2639                 export = rcu_dereference_raw_notrace(export->next);
2640         }
2641
2642         preempt_enable_notrace();
2643 }
2644
2645 static inline void
2646 add_trace_export(struct trace_export **list, struct trace_export *export)
2647 {
2648         rcu_assign_pointer(export->next, *list);
2649         /*
2650          * We are adding export to the list, but another
2651          * CPU might be walking that list.  We need to make sure
2652          * the export->next pointer is valid before another CPU sees
2653          * the export pointer inserted into the list.
2654          */
2655         rcu_assign_pointer(*list, export);
2656 }
2657
2658 static inline int
2659 rm_trace_export(struct trace_export **list, struct trace_export *export)
2660 {
2661         struct trace_export **p;
2662
2663         for (p = list; *p != NULL; p = &(*p)->next)
2664                 if (*p == export)
2665                         break;
2666
2667         if (*p != export)
2668                 return -1;
2669
2670         rcu_assign_pointer(*p, (*p)->next);
2671
2672         return 0;
2673 }
2674
2675 static inline void
2676 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677 {
2678         if (*list == NULL)
2679                 ftrace_exports_enable();
2680
2681         add_trace_export(list, export);
2682 }
2683
2684 static inline int
2685 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687         int ret;
2688
2689         ret = rm_trace_export(list, export);
2690         if (*list == NULL)
2691                 ftrace_exports_disable();
2692
2693         return ret;
2694 }
2695
2696 int register_ftrace_export(struct trace_export *export)
2697 {
2698         if (WARN_ON_ONCE(!export->write))
2699                 return -1;
2700
2701         mutex_lock(&ftrace_export_lock);
2702
2703         add_ftrace_export(&ftrace_exports_list, export);
2704
2705         mutex_unlock(&ftrace_export_lock);
2706
2707         return 0;
2708 }
2709 EXPORT_SYMBOL_GPL(register_ftrace_export);
2710
2711 int unregister_ftrace_export(struct trace_export *export)
2712 {
2713         int ret;
2714
2715         mutex_lock(&ftrace_export_lock);
2716
2717         ret = rm_ftrace_export(&ftrace_exports_list, export);
2718
2719         mutex_unlock(&ftrace_export_lock);
2720
2721         return ret;
2722 }
2723 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
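/*
 * A minimal sketch of an ftrace export consumer: supply a ->write()
 * callback via struct trace_export (declared in <linux/trace.h>, already
 * included above) and register it.  The callback is invoked from
 * ftrace_exports() above with preemption disabled, so it must not sleep.
 * The "example" names are hypothetical.
 */
#if 0	/* illustrative sketch only, not built */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push @size bytes of the trace entry somewhere without sleeping. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}
#endif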
2724
2725 void
2726 trace_function(struct trace_array *tr,
2727                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728                int pc)
2729 {
2730         struct trace_event_call *call = &event_function;
2731         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732         struct ring_buffer_event *event;
2733         struct ftrace_entry *entry;
2734
2735         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736                                             flags, pc);
2737         if (!event)
2738                 return;
2739         entry   = ring_buffer_event_data(event);
2740         entry->ip                       = ip;
2741         entry->parent_ip                = parent_ip;
2742
2743         if (!call_filter_check_discard(call, entry, buffer, event)) {
2744                 if (static_branch_unlikely(&ftrace_exports_enabled))
2745                         ftrace_exports(event);
2746                 __buffer_unlock_commit(buffer, event);
2747         }
2748 }
2749
2750 #ifdef CONFIG_STACKTRACE
2751
2752 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753 struct ftrace_stack {
2754         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2755 };
2756
2757 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759
2760 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761                                  unsigned long flags,
2762                                  int skip, int pc, struct pt_regs *regs)
2763 {
2764         struct trace_event_call *call = &event_kernel_stack;
2765         struct ring_buffer_event *event;
2766         struct stack_entry *entry;
2767         struct stack_trace trace;
2768         int use_stack;
2769         int size = FTRACE_STACK_ENTRIES;
2770
2771         trace.nr_entries        = 0;
2772         trace.skip              = skip;
2773
2774         /*
2775          * Add one, for this function and the call to save_stack_trace()
2776          * If regs is set, then these functions will not be in the way.
2777          */
2778 #ifndef CONFIG_UNWINDER_ORC
2779         if (!regs)
2780                 trace.skip++;
2781 #endif
2782
2783         /*
2784          * Since events can happen in NMIs there's no safe way to
2785          * use the per cpu ftrace_stacks.  We reserve it and if an interrupt
2786          * or NMI comes in, it will just have to use the default
2787          * FTRACE_STACK_ENTRIES sized stack that is embedded in the event.
2788          */
2789         preempt_disable_notrace();
2790
2791         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792         /*
2793          * We don't need any atomic variables, just a barrier.
2794          * If an interrupt comes in, we don't care, because it would
2795          * have exited and put the counter back to what we want.
2796          * We just need a barrier to keep gcc from moving things
2797          * around.
2798          */
2799         barrier();
2800         if (use_stack == 1) {
2801                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2802                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2803
2804                 if (regs)
2805                         save_stack_trace_regs(regs, &trace);
2806                 else
2807                         save_stack_trace(&trace);
2808
2809                 if (trace.nr_entries > size)
2810                         size = trace.nr_entries;
2811         } else
2812                 /* From now on, use_stack is a boolean */
2813                 use_stack = 0;
2814
2815         size *= sizeof(unsigned long);
2816
2817         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818                                             sizeof(*entry) + size, flags, pc);
2819         if (!event)
2820                 goto out;
2821         entry = ring_buffer_event_data(event);
2822
2823         memset(&entry->caller, 0, size);
2824
2825         if (use_stack)
2826                 memcpy(&entry->caller, trace.entries,
2827                        trace.nr_entries * sizeof(unsigned long));
2828         else {
2829                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2830                 trace.entries           = entry->caller;
2831                 if (regs)
2832                         save_stack_trace_regs(regs, &trace);
2833                 else
2834                         save_stack_trace(&trace);
2835         }
2836
2837         entry->size = trace.nr_entries;
2838
2839         if (!call_filter_check_discard(call, entry, buffer, event))
2840                 __buffer_unlock_commit(buffer, event);
2841
2842  out:
2843         /* Again, don't let gcc optimize things here */
2844         barrier();
2845         __this_cpu_dec(ftrace_stack_reserve);
2846         preempt_enable_notrace();
2847
2848 }
2849
2850 static inline void ftrace_trace_stack(struct trace_array *tr,
2851                                       struct ring_buffer *buffer,
2852                                       unsigned long flags,
2853                                       int skip, int pc, struct pt_regs *regs)
2854 {
2855         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856                 return;
2857
2858         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859 }
2860
2861 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862                    int pc)
2863 {
2864         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865
2866         if (rcu_is_watching()) {
2867                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868                 return;
2869         }
2870
2871         /*
2872          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873          * but if the above rcu_is_watching() failed, then the NMI
2874          * triggered someplace critical, and rcu_irq_enter() should
2875          * not be called from NMI.
2876          */
2877         if (unlikely(in_nmi()))
2878                 return;
2879
2880         rcu_irq_enter_irqson();
2881         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882         rcu_irq_exit_irqson();
2883 }
2884
2885 /**
2886  * trace_dump_stack - record a stack back trace in the trace buffer
2887  * @skip: Number of functions to skip (helper handlers)
2888  */
2889 void trace_dump_stack(int skip)
2890 {
2891         unsigned long flags;
2892
2893         if (tracing_disabled || tracing_selftest_running)
2894                 return;
2895
2896         local_save_flags(flags);
2897
2898 #ifndef CONFIG_UNWINDER_ORC
2899         /* Skip 1 to skip this function. */
2900         skip++;
2901 #endif
2902         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903                              flags, skip, preempt_count(), NULL);
2904 }
2905 EXPORT_SYMBOL_GPL(trace_dump_stack);
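/*
 * trace_dump_stack() is meant as a debugging aid for other kernel code:
 * call it wherever a backtrace should land in the trace buffer instead of
 * the console.  A hedged example with a hypothetical call site:
 */
#if 0	/* illustrative sketch only, not built */
static void example_suspicious_path(void)
{
	/* Record how we got here; 0 = skip no extra frames beyond the helpers. */
	trace_dump_stack(0);
}
#endif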
2906
2907 static DEFINE_PER_CPU(int, user_stack_count);
2908
2909 void
2910 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911 {
2912         struct trace_event_call *call = &event_user_stack;
2913         struct ring_buffer_event *event;
2914         struct userstack_entry *entry;
2915         struct stack_trace trace;
2916
2917         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918                 return;
2919
2920         /*
2921          * NMIs can not handle page faults, even with fixups.
2922          * Saving the user stack can (and often does) fault.
2923          */
2924         if (unlikely(in_nmi()))
2925                 return;
2926
2927         /*
2928          * prevent recursion, since the user stack tracing may
2929          * trigger other kernel events.
2930          */
2931         preempt_disable();
2932         if (__this_cpu_read(user_stack_count))
2933                 goto out;
2934
2935         __this_cpu_inc(user_stack_count);
2936
2937         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938                                             sizeof(*entry), flags, pc);
2939         if (!event)
2940                 goto out_drop_count;
2941         entry   = ring_buffer_event_data(event);
2942
2943         entry->tgid             = current->tgid;
2944         memset(&entry->caller, 0, sizeof(entry->caller));
2945
2946         trace.nr_entries        = 0;
2947         trace.max_entries       = FTRACE_STACK_ENTRIES;
2948         trace.skip              = 0;
2949         trace.entries           = entry->caller;
2950
2951         save_stack_trace_user(&trace);
2952         if (!call_filter_check_discard(call, entry, buffer, event))
2953                 __buffer_unlock_commit(buffer, event);
2954
2955  out_drop_count:
2956         __this_cpu_dec(user_stack_count);
2957  out:
2958         preempt_enable();
2959 }
2960
2961 #ifdef UNUSED
2962 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963 {
2964         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2965 }
2966 #endif /* UNUSED */
2967
2968 #endif /* CONFIG_STACKTRACE */
2969
2970 /* created for use with alloc_percpu */
2971 struct trace_buffer_struct {
2972         int nesting;
2973         char buffer[4][TRACE_BUF_SIZE];
2974 };
2975
2976 static struct trace_buffer_struct *trace_percpu_buffer;
2977
2978 /*
2979  * This allows for lockless recording.  If we're nested too deeply, then
2980  * this returns NULL.
2981  */
2982 static char *get_trace_buf(void)
2983 {
2984         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985
2986         if (!buffer || buffer->nesting >= 4)
2987                 return NULL;
2988
2989         buffer->nesting++;
2990
2991         /* Interrupts must see nesting incremented before we use the buffer */
2992         barrier();
2993         return &buffer->buffer[buffer->nesting][0];
2994 }
2995
2996 static void put_trace_buf(void)
2997 {
2998         /* Don't let the decrement of nesting leak before this */
2999         barrier();
3000         this_cpu_dec(trace_percpu_buffer->nesting);
3001 }
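/*
 * get_trace_buf()/put_trace_buf() must be paired, and the caller has to
 * stay on one CPU in between (the users below disable preemption first).
 * The nesting counter is what lets a re-entrant user on the same CPU get
 * its own slot.  A sketch of the expected calling pattern:
 */
#if 0	/* illustrative sketch only, not built */
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();
	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* Format at most TRACE_BUF_SIZE bytes into tbuffer here. */
		put_trace_buf();
	}
	preempt_enable_notrace();
}
#endif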
3002
3003 static int alloc_percpu_trace_buffer(void)
3004 {
3005         struct trace_buffer_struct *buffers;
3006
3007         buffers = alloc_percpu(struct trace_buffer_struct);
3008         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009                 return -ENOMEM;
3010
3011         trace_percpu_buffer = buffers;
3012         return 0;
3013 }
3014
3015 static int buffers_allocated;
3016
3017 void trace_printk_init_buffers(void)
3018 {
3019         if (buffers_allocated)
3020                 return;
3021
3022         if (alloc_percpu_trace_buffer())
3023                 return;
3024
3025         /* trace_printk() is for debug use only. Don't use it in production. */
3026
3027         pr_warn("\n");
3028         pr_warn("**********************************************************\n");
3029         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3030         pr_warn("**                                                      **\n");
3031         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3032         pr_warn("**                                                      **\n");
3033         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3034         pr_warn("** unsafe for production use.                           **\n");
3035         pr_warn("**                                                      **\n");
3036         pr_warn("** If you see this message and you are not debugging    **\n");
3037         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3038         pr_warn("**                                                      **\n");
3039         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3040         pr_warn("**********************************************************\n");
3041
3042         /* Expand the buffers to set size */
3043         tracing_update_buffers();
3044
3045         buffers_allocated = 1;
3046
3047         /*
3048          * trace_printk_init_buffers() can be called by modules.
3049          * If that happens, then we need to start cmdline recording
3050          * directly here. If the global_trace.trace_buffer.buffer is
3051          * already allocated here, then this was called by module code.
3052          */
3053         if (global_trace.trace_buffer.buffer)
3054                 tracing_start_cmdline_record();
3055 }
3056 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
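/*
 * For reference, the debug-only usage that triggers the banner above looks
 * like an ordinary printk(), but the output goes into the ftrace ring
 * buffer.  The function and variable names here are hypothetical:
 */
#if 0	/* illustrative sketch only, not built */
static void example_debug_hook(int nr, unsigned int queue_id)
{
	trace_printk("processed %d packets on queue %u\n", nr, queue_id);
}
#endif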
3057
3058 void trace_printk_start_comm(void)
3059 {
3060         /* Start tracing comms if trace printk is set */
3061         if (!buffers_allocated)
3062                 return;
3063         tracing_start_cmdline_record();
3064 }
3065
3066 static void trace_printk_start_stop_comm(int enabled)
3067 {
3068         if (!buffers_allocated)
3069                 return;
3070
3071         if (enabled)
3072                 tracing_start_cmdline_record();
3073         else
3074                 tracing_stop_cmdline_record();
3075 }
3076
3077 /**
3078  * trace_vbprintk - write binary msg to tracing buffer
3079  * @ip: caller address, @fmt: printk-style format string, @args: its arguments
3080  */
3081 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3082 {
3083         struct trace_event_call *call = &event_bprint;
3084         struct ring_buffer_event *event;
3085         struct ring_buffer *buffer;
3086         struct trace_array *tr = &global_trace;
3087         struct bprint_entry *entry;
3088         unsigned long flags;
3089         char *tbuffer;
3090         int len = 0, size, pc;
3091
3092         if (unlikely(tracing_selftest_running || tracing_disabled))
3093                 return 0;
3094
3095         /* Don't pollute graph traces with trace_vprintk internals */
3096         pause_graph_tracing();
3097
3098         pc = preempt_count();
3099         preempt_disable_notrace();
3100
3101         tbuffer = get_trace_buf();
3102         if (!tbuffer) {
3103                 len = 0;
3104                 goto out_nobuffer;
3105         }
3106
3107         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3108
3109         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3110                 goto out;
3111
3112         local_save_flags(flags);
3113         size = sizeof(*entry) + sizeof(u32) * len;
3114         buffer = tr->trace_buffer.buffer;
3115         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3116                                             flags, pc);
3117         if (!event)
3118                 goto out;
3119         entry = ring_buffer_event_data(event);
3120         entry->ip                       = ip;
3121         entry->fmt                      = fmt;
3122
3123         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3124         if (!call_filter_check_discard(call, entry, buffer, event)) {
3125                 __buffer_unlock_commit(buffer, event);
3126                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3127         }
3128
3129 out:
3130         put_trace_buf();
3131
3132 out_nobuffer:
3133         preempt_enable_notrace();
3134         unpause_graph_tracing();
3135
3136         return len;
3137 }
3138 EXPORT_SYMBOL_GPL(trace_vbprintk);
3139
3140 __printf(3, 0)
3141 static int
3142 __trace_array_vprintk(struct ring_buffer *buffer,
3143                       unsigned long ip, const char *fmt, va_list args)
3144 {
3145         struct trace_event_call *call = &event_print;
3146         struct ring_buffer_event *event;
3147         int len = 0, size, pc;
3148         struct print_entry *entry;
3149         unsigned long flags;
3150         char *tbuffer;
3151
3152         if (tracing_disabled || tracing_selftest_running)
3153                 return 0;
3154
3155         /* Don't pollute graph traces with trace_vprintk internals */
3156         pause_graph_tracing();
3157
3158         pc = preempt_count();
3159         preempt_disable_notrace();
3160
3161
3162         tbuffer = get_trace_buf();
3163         if (!tbuffer) {
3164                 len = 0;
3165                 goto out_nobuffer;
3166         }
3167
3168         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3169
3170         local_save_flags(flags);
3171         size = sizeof(*entry) + len + 1;
3172         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3173                                             flags, pc);
3174         if (!event)
3175                 goto out;
3176         entry = ring_buffer_event_data(event);
3177         entry->ip = ip;
3178
3179         memcpy(&entry->buf, tbuffer, len + 1);
3180         if (!call_filter_check_discard(call, entry, buffer, event)) {
3181                 __buffer_unlock_commit(buffer, event);
3182                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3183         }
3184
3185 out:
3186         put_trace_buf();
3187
3188 out_nobuffer:
3189         preempt_enable_notrace();
3190         unpause_graph_tracing();
3191
3192         return len;
3193 }
3194
3195 __printf(3, 0)
3196 int trace_array_vprintk(struct trace_array *tr,
3197                         unsigned long ip, const char *fmt, va_list args)
3198 {
3199         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3200 }
3201
3202 __printf(3, 0)
3203 int trace_array_printk(struct trace_array *tr,
3204                        unsigned long ip, const char *fmt, ...)
3205 {
3206         int ret;
3207         va_list ap;
3208
3209         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3210                 return 0;
3211
3212         va_start(ap, fmt);
3213         ret = trace_array_vprintk(tr, ip, fmt, ap);
3214         va_end(ap);
3215         return ret;
3216 }
3217 EXPORT_SYMBOL_GPL(trace_array_printk);
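/*
 * trace_array_printk() is the instance-aware flavour: it writes into the
 * buffer of the given trace_array rather than the global one.  A hedged
 * sketch, assuming the caller already holds a struct trace_array pointer
 * for an instance (how it obtained one is outside this file):
 */
#if 0	/* illustrative sketch only, not built */
static void example_instance_log(struct trace_array *tr, int err)
{
	trace_array_printk(tr, _THIS_IP_, "device reset, err=%d\n", err);
}
#endif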
3218
3219 __printf(3, 4)
3220 int trace_array_printk_buf(struct ring_buffer *buffer,
3221                            unsigned long ip, const char *fmt, ...)
3222 {
3223         int ret;
3224         va_list ap;
3225
3226         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3227                 return 0;
3228
3229         va_start(ap, fmt);
3230         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3231         va_end(ap);
3232         return ret;
3233 }
3234
3235 __printf(2, 0)
3236 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3237 {
3238         return trace_array_vprintk(&global_trace, ip, fmt, args);
3239 }
3240 EXPORT_SYMBOL_GPL(trace_vprintk);
3241
3242 static void trace_iterator_increment(struct trace_iterator *iter)
3243 {
3244         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3245
3246         iter->idx++;
3247         if (buf_iter)
3248                 ring_buffer_read(buf_iter, NULL);
3249 }
3250
3251 static struct trace_entry *
3252 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3253                 unsigned long *lost_events)
3254 {
3255         struct ring_buffer_event *event;
3256         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3257
3258         if (buf_iter)
3259                 event = ring_buffer_iter_peek(buf_iter, ts);
3260         else
3261                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3262                                          lost_events);
3263
3264         if (event) {
3265                 iter->ent_size = ring_buffer_event_length(event);
3266                 return ring_buffer_event_data(event);
3267         }
3268         iter->ent_size = 0;
3269         return NULL;
3270 }
3271
3272 static struct trace_entry *
3273 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3274                   unsigned long *missing_events, u64 *ent_ts)
3275 {
3276         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3277         struct trace_entry *ent, *next = NULL;
3278         unsigned long lost_events = 0, next_lost = 0;
3279         int cpu_file = iter->cpu_file;
3280         u64 next_ts = 0, ts;
3281         int next_cpu = -1;
3282         int next_size = 0;
3283         int cpu;
3284
3285         /*
3286          * If we are in a per_cpu trace file, don't bother iterating over
3287          * all CPUs; peek at this CPU's buffer directly.
3288          */
3289         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3290                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3291                         return NULL;
3292                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3293                 if (ent_cpu)
3294                         *ent_cpu = cpu_file;
3295
3296                 return ent;
3297         }
3298
3299         for_each_tracing_cpu(cpu) {
3300
3301                 if (ring_buffer_empty_cpu(buffer, cpu))
3302                         continue;
3303
3304                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3305
3306                 /*
3307                  * Pick the entry with the smallest timestamp:
3308                  */
3309                 if (ent && (!next || ts < next_ts)) {
3310                         next = ent;
3311                         next_cpu = cpu;
3312                         next_ts = ts;
3313                         next_lost = lost_events;
3314                         next_size = iter->ent_size;
3315                 }
3316         }
3317
3318         iter->ent_size = next_size;
3319
3320         if (ent_cpu)
3321                 *ent_cpu = next_cpu;
3322
3323         if (ent_ts)
3324                 *ent_ts = next_ts;
3325
3326         if (missing_events)
3327                 *missing_events = next_lost;
3328
3329         return next;
3330 }
3331
3332 /* Find the next real entry, without updating the iterator itself */
3333 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3334                                           int *ent_cpu, u64 *ent_ts)
3335 {
3336         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3337 }
3338
3339 /* Find the next real entry, and increment the iterator to the next entry */
3340 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3341 {
3342         iter->ent = __find_next_entry(iter, &iter->cpu,
3343                                       &iter->lost_events, &iter->ts);
3344
3345         if (iter->ent)
3346                 trace_iterator_increment(iter);
3347
3348         return iter->ent ? iter : NULL;
3349 }
3350
3351 static void trace_consume(struct trace_iterator *iter)
3352 {
3353         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3354                             &iter->lost_events);
3355 }
3356
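/*
 * seq_file ->next() operation: walk the iterator forward until it
 * reaches position *pos; returning NULL ends the sequence.
 */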
3357 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3358 {
3359         struct trace_iterator *iter = m->private;
3360         int i = (int)*pos;
3361         void *ent;
3362
3363         WARN_ON_ONCE(iter->leftover);
3364
3365         (*pos)++;
3366
3367         /* can't go backwards */
3368         if (iter->idx > i)
3369                 return NULL;
3370
3371         if (iter->idx < 0)
3372                 ent = trace_find_next_entry_inc(iter);
3373         else
3374                 ent = iter;
3375
3376         while (ent && iter->idx < i)
3377                 ent = trace_find_next_entry_inc(iter);
3378
3379         iter->pos = *pos;
3380
3381         return ent;
3382 }
3383
3384 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3385 {
3386         struct ring_buffer_event *event;
3387         struct ring_buffer_iter *buf_iter;
3388         unsigned long entries = 0;
3389         u64 ts;
3390
3391         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3392
3393         buf_iter = trace_buffer_iter(iter, cpu);
3394         if (!buf_iter)
3395                 return;
3396
3397         ring_buffer_iter_reset(buf_iter);
3398
3399         /*
3400          * With the max latency tracers we can have the case that a
3401          * reset never took place on a CPU. This shows up as events
3402          * whose timestamps are before the start of the buffer.
3403          */
3404         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3405                 if (ts >= iter->trace_buffer->time_start)
3406                         break;
3407                 entries++;
3408                 ring_buffer_read(buf_iter, NULL);
3409         }
3410
3411         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3412 }
3413
3414 /*
3415  * The current tracer is copied so that a global lock does not
3416  * have to be held for the whole iteration.
3417  */
3418 static void *s_start(struct seq_file *m, loff_t *pos)
3419 {
3420         struct trace_iterator *iter = m->private;
3421         struct trace_array *tr = iter->tr;
3422         int cpu_file = iter->cpu_file;
3423         void *p = NULL;
3424         loff_t l = 0;
3425         int cpu;
3426
3427         /*
3428          * Copy the tracer to avoid using a global lock all around.
3429          * iter->trace is a copy of current_trace; the name pointer may
3430          * be compared instead of using strcmp(), as iter->trace->name
3431          * points to the same string as current_trace->name.
3432          */
3433         mutex_lock(&trace_types_lock);
3434         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3435                 *iter->trace = *tr->current_trace;
3436         mutex_unlock(&trace_types_lock);
3437
3438 #ifdef CONFIG_TRACER_MAX_TRACE
3439         if (iter->snapshot && iter->trace->use_max_tr)
3440                 return ERR_PTR(-EBUSY);
3441 #endif
3442
3443         if (!iter->snapshot)
3444                 atomic_inc(&trace_record_taskinfo_disabled);
3445
3446         if (*pos != iter->pos) {
3447                 iter->ent = NULL;
3448                 iter->cpu = 0;
3449                 iter->idx = -1;
3450
3451                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3452                         for_each_tracing_cpu(cpu)
3453                                 tracing_iter_reset(iter, cpu);
3454                 } else
3455                         tracing_iter_reset(iter, cpu_file);
3456
3457                 iter->leftover = 0;
3458                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3459                         ;
3460
3461         } else {
3462                 /*
3463                  * If we overflowed the seq_file before, then we want
3464                  * to just reuse the trace_seq buffer again.
3465                  */
3466                 if (iter->leftover)
3467                         p = iter;
3468                 else {
3469                         l = *pos - 1;
3470                         p = s_next(m, p, &l);
3471                 }
3472         }
3473
3474         trace_event_read_lock();
3475         trace_access_lock(cpu_file);
3476         return p;
3477 }
3478
3479 static void s_stop(struct seq_file *m, void *p)
3480 {
3481         struct trace_iterator *iter = m->private;
3482
3483 #ifdef CONFIG_TRACER_MAX_TRACE
3484         if (iter->snapshot && iter->trace->use_max_tr)
3485                 return;
3486 #endif
3487
3488         if (!iter->snapshot)
3489                 atomic_dec(&trace_record_taskinfo_disabled);
3490
3491         trace_access_unlock(iter->cpu_file);
3492         trace_event_read_unlock();
3493 }
3494
3495 static void
3496 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3497                       unsigned long *entries, int cpu)
3498 {
3499         unsigned long count;
3500
3501         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3502         /*
3503          * If this buffer has skipped entries, then we hold all
3504          * entries for the trace and we need to ignore the
3505          * ones before the time stamp.
3506          */
3507         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3508                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3509                 /* total is the same as the entries */
3510                 *total = count;
3511         } else
3512                 *total = count +
3513                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3514         *entries = count;
3515 }
3516
3517 static void
3518 get_total_entries(struct trace_buffer *buf,
3519                   unsigned long *total, unsigned long *entries)
3520 {
3521         unsigned long t, e;
3522         int cpu;
3523
3524         *total = 0;
3525         *entries = 0;
3526
3527         for_each_tracing_cpu(cpu) {
3528                 get_total_entries_cpu(buf, &t, &e, cpu);
3529                 *total += t;
3530                 *entries += e;
3531         }
3532 }
3533
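/*
 * Return the number of entries currently in @cpu's buffer for @tr
 * (NULL means the top-level trace instance). Entries lost to overruns
 * are not counted.
 */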
3534 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3535 {
3536         unsigned long total, entries;
3537
3538         if (!tr)
3539                 tr = &global_trace;
3540
3541         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3542
3543         return entries;
3544 }
3545
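/*
 * Same as trace_total_entries_cpu() but summed over all tracing CPUs.
 * A caller might use it like (illustration only):
 *
 *	pr_info("%lu trace entries\n", trace_total_entries(NULL));
 */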
3546 unsigned long trace_total_entries(struct trace_array *tr)
3547 {
3548         unsigned long total, entries;
3549
3550         if (!tr)
3551                 tr = &global_trace;
3552
3553         get_total_entries(&tr->trace_buffer, &total, &entries);
3554
3555         return entries;
3556 }
3557
3558 static void print_lat_help_header(struct seq_file *m)
3559 {
3560         seq_puts(m, "#                  _------=> CPU#            \n"
3561                     "#                 / _-----=> irqs-off        \n"
3562                     "#                | / _----=> need-resched    \n"
3563                     "#                || / _---=> hardirq/softirq \n"
3564                     "#                ||| / _--=> preempt-depth   \n"
3565                     "#                |||| /     delay            \n"
3566                     "#  cmd     pid   ||||| time  |   caller      \n"
3567                     "#     \\   /      |||||  \\    |   /         \n");
3568 }
3569
3570 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3571 {
3572         unsigned long total;
3573         unsigned long entries;
3574
3575         get_total_entries(buf, &total, &entries);
3576         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3577                    entries, total, num_online_cpus());
3578         seq_puts(m, "#\n");
3579 }
3580
3581 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3582                                    unsigned int flags)
3583 {
3584         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3585
3586         print_event_info(buf, m);
3587
3588         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3589         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3590 }
3591
3592 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3593                                        unsigned int flags)
3594 {
3595         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3596         const char tgid_space[] = "          ";
3597         const char space[] = "  ";
3598
3599         print_event_info(buf, m);
3600
3601         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3602                    tgid ? tgid_space : space);
3603         seq_printf(m, "#                          %s / _----=> need-resched\n",
3604                    tgid ? tgid_space : space);
3605         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3606                    tgid ? tgid_space : space);
3607         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3608                    tgid ? tgid_space : space);
3609         seq_printf(m, "#                          %s||| /     delay\n",
3610                    tgid ? tgid_space : space);
3611         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3612                    tgid ? "   TGID   " : space);
3613         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3614                    tgid ? "     |    " : space);
3615 }
3616
3617 void
3618 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3619 {
3620         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3621         struct trace_buffer *buf = iter->trace_buffer;
3622         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3623         struct tracer *type = iter->trace;
3624         unsigned long entries;
3625         unsigned long total;
3626         const char *name;
3627
3628         name = type->name;
3629
3630         get_total_entries(buf, &total, &entries);
3631
3632         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3633                    name, UTS_RELEASE);
3634         seq_puts(m, "# -----------------------------------"
3635                  "---------------------------------\n");
3636         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3637                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3638                    nsecs_to_usecs(data->saved_latency),
3639                    entries,
3640                    total,
3641                    buf->cpu,
3642 #if defined(CONFIG_PREEMPT_NONE)
3643                    "server",
3644 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3645                    "desktop",
3646 #elif defined(CONFIG_PREEMPT)
3647                    "preempt",
3648 #else
3649                    "unknown",
3650 #endif
3651                    /* These are reserved for later use */
3652                    0, 0, 0, 0);
3653 #ifdef CONFIG_SMP
3654         seq_printf(m, " #P:%d)\n", num_online_cpus());
3655 #else
3656         seq_puts(m, ")\n");
3657 #endif
3658         seq_puts(m, "#    -----------------\n");
3659         seq_printf(m, "#    | task: %.16s-%d "
3660                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3661                    data->comm, data->pid,
3662                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3663                    data->policy, data->rt_priority);
3664         seq_puts(m, "#    -----------------\n");
3665
3666         if (data->critical_start) {
3667                 seq_puts(m, "#  => started at: ");
3668                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3669                 trace_print_seq(m, &iter->seq);
3670                 seq_puts(m, "\n#  => ended at:   ");
3671                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3672                 trace_print_seq(m, &iter->seq);
3673                 seq_puts(m, "\n#\n");
3674         }
3675
3676         seq_puts(m, "#\n");
3677 }
3678
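/*
 * Print a "CPU %u buffer started" annotation the first time an entry
 * from a given CPU is seen, when annotations are enabled and that
 * CPU's buffer has no skipped entries.
 */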
3679 static void test_cpu_buff_start(struct trace_iterator *iter)
3680 {
3681         struct trace_seq *s = &iter->seq;
3682         struct trace_array *tr = iter->tr;
3683
3684         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3685                 return;
3686
3687         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3688                 return;
3689
3690         if (cpumask_available(iter->started) &&
3691             cpumask_test_cpu(iter->cpu, iter->started))
3692                 return;
3693
3694         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3695                 return;
3696
3697         if (cpumask_available(iter->started))
3698                 cpumask_set_cpu(iter->cpu, iter->started);
3699
3700         /* Don't print started cpu buffer for the first entry of the trace */
3701         if (iter->idx > 1)
3702                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3703                                 iter->cpu);
3704 }
3705
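/*
 * Default (human readable) line format. The raw, hex and binary
 * variants below are selected by the corresponding trace options.
 */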
3706 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3707 {
3708         struct trace_array *tr = iter->tr;
3709         struct trace_seq *s = &iter->seq;
3710         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3711         struct trace_entry *entry;
3712         struct trace_event *event;
3713
3714         entry = iter->ent;
3715
3716         test_cpu_buff_start(iter);
3717
3718         event = ftrace_find_event(entry->type);
3719
3720         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3721                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3722                         trace_print_lat_context(iter);
3723                 else
3724                         trace_print_context(iter);
3725         }
3726
3727         if (trace_seq_has_overflowed(s))
3728                 return TRACE_TYPE_PARTIAL_LINE;
3729
3730         if (event)
3731                 return event->funcs->trace(iter, sym_flags, event);
3732
3733         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3734
3735         return trace_handle_return(s);
3736 }
3737
3738 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3739 {
3740         struct trace_array *tr = iter->tr;
3741         struct trace_seq *s = &iter->seq;
3742         struct trace_entry *entry;
3743         struct trace_event *event;
3744
3745         entry = iter->ent;
3746
3747         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3748                 trace_seq_printf(s, "%d %d %llu ",
3749                                  entry->pid, iter->cpu, iter->ts);
3750
3751         if (trace_seq_has_overflowed(s))
3752                 return TRACE_TYPE_PARTIAL_LINE;
3753
3754         event = ftrace_find_event(entry->type);
3755         if (event)
3756                 return event->funcs->raw(iter, 0, event);
3757
3758         trace_seq_printf(s, "%d ?\n", entry->type);
3759
3760         return trace_handle_return(s);
3761 }
3762
3763 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3764 {
3765         struct trace_array *tr = iter->tr;
3766         struct trace_seq *s = &iter->seq;
3767         unsigned char newline = '\n';
3768         struct trace_entry *entry;
3769         struct trace_event *event;
3770
3771         entry = iter->ent;
3772
3773         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3774                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3775                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3776                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3777                 if (trace_seq_has_overflowed(s))
3778                         return TRACE_TYPE_PARTIAL_LINE;
3779         }
3780
3781         event = ftrace_find_event(entry->type);
3782         if (event) {
3783                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3784                 if (ret != TRACE_TYPE_HANDLED)
3785                         return ret;
3786         }
3787
3788         SEQ_PUT_FIELD(s, newline);
3789
3790         return trace_handle_return(s);
3791 }
3792
3793 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3794 {
3795         struct trace_array *tr = iter->tr;
3796         struct trace_seq *s = &iter->seq;
3797         struct trace_entry *entry;
3798         struct trace_event *event;
3799
3800         entry = iter->ent;
3801
3802         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3803                 SEQ_PUT_FIELD(s, entry->pid);
3804                 SEQ_PUT_FIELD(s, iter->cpu);
3805                 SEQ_PUT_FIELD(s, iter->ts);
3806                 if (trace_seq_has_overflowed(s))
3807                         return TRACE_TYPE_PARTIAL_LINE;
3808         }
3809
3810         event = ftrace_find_event(entry->type);
3811         return event ? event->funcs->binary(iter, 0, event) :
3812                 TRACE_TYPE_HANDLED;
3813 }
3814
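/*
 * Return 1 if there is nothing to read, checking either the single CPU
 * selected by the iterator or all tracing CPUs.
 */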
3815 int trace_empty(struct trace_iterator *iter)
3816 {
3817         struct ring_buffer_iter *buf_iter;
3818         int cpu;
3819
3820         /* If we are looking at one CPU buffer, only check that one */
3821         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3822                 cpu = iter->cpu_file;
3823                 buf_iter = trace_buffer_iter(iter, cpu);
3824                 if (buf_iter) {
3825                         if (!ring_buffer_iter_empty(buf_iter))
3826                                 return 0;
3827                 } else {
3828                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3829                                 return 0;
3830                 }
3831                 return 1;
3832         }
3833
3834         for_each_tracing_cpu(cpu) {
3835                 buf_iter = trace_buffer_iter(iter, cpu);
3836                 if (buf_iter) {
3837                         if (!ring_buffer_iter_empty(buf_iter))
3838                                 return 0;
3839                 } else {
3840                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3841                                 return 0;
3842                 }
3843         }
3844
3845         return 1;
3846 }
3847
3848 /*  Called with trace_event_read_lock() held. */
3849 enum print_line_t print_trace_line(struct trace_iterator *iter)
3850 {
3851         struct trace_array *tr = iter->tr;
3852         unsigned long trace_flags = tr->trace_flags;
3853         enum print_line_t ret;
3854
3855         if (iter->lost_events) {
3856                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3857                                  iter->cpu, iter->lost_events);
3858                 if (trace_seq_has_overflowed(&iter->seq))
3859                         return TRACE_TYPE_PARTIAL_LINE;
3860         }
3861
3862         if (iter->trace && iter->trace->print_line) {
3863                 ret = iter->trace->print_line(iter);
3864                 if (ret != TRACE_TYPE_UNHANDLED)
3865                         return ret;
3866         }
3867
3868         if (iter->ent->type == TRACE_BPUTS &&
3869                         trace_flags & TRACE_ITER_PRINTK &&
3870                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3871                 return trace_print_bputs_msg_only(iter);
3872
3873         if (iter->ent->type == TRACE_BPRINT &&
3874                         trace_flags & TRACE_ITER_PRINTK &&
3875                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3876                 return trace_print_bprintk_msg_only(iter);
3877
3878         if (iter->ent->type == TRACE_PRINT &&
3879                         trace_flags & TRACE_ITER_PRINTK &&
3880                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3881                 return trace_print_printk_msg_only(iter);
3882
3883         if (trace_flags & TRACE_ITER_BIN)
3884                 return print_bin_fmt(iter);
3885
3886         if (trace_flags & TRACE_ITER_HEX)
3887                 return print_hex_fmt(iter);
3888
3889         if (trace_flags & TRACE_ITER_RAW)
3890                 return print_raw_fmt(iter);
3891
3892         return print_trace_fmt(iter);
3893 }
3894
3895 void trace_latency_header(struct seq_file *m)
3896 {
3897         struct trace_iterator *iter = m->private;
3898         struct trace_array *tr = iter->tr;
3899
3900         /* print nothing if the buffers are empty */
3901         if (trace_empty(iter))
3902                 return;
3903
3904         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3905                 print_trace_header(m, iter);
3906
3907         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3908                 print_lat_help_header(m);
3909 }
3910
3911 void trace_default_header(struct seq_file *m)
3912 {
3913         struct trace_iterator *iter = m->private;
3914         struct trace_array *tr = iter->tr;
3915         unsigned long trace_flags = tr->trace_flags;
3916
3917         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3918                 return;
3919
3920         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3921                 /* print nothing if the buffers are empty */
3922                 if (trace_empty(iter))
3923                         return;
3924                 print_trace_header(m, iter);
3925                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3926                         print_lat_help_header(m);
3927         } else {
3928                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3929                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3930                                 print_func_help_header_irq(iter->trace_buffer,
3931                                                            m, trace_flags);
3932                         else
3933                                 print_func_help_header(iter->trace_buffer, m,
3934                                                        trace_flags);
3935                 }
3936         }
3937 }
3938
3939 static void test_ftrace_alive(struct seq_file *m)
3940 {
3941         if (!ftrace_is_dead())
3942                 return;
3943         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3944                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3945 }
3946
3947 #ifdef CONFIG_TRACER_MAX_TRACE
3948 static void show_snapshot_main_help(struct seq_file *m)
3949 {
3950         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3951                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3952                     "#                      Takes a snapshot of the main buffer.\n"
3953                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3954                     "#                      (Doesn't have to be '2'; works with any number that\n"
3955                     "#                       is not a '0' or '1')\n");
3956 }
3957
3958 static void show_snapshot_percpu_help(struct seq_file *m)
3959 {
3960         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3961 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3962         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3963                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3964 #else
3965         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3966                     "#                     Must use main snapshot file to allocate.\n");
3967 #endif
3968         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3969                     "#                      (Doesn't have to be '2'; works with any number that\n"
3970                     "#                       is not a '0' or '1')\n");
3971 }
3972
3973 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3974 {
3975         if (iter->tr->allocated_snapshot)
3976                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3977         else
3978                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3979
3980         seq_puts(m, "# Snapshot commands:\n");
3981         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3982                 show_snapshot_main_help(m);
3983         else
3984                 show_snapshot_percpu_help(m);
3985 }
3986 #else
3987 /* Should never be called */
3988 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3989 #endif
3990
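/*
 * seq_file ->show() operation: print the headers before the first
 * entry, flush a line that overflowed the seq_file on a previous call,
 * or print the current trace line.
 */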
3991 static int s_show(struct seq_file *m, void *v)
3992 {
3993         struct trace_iterator *iter = v;
3994         int ret;
3995
3996         if (iter->ent == NULL) {
3997                 if (iter->tr) {
3998                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3999                         seq_puts(m, "#\n");
4000                         test_ftrace_alive(m);
4001                 }
4002                 if (iter->snapshot && trace_empty(iter))
4003                         print_snapshot_help(m, iter);
4004                 else if (iter->trace && iter->trace->print_header)
4005                         iter->trace->print_header(m);
4006                 else
4007                         trace_default_header(m);
4008
4009         } else if (iter->leftover) {
4010                 /*
4011                  * If we filled the seq_file buffer earlier, we
4012                  * want to just show it now.
4013                  */
4014                 ret = trace_print_seq(m, &iter->seq);
4015
4016                 /* ret should this time be zero, but you never know */
4017                 iter->leftover = ret;
4018
4019         } else {
4020                 print_trace_line(iter);
4021                 ret = trace_print_seq(m, &iter->seq);
4022                 /*
4023                  * If we overflow the seq_file buffer, then it will
4024                  * ask us for this data again at start up.
4025                  * Use that instead.
4026                  *  ret is 0 if seq_file write succeeded.
4027                  *        -1 otherwise.
4028                  */
4029                 iter->leftover = ret;
4030         }
4031
4032         return 0;
4033 }
4034
4035 /*
4036  * Should be used after trace_array_get(); trace_types_lock
4037  * ensures that i_cdev was already initialized.
4038  */
4039 static inline int tracing_get_cpu(struct inode *inode)
4040 {
4041         if (inode->i_cdev) /* See trace_create_cpu_file() */
4042                 return (long)inode->i_cdev - 1;
4043         return RING_BUFFER_ALL_CPUS;
4044 }
4045
4046 static const struct seq_operations tracer_seq_ops = {
4047         .start          = s_start,
4048         .next           = s_next,
4049         .stop           = s_stop,
4050         .show           = s_show,
4051 };
4052
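/*
 * Set up an iterator for reading the "trace" (or snapshot) file: copy
 * the current tracer, select the buffer to read, stop tracing unless
 * this is a snapshot read, and prepare per-CPU ring buffer iterators.
 */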
4053 static struct trace_iterator *
4054 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4055 {
4056         struct trace_array *tr = inode->i_private;
4057         struct trace_iterator *iter;
4058         int cpu;
4059
4060         if (tracing_disabled)
4061                 return ERR_PTR(-ENODEV);
4062
4063         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4064         if (!iter)
4065                 return ERR_PTR(-ENOMEM);
4066
4067         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4068                                     GFP_KERNEL);
4069         if (!iter->buffer_iter)
4070                 goto release;
4071
4072         /*
4073          * We make a copy of the current tracer to avoid concurrent
4074          * changes on it while we are reading.
4075          */
4076         mutex_lock(&trace_types_lock);
4077         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4078         if (!iter->trace)
4079                 goto fail;
4080
4081         *iter->trace = *tr->current_trace;
4082
4083         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4084                 goto fail;
4085
4086         iter->tr = tr;
4087
4088 #ifdef CONFIG_TRACER_MAX_TRACE
4089         /* Currently only the top directory has a snapshot */
4090         if (tr->current_trace->print_max || snapshot)
4091                 iter->trace_buffer = &tr->max_buffer;
4092         else
4093 #endif
4094                 iter->trace_buffer = &tr->trace_buffer;
4095         iter->snapshot = snapshot;
4096         iter->pos = -1;
4097         iter->cpu_file = tracing_get_cpu(inode);
4098         mutex_init(&iter->mutex);
4099
4100         /* Notify the tracer early; before we stop tracing. */
4101         if (iter->trace && iter->trace->open)
4102                 iter->trace->open(iter);
4103
4104         /* Annotate start of buffers if we had overruns */
4105         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4106                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4107
4108         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4109         if (trace_clocks[tr->clock_id].in_ns)
4110                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4111
4112         /* stop the trace while dumping if we are not opening "snapshot" */
4113         if (!iter->snapshot)
4114                 tracing_stop_tr(tr);
4115
4116         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4117                 for_each_tracing_cpu(cpu) {
4118                         iter->buffer_iter[cpu] =
4119                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4120                                                          cpu, GFP_KERNEL);
4121                 }
4122                 ring_buffer_read_prepare_sync();
4123                 for_each_tracing_cpu(cpu) {
4124                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4125                         tracing_iter_reset(iter, cpu);
4126                 }
4127         } else {
4128                 cpu = iter->cpu_file;
4129                 iter->buffer_iter[cpu] =
4130                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4131                                                  cpu, GFP_KERNEL);
4132                 ring_buffer_read_prepare_sync();
4133                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4134                 tracing_iter_reset(iter, cpu);
4135         }
4136
4137         mutex_unlock(&trace_types_lock);
4138
4139         return iter;
4140
4141  fail:
4142         mutex_unlock(&trace_types_lock);
4143         kfree(iter->trace);
4144         kfree(iter->buffer_iter);
4145 release:
4146         seq_release_private(inode, file);
4147         return ERR_PTR(-ENOMEM);
4148 }
4149
4150 int tracing_open_generic(struct inode *inode, struct file *filp)
4151 {
4152         if (tracing_disabled)
4153                 return -ENODEV;
4154
4155         filp->private_data = inode->i_private;
4156         return 0;
4157 }
4158
4159 bool tracing_is_disabled(void)
4160 {
4161         return tracing_disabled ? true : false;
4162 }
4163
4164 /*
4165  * Open and update trace_array ref count.
4166  * Must have the current trace_array passed to it.
4167  */
4168 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4169 {
4170         struct trace_array *tr = inode->i_private;
4171
4172         if (tracing_disabled)
4173                 return -ENODEV;
4174
4175         if (trace_array_get(tr) < 0)
4176                 return -ENODEV;
4177
4178         filp->private_data = inode->i_private;
4179
4180         return 0;
4181 }
4182
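/*
 * Release of the "trace" file: finish the per-CPU ring buffer
 * iterators, restart tracing if it was stopped on open (non-snapshot
 * readers), and free the iterator.
 */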
4183 static int tracing_release(struct inode *inode, struct file *file)
4184 {
4185         struct trace_array *tr = inode->i_private;
4186         struct seq_file *m = file->private_data;
4187         struct trace_iterator *iter;
4188         int cpu;
4189
4190         if (!(file->f_mode & FMODE_READ)) {
4191                 trace_array_put(tr);
4192                 return 0;
4193         }
4194
4195         /* Writes do not use seq_file */
4196         iter = m->private;
4197         mutex_lock(&trace_types_lock);
4198
4199         for_each_tracing_cpu(cpu) {
4200                 if (iter->buffer_iter[cpu])
4201                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4202         }
4203
4204         if (iter->trace && iter->trace->close)
4205                 iter->trace->close(iter);
4206
4207         if (!iter->snapshot)
4208                 /* reenable tracing if it was previously enabled */
4209                 tracing_start_tr(tr);
4210
4211         __trace_array_put(tr);
4212
4213         mutex_unlock(&trace_types_lock);
4214
4215         mutex_destroy(&iter->mutex);
4216         free_cpumask_var(iter->started);
4217         kfree(iter->trace);
4218         kfree(iter->buffer_iter);
4219         seq_release_private(inode, file);
4220
4221         return 0;
4222 }
4223
4224 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4225 {
4226         struct trace_array *tr = inode->i_private;
4227
4228         trace_array_put(tr);
4229         return 0;
4230 }
4231
4232 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4233 {
4234         struct trace_array *tr = inode->i_private;
4235
4236         trace_array_put(tr);
4237
4238         return single_release(inode, file);
4239 }
4240
4241 static int tracing_open(struct inode *inode, struct file *file)
4242 {
4243         struct trace_array *tr = inode->i_private;
4244         struct trace_iterator *iter;
4245         int ret = 0;
4246
4247         if (trace_array_get(tr) < 0)
4248                 return -ENODEV;
4249
4250         /* If this file was open for write, then erase contents */
4251         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4252                 int cpu = tracing_get_cpu(inode);
4253                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4254
4255 #ifdef CONFIG_TRACER_MAX_TRACE
4256                 if (tr->current_trace->print_max)
4257                         trace_buf = &tr->max_buffer;
4258 #endif
4259
4260                 if (cpu == RING_BUFFER_ALL_CPUS)
4261                         tracing_reset_online_cpus(trace_buf);
4262                 else
4263                         tracing_reset(trace_buf, cpu);
4264         }
4265
4266         if (file->f_mode & FMODE_READ) {
4267                 iter = __tracing_open(inode, file, false);
4268                 if (IS_ERR(iter))
4269                         ret = PTR_ERR(iter);
4270                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4271                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4272         }
4273
4274         if (ret < 0)
4275                 trace_array_put(tr);
4276
4277         return ret;
4278 }
4279
4280 /*
4281  * Some tracers are not suitable for instance buffers.
4282  * A tracer is always available for the global array (toplevel)
4283  * or if it explicitly states that it is.
4284  */
4285 static bool
4286 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4287 {
4288         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4289 }
4290
4291 /* Find the next tracer that this trace array may use */
4292 static struct tracer *
4293 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4294 {
4295         while (t && !trace_ok_for_array(t, tr))
4296                 t = t->next;
4297
4298         return t;
4299 }
4300
4301 static void *
4302 t_next(struct seq_file *m, void *v, loff_t *pos)
4303 {
4304         struct trace_array *tr = m->private;
4305         struct tracer *t = v;
4306
4307         (*pos)++;
4308
4309         if (t)
4310                 t = get_tracer_for_array(tr, t->next);
4311
4312         return t;
4313 }
4314
4315 static void *t_start(struct seq_file *m, loff_t *pos)
4316 {
4317         struct trace_array *tr = m->private;
4318         struct tracer *t;
4319         loff_t l = 0;
4320
4321         mutex_lock(&trace_types_lock);
4322
4323         t = get_tracer_for_array(tr, trace_types);
4324         for (; t && l < *pos; t = t_next(m, t, &l))
4325                 ;
4326
4327         return t;
4328 }
4329
4330 static void t_stop(struct seq_file *m, void *p)
4331 {
4332         mutex_unlock(&trace_types_lock);
4333 }
4334
4335 static int t_show(struct seq_file *m, void *v)
4336 {
4337         struct tracer *t = v;
4338
4339         if (!t)
4340                 return 0;
4341
4342         seq_puts(m, t->name);
4343         if (t->next)
4344                 seq_putc(m, ' ');
4345         else
4346                 seq_putc(m, '\n');
4347
4348         return 0;
4349 }
4350
4351 static const struct seq_operations show_traces_seq_ops = {
4352         .start          = t_start,
4353         .next           = t_next,
4354         .stop           = t_stop,
4355         .show           = t_show,
4356 };
4357
4358 static int show_traces_open(struct inode *inode, struct file *file)
4359 {
4360         struct trace_array *tr = inode->i_private;
4361         struct seq_file *m;
4362         int ret;
4363
4364         if (tracing_disabled)
4365                 return -ENODEV;
4366
4367         ret = seq_open(file, &show_traces_seq_ops);
4368         if (ret)
4369                 return ret;
4370
4371         m = file->private_data;
4372         m->private = tr;
4373
4374         return 0;
4375 }
4376
4377 static ssize_t
4378 tracing_write_stub(struct file *filp, const char __user *ubuf,
4379                    size_t count, loff_t *ppos)
4380 {
4381         return count;
4382 }
4383
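/*
 * Seek on a tracing file. Readers go through seq_lseek(); writers have
 * nothing to seek over, so their file position is simply reset to zero.
 */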
4384 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4385 {
4386         int ret;
4387
4388         if (file->f_mode & FMODE_READ)
4389                 ret = seq_lseek(file, offset, whence);
4390         else
4391                 file->f_pos = ret = 0;
4392
4393         return ret;
4394 }
4395
4396 static const struct file_operations tracing_fops = {
4397         .open           = tracing_open,
4398         .read           = seq_read,
4399         .write          = tracing_write_stub,
4400         .llseek         = tracing_lseek,
4401         .release        = tracing_release,
4402 };
4403
4404 static const struct file_operations show_traces_fops = {
4405         .open           = show_traces_open,
4406         .read           = seq_read,
4407         .release        = seq_release,
4408         .llseek         = seq_lseek,
4409 };
4410
4411 static ssize_t
4412 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4413                      size_t count, loff_t *ppos)
4414 {
4415         struct trace_array *tr = file_inode(filp)->i_private;
4416         char *mask_str;
4417         int len;
4418
4419         len = snprintf(NULL, 0, "%*pb\n",
4420                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4421         mask_str = kmalloc(len, GFP_KERNEL);
4422         if (!mask_str)
4423                 return -ENOMEM;
4424
4425         len = snprintf(mask_str, len, "%*pb\n",
4426                        cpumask_pr_args(tr->tracing_cpumask));
4427         if (len >= count) {
4428                 count = -EINVAL;
4429                 goto out_err;
4430         }
4431         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4432
4433 out_err:
4434         kfree(mask_str);
4435
4436         return count;
4437 }
4438
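/*
 * Update the tracing cpumask from user space. Per-CPU recording is
 * disabled for CPUs leaving the mask and re-enabled for CPUs joining
 * it, under max_lock with interrupts off, before the new mask is saved.
 */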
4439 static ssize_t
4440 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4441                       size_t count, loff_t *ppos)
4442 {
4443         struct trace_array *tr = file_inode(filp)->i_private;
4444         cpumask_var_t tracing_cpumask_new;
4445         int err, cpu;
4446
4447         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4448                 return -ENOMEM;
4449
4450         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4451         if (err)
4452                 goto err_unlock;
4453
4454         local_irq_disable();
4455         arch_spin_lock(&tr->max_lock);
4456         for_each_tracing_cpu(cpu) {
4457                 /*
4458                  * Increase/decrease the disabled counter if we are
4459                  * about to flip a bit in the cpumask:
4460                  */
4461                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4462                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4463                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4464                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4465                 }
4466                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4467                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4468                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4469                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4470                 }
4471         }
4472         arch_spin_unlock(&tr->max_lock);
4473         local_irq_enable();
4474
4475         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4476         free_cpumask_var(tracing_cpumask_new);
4477
4478         return count;
4479
4480 err_unlock:
4481         free_cpumask_var(tracing_cpumask_new);
4482
4483         return err;
4484 }
4485
4486 static const struct file_operations tracing_cpumask_fops = {
4487         .open           = tracing_open_generic_tr,
4488         .read           = tracing_cpumask_read,
4489         .write          = tracing_cpumask_write,
4490         .release        = tracing_release_generic_tr,
4491         .llseek         = generic_file_llseek,
4492 };
4493
4494 static int tracing_trace_options_show(struct seq_file *m, void *v)
4495 {
4496         struct tracer_opt *trace_opts;
4497         struct trace_array *tr = m->private;
4498         u32 tracer_flags;
4499         int i;
4500
4501         mutex_lock(&trace_types_lock);
4502         tracer_flags = tr->current_trace->flags->val;
4503         trace_opts = tr->current_trace->flags->opts;
4504
4505         for (i = 0; trace_options[i]; i++) {
4506                 if (tr->trace_flags & (1 << i))
4507                         seq_printf(m, "%s\n", trace_options[i]);
4508                 else
4509                         seq_printf(m, "no%s\n", trace_options[i]);
4510         }
4511
4512         for (i = 0; trace_opts[i].name; i++) {
4513                 if (tracer_flags & trace_opts[i].bit)
4514                         seq_printf(m, "%s\n", trace_opts[i].name);
4515                 else
4516                         seq_printf(m, "no%s\n", trace_opts[i].name);
4517         }
4518         mutex_unlock(&trace_types_lock);
4519
4520         return 0;
4521 }
4522
4523 static int __set_tracer_option(struct trace_array *tr,
4524                                struct tracer_flags *tracer_flags,
4525                                struct tracer_opt *opts, int neg)
4526 {
4527         struct tracer *trace = tracer_flags->trace;
4528         int ret;
4529
4530         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4531         if (ret)
4532                 return ret;
4533
4534         if (neg)
4535                 tracer_flags->val &= ~opts->bit;
4536         else
4537                 tracer_flags->val |= opts->bit;
4538         return 0;
4539 }
4540
4541 /* Try to assign a tracer specific option */
4542 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4543 {
4544         struct tracer *trace = tr->current_trace;
4545         struct tracer_flags *tracer_flags = trace->flags;
4546         struct tracer_opt *opts = NULL;
4547         int i;
4548
4549         for (i = 0; tracer_flags->opts[i].name; i++) {
4550                 opts = &tracer_flags->opts[i];
4551
4552                 if (strcmp(cmp, opts->name) == 0)
4553                         return __set_tracer_option(tr, trace->flags, opts, neg);
4554         }
4555
4556         return -EINVAL;
4557 }
4558
4559 /* Some tracers require overwrite to stay enabled */
4560 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4561 {
4562         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4563                 return -1;
4564
4565         return 0;
4566 }
4567
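/*
 * Set or clear a core TRACE_ITER_* flag. The current tracer may veto
 * the change via ->flag_changed(); flags with side effects (cmdline
 * and tgid recording, fork following, overwrite mode, printk) are
 * propagated from here as well.
 */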
4568 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4569 {
4570         /* do nothing if flag is already set */
4571         if (!!(tr->trace_flags & mask) == !!enabled)
4572                 return 0;
4573
4574         /* Give the tracer a chance to approve the change */
4575         if (tr->current_trace->flag_changed)
4576                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4577                         return -EINVAL;
4578
4579         if (enabled)
4580                 tr->trace_flags |= mask;
4581         else
4582                 tr->trace_flags &= ~mask;
4583
4584         if (mask == TRACE_ITER_RECORD_CMD)
4585                 trace_event_enable_cmd_record(enabled);
4586
4587         if (mask == TRACE_ITER_RECORD_TGID) {
4588                 if (!tgid_map)
4589                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4590                                            sizeof(*tgid_map),
4591                                            GFP_KERNEL);
4592                 if (!tgid_map) {
4593                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4594                         return -ENOMEM;
4595                 }
4596
4597                 trace_event_enable_tgid_record(enabled);
4598         }
4599
4600         if (mask == TRACE_ITER_EVENT_FORK)
4601                 trace_event_follow_fork(tr, enabled);
4602
4603         if (mask == TRACE_ITER_FUNC_FORK)
4604                 ftrace_pid_follow_fork(tr, enabled);
4605
4606         if (mask == TRACE_ITER_OVERWRITE) {
4607                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4608 #ifdef CONFIG_TRACER_MAX_TRACE
4609                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4610 #endif
4611         }
4612
4613         if (mask == TRACE_ITER_PRINTK) {
4614                 trace_printk_start_stop_comm(enabled);
4615                 trace_printk_control(enabled);
4616         }
4617
4618         return 0;
4619 }
4620
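/*
 * Apply a single option token such as "overwrite" or its negation
 * "nooverwrite". Core trace flags are tried first; anything unknown
 * is handed to the current tracer as a tracer-specific option.
 */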
4621 static int trace_set_options(struct trace_array *tr, char *option)
4622 {
4623         char *cmp;
4624         int neg = 0;
4625         int ret;
4626         size_t orig_len = strlen(option);
4627         int len;
4628
4629         cmp = strstrip(option);
4630
4631         len = str_has_prefix(cmp, "no");
4632         if (len)
4633                 neg = 1;
4634
4635         cmp += len;
4636
4637         mutex_lock(&trace_types_lock);
4638
4639         ret = match_string(trace_options, -1, cmp);
4640         /* If no option could be set, test the specific tracer options */
4641         if (ret < 0)
4642                 ret = set_tracer_option(tr, cmp, neg);
4643         else
4644                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4645
4646         mutex_unlock(&trace_types_lock);
4647
4648         /*
4649          * If the first trailing whitespace is replaced with '\0' by strstrip,
4650          * turn it back into a space.
4651          */
4652         if (orig_len > strlen(option))
4653                 option[strlen(option)] = ' ';
4654
4655         return ret;
4656 }
4657
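/*
 * Apply the comma-separated options saved in trace_boot_options_buf
 * (filled in from the boot command line) to the global trace instance.
 */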
4658 static void __init apply_trace_boot_options(void)
4659 {
4660         char *buf = trace_boot_options_buf;
4661         char *option;
4662
4663         while (true) {
4664                 option = strsep(&buf, ",");
4665
4666                 if (!option)
4667                         break;
4668
4669                 if (*option)
4670                         trace_set_options(&global_trace, option);
4671
4672                 /* Put back the comma to allow this to be called again */
4673                 if (buf)
4674                         *(buf - 1) = ',';
4675         }
4676 }
4677
4678 static ssize_t
4679 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4680                         size_t cnt, loff_t *ppos)
4681 {
4682         struct seq_file *m = filp->private_data;
4683         struct trace_array *tr = m->private;
4684         char buf[64];
4685         int ret;
4686
4687         if (cnt >= sizeof(buf))
4688                 return -EINVAL;
4689
4690         if (copy_from_user(buf, ubuf, cnt))
4691                 return -EFAULT;
4692
4693         buf[cnt] = 0;
4694
4695         ret = trace_set_options(tr, buf);
4696         if (ret < 0)
4697                 return ret;
4698
4699         *ppos += cnt;
4700
4701         return cnt;
4702 }
4703
4704 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4705 {
4706         struct trace_array *tr = inode->i_private;
4707         int ret;
4708
4709         if (tracing_disabled)
4710                 return -ENODEV;
4711
4712         if (trace_array_get(tr) < 0)
4713                 return -ENODEV;
4714
4715         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4716         if (ret < 0)
4717                 trace_array_put(tr);
4718
4719         return ret;
4720 }
4721
4722 static const struct file_operations tracing_iter_fops = {
4723         .open           = tracing_trace_options_open,
4724         .read           = seq_read,
4725         .llseek         = seq_lseek,
4726         .release        = tracing_single_release_tr,
4727         .write          = tracing_trace_options_write,
4728 };
4729
4730 static const char readme_msg[] =
4731         "tracing mini-HOWTO:\n\n"
4732         "# echo 0 > tracing_on : quick way to disable tracing\n"
4733         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4734         " Important files:\n"
4735         "  trace\t\t\t- The static contents of the buffer\n"
4736         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4737         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4738         "  current_tracer\t- function and latency tracers\n"
4739         "  available_tracers\t- list of configured tracers for current_tracer\n"
4740         "  error_log\t- error log for failed commands (that support it)\n"
4741         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4742         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4743         "  trace_clock\t\t- change the clock used to order events\n"
4744         "       local:   Per cpu clock but may not be synced across CPUs\n"
4745         "      global:   Synced across CPUs but slows tracing down.\n"
4746         "     counter:   Not a clock, but just an increment\n"
4747         "      uptime:   Jiffy counter from time of boot\n"
4748         "        perf:   Same clock that perf events use\n"
4749 #ifdef CONFIG_X86_64
4750         "     x86-tsc:   TSC cycle counter\n"
4751 #endif
4752         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4753         "       delta:   Delta difference against a buffer-wide timestamp\n"
4754         "    absolute:   Absolute (standalone) timestamp\n"
4755         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4756         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4757         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4758         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4759         "\t\t\t  Remove sub-buffer with rmdir\n"
4760         "  trace_options\t\t- Set format or modify how tracing happens\n"
4761         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4762         "\t\t\t  option name\n"
4763         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4764 #ifdef CONFIG_DYNAMIC_FTRACE
4765         "\n  available_filter_functions - list of functions that can be filtered on\n"
4766         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4767         "\t\t\t  functions\n"
4768         "\t     accepts: func_full_name or glob-matching-pattern\n"
4769         "\t     modules: Can select a group via module\n"
4770         "\t      Format: :mod:<module-name>\n"
4771         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4772         "\t    triggers: a command to perform when function is hit\n"
4773         "\t      Format: <function>:<trigger>[:count]\n"
4774         "\t     trigger: traceon, traceoff\n"
4775         "\t\t      enable_event:<system>:<event>\n"
4776         "\t\t      disable_event:<system>:<event>\n"
4777 #ifdef CONFIG_STACKTRACE
4778         "\t\t      stacktrace\n"
4779 #endif
4780 #ifdef CONFIG_TRACER_SNAPSHOT
4781         "\t\t      snapshot\n"
4782 #endif
4783         "\t\t      dump\n"
4784         "\t\t      cpudump\n"
4785         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4786         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4787         "\t     The first one will disable tracing every time do_fault is hit\n"
4788         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4789         "\t       The first time do_trap is hit and it disables tracing, the\n"
4790         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4791         "\t       the counter will not decrement. It only decrements when the\n"
4792         "\t       trigger did work\n"
4793         "\t     To remove trigger without count:\n"
4794         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4795         "\t     To remove trigger with a count:\n"
4796         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4797         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4798         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4799         "\t    modules: Can select a group via module command :mod:\n"
4800         "\t    Does not accept triggers\n"
4801 #endif /* CONFIG_DYNAMIC_FTRACE */
4802 #ifdef CONFIG_FUNCTION_TRACER
4803         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4804         "\t\t    (function)\n"
4805 #endif
4806 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4807         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4808         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4809         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4810 #endif
4811 #ifdef CONFIG_TRACER_SNAPSHOT
4812         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4813         "\t\t\t  snapshot buffer. Read the contents for more\n"
4814         "\t\t\t  information\n"
4815 #endif
4816 #ifdef CONFIG_STACK_TRACER
4817         "  stack_trace\t\t- Shows the max stack trace when active\n"
4818         "  stack_max_size\t- Shows current max stack size that was traced\n"
4819         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4820         "\t\t\t  new trace)\n"
4821 #ifdef CONFIG_DYNAMIC_FTRACE
4822         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4823         "\t\t\t  traces\n"
4824 #endif
4825 #endif /* CONFIG_STACK_TRACER */
4826 #ifdef CONFIG_DYNAMIC_EVENTS
4827         "  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4828         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4829 #endif
4830 #ifdef CONFIG_KPROBE_EVENTS
4831         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4832         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4833 #endif
4834 #ifdef CONFIG_UPROBE_EVENTS
4835         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4836         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4837 #endif
4838 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4839         "\t  accepts: event-definitions (one definition per line)\n"
4840         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4841         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4842 #ifdef CONFIG_HIST_TRIGGERS
4843         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4844 #endif
4845         "\t           -:[<group>/]<event>\n"
4846 #ifdef CONFIG_KPROBE_EVENTS
4847         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4848   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4849 #endif
4850 #ifdef CONFIG_UPROBE_EVENTS
4851   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4852 #endif
4853         "\t     args: <name>=fetcharg[:type]\n"
4854         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4855 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4856         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4857 #else
4858         "\t           $stack<index>, $stack, $retval, $comm\n"
4859 #endif
4860         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4861         "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4862         "\t           <type>\\[<array-size>\\]\n"
4863 #ifdef CONFIG_HIST_TRIGGERS
4864         "\t    field: <stype> <name>;\n"
4865         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4866         "\t           [unsigned] char/int/long\n"
4867 #endif
4868 #endif
4869         "  events/\t\t- Directory containing all trace event subsystems:\n"
4870         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4871         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4872         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4873         "\t\t\t  events\n"
4874         "      filter\t\t- If set, only events passing filter are traced\n"
4875         "  events/<system>/<event>/\t- Directory containing control files for\n"
4876         "\t\t\t  <event>:\n"
4877         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4878         "      filter\t\t- If set, only events passing filter are traced\n"
4879         "      trigger\t\t- If set, a command to perform when event is hit\n"
4880         "\t    Format: <trigger>[:count][if <filter>]\n"
4881         "\t   trigger: traceon, traceoff\n"
4882         "\t            enable_event:<system>:<event>\n"
4883         "\t            disable_event:<system>:<event>\n"
4884 #ifdef CONFIG_HIST_TRIGGERS
4885         "\t            enable_hist:<system>:<event>\n"
4886         "\t            disable_hist:<system>:<event>\n"
4887 #endif
4888 #ifdef CONFIG_STACKTRACE
4889         "\t\t    stacktrace\n"
4890 #endif
4891 #ifdef CONFIG_TRACER_SNAPSHOT
4892         "\t\t    snapshot\n"
4893 #endif
4894 #ifdef CONFIG_HIST_TRIGGERS
4895         "\t\t    hist (see below)\n"
4896 #endif
4897         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4898         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4899         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4900         "\t                  events/block/block_unplug/trigger\n"
4901         "\t   The first disables tracing every time block_unplug is hit.\n"
4902         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4903         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4904         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4905         "\t   Like function triggers, the counter is only decremented if it\n"
4906         "\t    enabled or disabled tracing.\n"
4907         "\t   To remove a trigger without a count:\n"
4908         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4909         "\t   To remove a trigger with a count:\n"
4910         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4911         "\t   Filters can be ignored when removing a trigger.\n"
4912 #ifdef CONFIG_HIST_TRIGGERS
4913         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4914         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4915         "\t            [:values=<field1[,field2,...]>]\n"
4916         "\t            [:sort=<field1[,field2,...]>]\n"
4917         "\t            [:size=#entries]\n"
4918         "\t            [:pause][:continue][:clear]\n"
4919         "\t            [:name=histname1]\n"
4920         "\t            [:<handler>.<action>]\n"
4921         "\t            [if <filter>]\n\n"
4922         "\t    When a matching event is hit, an entry is added to a hash\n"
4923         "\t    table using the key(s) and value(s) named, and the value of a\n"
4924         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4925         "\t    correspond to fields in the event's format description.  Keys\n"
4926         "\t    can be any field, or the special string 'stacktrace'.\n"
4927         "\t    Compound keys consisting of up to two fields can be specified\n"
4928         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4929         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4930         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4931         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4932         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4933         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4934         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4935         "\t    its histogram data will be shared with other triggers of the\n"
4936         "\t    same name, and trigger hits will update this common data.\n\n"
4937         "\t    Reading the 'hist' file for the event will dump the hash\n"
4938         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4939         "\t    triggers attached to an event, there will be a table for each\n"
4940         "\t    trigger in the output.  The table displayed for a named\n"
4941         "\t    trigger will be the same as any other instance having the\n"
4942         "\t    same name.  The default format used to display a given field\n"
4943         "\t    can be modified by appending any of the following modifiers\n"
4944         "\t    to the field name, as applicable:\n\n"
4945         "\t            .hex        display a number as a hex value\n"
4946         "\t            .sym        display an address as a symbol\n"
4947         "\t            .sym-offset display an address as a symbol and offset\n"
4948         "\t            .execname   display a common_pid as a program name\n"
4949         "\t            .syscall    display a syscall id as a syscall name\n"
4950         "\t            .log2       display log2 value rather than raw number\n"
4951         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4952         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4953         "\t    trigger or to start a hist trigger but not log any events\n"
4954         "\t    until told to do so.  'continue' can be used to start or\n"
4955         "\t    restart a paused hist trigger.\n\n"
4956         "\t    The 'clear' parameter will clear the contents of a running\n"
4957         "\t    hist trigger and leave its current paused/active state\n"
4958         "\t    unchanged.\n\n"
4959         "\t    The enable_hist and disable_hist triggers can be used to\n"
4960         "\t    have one event conditionally start and stop another event's\n"
4961         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4962         "\t    the enable_event and disable_event triggers.\n\n"
4963         "\t    Hist trigger handlers and actions are executed whenever a\n"
4964         "\t    histogram entry is added or updated.  They take the form:\n\n"
4965         "\t        <handler>.<action>\n\n"
4966         "\t    The available handlers are:\n\n"
4967         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4968         "\t        onmax(var)               - invoke if var exceeds current max\n"
4969         "\t        onchange(var)            - invoke action if var changes\n\n"
4970         "\t    The available actions are:\n\n"
4971         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4972         "\t        save(field,...)                      - save current event fields\n"
4973 #ifdef CONFIG_TRACER_SNAPSHOT
4974         "\t        snapshot()                           - snapshot the trace buffer\n"
4975 #endif
4976 #endif
4977 ;
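/*
 * Usage sketch (not part of the help text above): combining the trigger
 * and hist syntax documented in readme_msg, a hist trigger on the
 * block_unplug event used in the examples could look like:
 *
 *   echo 'hist:keys=common_pid.execname:sort=hitcount.descending' \
 *         > events/block/block_unplug/trigger
 *
 * Every keyword and modifier here is taken from the text above.
 */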
4978
4979 static ssize_t
4980 tracing_readme_read(struct file *filp, char __user *ubuf,
4981                        size_t cnt, loff_t *ppos)
4982 {
4983         return simple_read_from_buffer(ubuf, cnt, ppos,
4984                                         readme_msg, strlen(readme_msg));
4985 }
4986
4987 static const struct file_operations tracing_readme_fops = {
4988         .open           = tracing_open_generic,
4989         .read           = tracing_readme_read,
4990         .llseek         = generic_file_llseek,
4991 };
4992
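/*
 * The "saved_tgids" file: a seq_file over tgid_map that emits one
 * "<pid> <tgid>" pair per line for every pid that has a recorded tgid.
 */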
4993 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4994 {
4995         int *ptr = v;
4996
4997         if (*pos || m->count)
4998                 ptr++;
4999
5000         (*pos)++;
5001
5002         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5003                 if (trace_find_tgid(*ptr))
5004                         return ptr;
5005         }
5006
5007         return NULL;
5008 }
5009
5010 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5011 {
5012         void *v;
5013         loff_t l = 0;
5014
5015         if (!tgid_map)
5016                 return NULL;
5017
5018         v = &tgid_map[0];
5019         while (l <= *pos) {
5020                 v = saved_tgids_next(m, v, &l);
5021                 if (!v)
5022                         return NULL;
5023         }
5024
5025         return v;
5026 }
5027
5028 static void saved_tgids_stop(struct seq_file *m, void *v)
5029 {
5030 }
5031
5032 static int saved_tgids_show(struct seq_file *m, void *v)
5033 {
5034         int pid = (int *)v - tgid_map;
5035
5036         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5037         return 0;
5038 }
5039
5040 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5041         .start          = saved_tgids_start,
5042         .stop           = saved_tgids_stop,
5043         .next           = saved_tgids_next,
5044         .show           = saved_tgids_show,
5045 };
5046
5047 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5048 {
5049         if (tracing_disabled)
5050                 return -ENODEV;
5051
5052         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5053 }
5054
5055
5056 static const struct file_operations tracing_saved_tgids_fops = {
5057         .open           = tracing_saved_tgids_open,
5058         .read           = seq_read,
5059         .llseek         = seq_lseek,
5060         .release        = seq_release,
5061 };
5062
5063 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5064 {
5065         unsigned int *ptr = v;
5066
5067         if (*pos || m->count)
5068                 ptr++;
5069
5070         (*pos)++;
5071
5072         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5073              ptr++) {
5074                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5075                         continue;
5076
5077                 return ptr;
5078         }
5079
5080         return NULL;
5081 }
5082
5083 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5084 {
5085         void *v;
5086         loff_t l = 0;
5087
5088         preempt_disable();
5089         arch_spin_lock(&trace_cmdline_lock);
5090
5091         v = &savedcmd->map_cmdline_to_pid[0];
5092         while (l <= *pos) {
5093                 v = saved_cmdlines_next(m, v, &l);
5094                 if (!v)
5095                         return NULL;
5096         }
5097
5098         return v;
5099 }
5100
5101 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5102 {
5103         arch_spin_unlock(&trace_cmdline_lock);
5104         preempt_enable();
5105 }
5106
5107 static int saved_cmdlines_show(struct seq_file *m, void *v)
5108 {
5109         char buf[TASK_COMM_LEN];
5110         unsigned int *pid = v;
5111
5112         __trace_find_cmdline(*pid, buf);
5113         seq_printf(m, "%d %s\n", *pid, buf);
5114         return 0;
5115 }
5116
5117 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5118         .start          = saved_cmdlines_start,
5119         .next           = saved_cmdlines_next,
5120         .stop           = saved_cmdlines_stop,
5121         .show           = saved_cmdlines_show,
5122 };
5123
5124 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5125 {
5126         if (tracing_disabled)
5127                 return -ENODEV;
5128
5129         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5130 }
5131
5132 static const struct file_operations tracing_saved_cmdlines_fops = {
5133         .open           = tracing_saved_cmdlines_open,
5134         .read           = seq_read,
5135         .llseek         = seq_lseek,
5136         .release        = seq_release,
5137 };
5138
5139 static ssize_t
5140 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5141                                  size_t cnt, loff_t *ppos)
5142 {
5143         char buf[64];
5144         int r;
5145
5146         arch_spin_lock(&trace_cmdline_lock);
5147         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5148         arch_spin_unlock(&trace_cmdline_lock);
5149
5150         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5151 }
5152
5153 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5154 {
5155         kfree(s->saved_cmdlines);
5156         kfree(s->map_cmdline_to_pid);
5157         kfree(s);
5158 }
5159
5160 static int tracing_resize_saved_cmdlines(unsigned int val)
5161 {
5162         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5163
5164         s = kmalloc(sizeof(*s), GFP_KERNEL);
5165         if (!s)
5166                 return -ENOMEM;
5167
5168         if (allocate_cmdlines_buffer(val, s) < 0) {
5169                 kfree(s);
5170                 return -ENOMEM;
5171         }
5172
5173         arch_spin_lock(&trace_cmdline_lock);
5174         savedcmd_temp = savedcmd;
5175         savedcmd = s;
5176         arch_spin_unlock(&trace_cmdline_lock);
5177         free_saved_cmdlines_buffer(savedcmd_temp);
5178
5179         return 0;
5180 }
5181
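/*
 * Writing a number to "saved_cmdlines_size" resizes the saved comm-pid
 * cache, e.g. "echo 1024 > saved_cmdlines_size".  The value must be at
 * least 1 and no larger than PID_MAX_DEFAULT (checked below).
 */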
5182 static ssize_t
5183 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5184                                   size_t cnt, loff_t *ppos)
5185 {
5186         unsigned long val;
5187         int ret;
5188
5189         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5190         if (ret)
5191                 return ret;
5192
5193         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5194         if (!val || val > PID_MAX_DEFAULT)
5195                 return -EINVAL;
5196
5197         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5198         if (ret < 0)
5199                 return ret;
5200
5201         *ppos += cnt;
5202
5203         return cnt;
5204 }
5205
5206 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5207         .open           = tracing_open_generic,
5208         .read           = tracing_saved_cmdlines_size_read,
5209         .write          = tracing_saved_cmdlines_size_write,
5210 };
5211
5212 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5213 static union trace_eval_map_item *
5214 update_eval_map(union trace_eval_map_item *ptr)
5215 {
5216         if (!ptr->map.eval_string) {
5217                 if (ptr->tail.next) {
5218                         ptr = ptr->tail.next;
5219                         /* Set ptr to the next real item (skip head) */
5220                         ptr++;
5221                 } else
5222                         return NULL;
5223         }
5224         return ptr;
5225 }
5226
5227 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5228 {
5229         union trace_eval_map_item *ptr = v;
5230
5231         /*
5232          * Paranoid! If ptr points to end, we don't want to increment past it.
5233          * This really should never happen.
5234          */
5235         ptr = update_eval_map(ptr);
5236         if (WARN_ON_ONCE(!ptr))
5237                 return NULL;
5238
5239         ptr++;
5240
5241         (*pos)++;
5242
5243         ptr = update_eval_map(ptr);
5244
5245         return ptr;
5246 }
5247
5248 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5249 {
5250         union trace_eval_map_item *v;
5251         loff_t l = 0;
5252
5253         mutex_lock(&trace_eval_mutex);
5254
5255         v = trace_eval_maps;
5256         if (v)
5257                 v++;
5258
5259         while (v && l < *pos) {
5260                 v = eval_map_next(m, v, &l);
5261         }
5262
5263         return v;
5264 }
5265
5266 static void eval_map_stop(struct seq_file *m, void *v)
5267 {
5268         mutex_unlock(&trace_eval_mutex);
5269 }
5270
5271 static int eval_map_show(struct seq_file *m, void *v)
5272 {
5273         union trace_eval_map_item *ptr = v;
5274
5275         seq_printf(m, "%s %ld (%s)\n",
5276                    ptr->map.eval_string, ptr->map.eval_value,
5277                    ptr->map.system);
5278
5279         return 0;
5280 }
5281
5282 static const struct seq_operations tracing_eval_map_seq_ops = {
5283         .start          = eval_map_start,
5284         .next           = eval_map_next,
5285         .stop           = eval_map_stop,
5286         .show           = eval_map_show,
5287 };
5288
5289 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5290 {
5291         if (tracing_disabled)
5292                 return -ENODEV;
5293
5294         return seq_open(filp, &tracing_eval_map_seq_ops);
5295 }
5296
5297 static const struct file_operations tracing_eval_map_fops = {
5298         .open           = tracing_eval_map_open,
5299         .read           = seq_read,
5300         .llseek         = seq_lseek,
5301         .release        = seq_release,
5302 };
5303
5304 static inline union trace_eval_map_item *
5305 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5306 {
5307         /* Return tail of array given the head */
5308         return ptr + ptr->head.length + 1;
5309 }
5310
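/*
 * Layout of one map_array allocation built by this function:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * The tail is zeroed here and its ->tail.next is filled in later when
 * another module's array is chained on via trace_eval_jmp_to_tail().
 */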
5311 static void
5312 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5313                            int len)
5314 {
5315         struct trace_eval_map **stop;
5316         struct trace_eval_map **map;
5317         union trace_eval_map_item *map_array;
5318         union trace_eval_map_item *ptr;
5319
5320         stop = start + len;
5321
5322         /*
5323          * The trace_eval_maps contains the map plus a head and tail item,
5324          * where the head holds the module and length of array, and the
5325          * tail holds a pointer to the next list.
5326          */
5327         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5328         if (!map_array) {
5329                 pr_warn("Unable to allocate trace eval mapping\n");
5330                 return;
5331         }
5332
5333         mutex_lock(&trace_eval_mutex);
5334
5335         if (!trace_eval_maps)
5336                 trace_eval_maps = map_array;
5337         else {
5338                 ptr = trace_eval_maps;
5339                 for (;;) {
5340                         ptr = trace_eval_jmp_to_tail(ptr);
5341                         if (!ptr->tail.next)
5342                                 break;
5343                         ptr = ptr->tail.next;
5344
5345                 }
5346                 ptr->tail.next = map_array;
5347         }
5348         map_array->head.mod = mod;
5349         map_array->head.length = len;
5350         map_array++;
5351
5352         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5353                 map_array->map = **map;
5354                 map_array++;
5355         }
5356         memset(map_array, 0, sizeof(*map_array));
5357
5358         mutex_unlock(&trace_eval_mutex);
5359 }
5360
5361 static void trace_create_eval_file(struct dentry *d_tracer)
5362 {
5363         trace_create_file("eval_map", 0444, d_tracer,
5364                           NULL, &tracing_eval_map_fops);
5365 }
5366
5367 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5368 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5369 static inline void trace_insert_eval_map_file(struct module *mod,
5370                               struct trace_eval_map **start, int len) { }
5371 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5372
5373 static void trace_insert_eval_map(struct module *mod,
5374                                   struct trace_eval_map **start, int len)
5375 {
5376         struct trace_eval_map **map;
5377
5378         if (len <= 0)
5379                 return;
5380
5381         map = start;
5382
5383         trace_event_eval_update(map, len);
5384
5385         trace_insert_eval_map_file(mod, start, len);
5386 }
5387
5388 static ssize_t
5389 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5390                        size_t cnt, loff_t *ppos)
5391 {
5392         struct trace_array *tr = filp->private_data;
5393         char buf[MAX_TRACER_SIZE+2];
5394         int r;
5395
5396         mutex_lock(&trace_types_lock);
5397         r = sprintf(buf, "%s\n", tr->current_trace->name);
5398         mutex_unlock(&trace_types_lock);
5399
5400         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5401 }
5402
5403 int tracer_init(struct tracer *t, struct trace_array *tr)
5404 {
5405         tracing_reset_online_cpus(&tr->trace_buffer);
5406         return t->init(tr);
5407 }
5408
5409 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5410 {
5411         int cpu;
5412
5413         for_each_tracing_cpu(cpu)
5414                 per_cpu_ptr(buf->data, cpu)->entries = val;
5415 }
5416
5417 #ifdef CONFIG_TRACER_MAX_TRACE
5418 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5419 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5420                                         struct trace_buffer *size_buf, int cpu_id)
5421 {
5422         int cpu, ret = 0;
5423
5424         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5425                 for_each_tracing_cpu(cpu) {
5426                         ret = ring_buffer_resize(trace_buf->buffer,
5427                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5428                         if (ret < 0)
5429                                 break;
5430                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5431                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5432                 }
5433         } else {
5434                 ret = ring_buffer_resize(trace_buf->buffer,
5435                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5436                 if (ret == 0)
5437                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5438                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5439         }
5440
5441         return ret;
5442 }
5443 #endif /* CONFIG_TRACER_MAX_TRACE */
5444
5445 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5446                                         unsigned long size, int cpu)
5447 {
5448         int ret;
5449
5450         /*
5451          * If kernel or user changes the size of the ring buffer
5452          * we use the size that was given, and we can forget about
5453          * expanding it later.
5454          */
5455         ring_buffer_expanded = true;
5456
5457         /* May be called before buffers are initialized */
5458         if (!tr->trace_buffer.buffer)
5459                 return 0;
5460
5461         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5462         if (ret < 0)
5463                 return ret;
5464
5465 #ifdef CONFIG_TRACER_MAX_TRACE
5466         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5467             !tr->current_trace->use_max_tr)
5468                 goto out;
5469
5470         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5471         if (ret < 0) {
5472                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5473                                                      &tr->trace_buffer, cpu);
5474                 if (r < 0) {
5475                         /*
5476                          * AARGH! We are left with different
5477                          * size max buffer!!!!
5478                          * The max buffer is our "snapshot" buffer.
5479                          * When a tracer needs a snapshot (one of the
5480                          * latency tracers), it swaps the max buffer
5481                          * with the saved snapshot. We succeeded in updating
5482                          * the size of the main buffer, but failed to
5483                          * update the size of the max buffer. But when we tried
5484                          * to reset the main buffer to the original size, we
5485                          * failed there too. This is very unlikely to
5486                          * happen, but if it does, warn and kill all
5487                          * tracing.
5488                          */
5489                         WARN_ON(1);
5490                         tracing_disabled = 1;
5491                 }
5492                 return ret;
5493         }
5494
5495         if (cpu == RING_BUFFER_ALL_CPUS)
5496                 set_buffer_entries(&tr->max_buffer, size);
5497         else
5498                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5499
5500  out:
5501 #endif /* CONFIG_TRACER_MAX_TRACE */
5502
5503         if (cpu == RING_BUFFER_ALL_CPUS)
5504                 set_buffer_entries(&tr->trace_buffer, size);
5505         else
5506                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5507
5508         return ret;
5509 }
5510
5511 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5512                                           unsigned long size, int cpu_id)
5513 {
5514         int ret = size;
5515
5516         mutex_lock(&trace_types_lock);
5517
5518         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5519                 /* make sure this cpu is enabled in the mask */
5520                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5521                         ret = -EINVAL;
5522                         goto out;
5523                 }
5524         }
5525
5526         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5527         if (ret < 0)
5528                 ret = -ENOMEM;
5529
5530 out:
5531         mutex_unlock(&trace_types_lock);
5532
5533         return ret;
5534 }
5535
5536
5537 /**
5538  * tracing_update_buffers - used by tracing facility to expand ring buffers
5539  *
5540  * To save memory when tracing is never used on a system that has it
5541  * configured in, the ring buffers are set to a minimum size. Once a
5542  * user starts to use the tracing facility, they need to grow to
5543  * their default size.
5544  *
5545  * This function is to be called when a tracer is about to be used.
5546  */
5547 int tracing_update_buffers(void)
5548 {
5549         int ret = 0;
5550
5551         mutex_lock(&trace_types_lock);
5552         if (!ring_buffer_expanded)
5553                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5554                                                 RING_BUFFER_ALL_CPUS);
5555         mutex_unlock(&trace_types_lock);
5556
5557         return ret;
5558 }
5559
5560 struct trace_option_dentry;
5561
5562 static void
5563 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5564
5565 /*
5566  * Used to clear out the tracer before deletion of an instance.
5567  * Must have trace_types_lock held.
5568  */
5569 static void tracing_set_nop(struct trace_array *tr)
5570 {
5571         if (tr->current_trace == &nop_trace)
5572                 return;
5573
5574         tr->current_trace->enabled--;
5575
5576         if (tr->current_trace->reset)
5577                 tr->current_trace->reset(tr);
5578
5579         tr->current_trace = &nop_trace;
5580 }
5581
5582 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5583 {
5584         /* Only enable if the directory has been created already. */
5585         if (!tr->dir)
5586                 return;
5587
5588         create_trace_option_files(tr, t);
5589 }
5590
5591 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5592 {
5593         struct tracer *t;
5594 #ifdef CONFIG_TRACER_MAX_TRACE
5595         bool had_max_tr;
5596 #endif
5597         int ret = 0;
5598
5599         mutex_lock(&trace_types_lock);
5600
5601         if (!ring_buffer_expanded) {
5602                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5603                                                 RING_BUFFER_ALL_CPUS);
5604                 if (ret < 0)
5605                         goto out;
5606                 ret = 0;
5607         }
5608
5609         for (t = trace_types; t; t = t->next) {
5610                 if (strcmp(t->name, buf) == 0)
5611                         break;
5612         }
5613         if (!t) {
5614                 ret = -EINVAL;
5615                 goto out;
5616         }
5617         if (t == tr->current_trace)
5618                 goto out;
5619
5620 #ifdef CONFIG_TRACER_SNAPSHOT
5621         if (t->use_max_tr) {
5622                 arch_spin_lock(&tr->max_lock);
5623                 if (tr->cond_snapshot)
5624                         ret = -EBUSY;
5625                 arch_spin_unlock(&tr->max_lock);
5626                 if (ret)
5627                         goto out;
5628         }
5629 #endif
5630         /* Some tracers won't work on kernel command line */
5631         if (system_state < SYSTEM_RUNNING && t->noboot) {
5632                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5633                         t->name);
5634                 goto out;
5635         }
5636
5637         /* Some tracers are only allowed for the top level buffer */
5638         if (!trace_ok_for_array(t, tr)) {
5639                 ret = -EINVAL;
5640                 goto out;
5641         }
5642
5643         /* If trace pipe files are being read, we can't change the tracer */
5644         if (tr->current_trace->ref) {
5645                 ret = -EBUSY;
5646                 goto out;
5647         }
5648
5649         trace_branch_disable();
5650
5651         tr->current_trace->enabled--;
5652
5653         if (tr->current_trace->reset)
5654                 tr->current_trace->reset(tr);
5655
5656         /* Current trace needs to be nop_trace before synchronize_rcu */
5657         tr->current_trace = &nop_trace;
5658
5659 #ifdef CONFIG_TRACER_MAX_TRACE
5660         had_max_tr = tr->allocated_snapshot;
5661
5662         if (had_max_tr && !t->use_max_tr) {
5663                 /*
5664                  * We need to make sure that the update_max_tr sees that
5665                  * current_trace changed to nop_trace to keep it from
5666                  * swapping the buffers after we resize it.
5667                  * The update_max_tr is called with interrupts disabled,
5668                  * so a synchronize_rcu() is sufficient.
5669                  */
5670                 synchronize_rcu();
5671                 free_snapshot(tr);
5672         }
5673 #endif
5674
5675 #ifdef CONFIG_TRACER_MAX_TRACE
5676         if (t->use_max_tr && !had_max_tr) {
5677                 ret = tracing_alloc_snapshot_instance(tr);
5678                 if (ret < 0)
5679                         goto out;
5680         }
5681 #endif
5682
5683         if (t->init) {
5684                 ret = tracer_init(t, tr);
5685                 if (ret)
5686                         goto out;
5687         }
5688
5689         tr->current_trace = t;
5690         tr->current_trace->enabled++;
5691         trace_branch_enable(tr);
5692  out:
5693         mutex_unlock(&trace_types_lock);
5694
5695         return ret;
5696 }
5697
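/*
 * Write handler for the "current_tracer" file, e.g.
 * "echo nop > current_tracer"; the requested tracer is switched in via
 * tracing_set_tracer() above.
 */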
5698 static ssize_t
5699 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5700                         size_t cnt, loff_t *ppos)
5701 {
5702         struct trace_array *tr = filp->private_data;
5703         char buf[MAX_TRACER_SIZE+1];
5704         int i;
5705         size_t ret;
5706         int err;
5707
5708         ret = cnt;
5709
5710         if (cnt > MAX_TRACER_SIZE)
5711                 cnt = MAX_TRACER_SIZE;
5712
5713         if (copy_from_user(buf, ubuf, cnt))
5714                 return -EFAULT;
5715
5716         buf[cnt] = 0;
5717
5718         /* strip ending whitespace. */
5719         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5720                 buf[i] = 0;
5721
5722         err = tracing_set_tracer(tr, buf);
5723         if (err)
5724                 return err;
5725
5726         *ppos += ret;
5727
5728         return ret;
5729 }
5730
5731 static ssize_t
5732 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5733                    size_t cnt, loff_t *ppos)
5734 {
5735         char buf[64];
5736         int r;
5737
5738         r = snprintf(buf, sizeof(buf), "%ld\n",
5739                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5740         if (r > sizeof(buf))
5741                 r = sizeof(buf);
5742         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5743 }
5744
5745 static ssize_t
5746 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5747                     size_t cnt, loff_t *ppos)
5748 {
5749         unsigned long val;
5750         int ret;
5751
5752         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5753         if (ret)
5754                 return ret;
5755
5756         *ptr = val * 1000;
5757
5758         return cnt;
5759 }
5760
5761 static ssize_t
5762 tracing_thresh_read(struct file *filp, char __user *ubuf,
5763                     size_t cnt, loff_t *ppos)
5764 {
5765         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5766 }
5767
5768 static ssize_t
5769 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5770                      size_t cnt, loff_t *ppos)
5771 {
5772         struct trace_array *tr = filp->private_data;
5773         int ret;
5774
5775         mutex_lock(&trace_types_lock);
5776         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5777         if (ret < 0)
5778                 goto out;
5779
5780         if (tr->current_trace->update_thresh) {
5781                 ret = tr->current_trace->update_thresh(tr);
5782                 if (ret < 0)
5783                         goto out;
5784         }
5785
5786         ret = cnt;
5787 out:
5788         mutex_unlock(&trace_types_lock);
5789
5790         return ret;
5791 }
5792
5793 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5794
5795 static ssize_t
5796 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5797                      size_t cnt, loff_t *ppos)
5798 {
5799         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5800 }
5801
5802 static ssize_t
5803 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5804                       size_t cnt, loff_t *ppos)
5805 {
5806         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5807 }
5808
5809 #endif
5810
5811 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5812 {
5813         struct trace_array *tr = inode->i_private;
5814         struct trace_iterator *iter;
5815         int ret = 0;
5816
5817         if (tracing_disabled)
5818                 return -ENODEV;
5819
5820         if (trace_array_get(tr) < 0)
5821                 return -ENODEV;
5822
5823         mutex_lock(&trace_types_lock);
5824
5825         /* create a buffer to store the information to pass to userspace */
5826         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5827         if (!iter) {
5828                 ret = -ENOMEM;
5829                 __trace_array_put(tr);
5830                 goto out;
5831         }
5832
5833         trace_seq_init(&iter->seq);
5834         iter->trace = tr->current_trace;
5835
5836         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5837                 ret = -ENOMEM;
5838                 goto fail;
5839         }
5840
5841         /* trace pipe does not show start of buffer */
5842         cpumask_setall(iter->started);
5843
5844         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5845                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5846
5847         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5848         if (trace_clocks[tr->clock_id].in_ns)
5849                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5850
5851         iter->tr = tr;
5852         iter->trace_buffer = &tr->trace_buffer;
5853         iter->cpu_file = tracing_get_cpu(inode);
5854         mutex_init(&iter->mutex);
5855         filp->private_data = iter;
5856
5857         if (iter->trace->pipe_open)
5858                 iter->trace->pipe_open(iter);
5859
5860         nonseekable_open(inode, filp);
5861
5862         tr->current_trace->ref++;
5863 out:
5864         mutex_unlock(&trace_types_lock);
5865         return ret;
5866
5867 fail:
5868         kfree(iter);
5869         __trace_array_put(tr);
5870         mutex_unlock(&trace_types_lock);
5871         return ret;
5872 }
5873
5874 static int tracing_release_pipe(struct inode *inode, struct file *file)
5875 {
5876         struct trace_iterator *iter = file->private_data;
5877         struct trace_array *tr = inode->i_private;
5878
5879         mutex_lock(&trace_types_lock);
5880
5881         tr->current_trace->ref--;
5882
5883         if (iter->trace->pipe_close)
5884                 iter->trace->pipe_close(iter);
5885
5886         mutex_unlock(&trace_types_lock);
5887
5888         free_cpumask_var(iter->started);
5889         mutex_destroy(&iter->mutex);
5890         kfree(iter);
5891
5892         trace_array_put(tr);
5893
5894         return 0;
5895 }
5896
5897 static __poll_t
5898 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5899 {
5900         struct trace_array *tr = iter->tr;
5901
5902         /* Iterators are static, they should be filled or empty */
5903         if (trace_buffer_iter(iter, iter->cpu_file))
5904                 return EPOLLIN | EPOLLRDNORM;
5905
5906         if (tr->trace_flags & TRACE_ITER_BLOCK)
5907                 /*
5908                  * Always select as readable when in blocking mode
5909                  */
5910                 return EPOLLIN | EPOLLRDNORM;
5911         else
5912                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5913                                              filp, poll_table);
5914 }
5915
5916 static __poll_t
5917 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5918 {
5919         struct trace_iterator *iter = filp->private_data;
5920
5921         return trace_poll(iter, filp, poll_table);
5922 }
5923
5924 /* Must be called with iter->mutex held. */
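/*
 * Returns 1 when the caller should try reading again (data arrived or
 * tracing was turned off after a partial read), -EAGAIN for O_NONBLOCK
 * readers, or a negative error from wait_on_pipe().
 */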
5925 static int tracing_wait_pipe(struct file *filp)
5926 {
5927         struct trace_iterator *iter = filp->private_data;
5928         int ret;
5929
5930         while (trace_empty(iter)) {
5931
5932                 if ((filp->f_flags & O_NONBLOCK)) {
5933                         return -EAGAIN;
5934                 }
5935
5936                 /*
5937                  * We block until we read something and tracing is disabled.
5938                  * We still block if tracing is disabled, but we have never
5939                  * read anything. This allows a user to cat this file, and
5940                  * then enable tracing. But after we have read something,
5941                  * we give an EOF when tracing is again disabled.
5942                  *
5943                  * iter->pos will be 0 if we haven't read anything.
5944                  */
5945                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5946                         break;
5947
5948                 mutex_unlock(&iter->mutex);
5949
5950                 ret = wait_on_pipe(iter, 0);
5951
5952                 mutex_lock(&iter->mutex);
5953
5954                 if (ret)
5955                         return ret;
5956         }
5957
5958         return 1;
5959 }
5960
5961 /*
5962  * Consumer reader.
5963  */
5964 static ssize_t
5965 tracing_read_pipe(struct file *filp, char __user *ubuf,
5966                   size_t cnt, loff_t *ppos)
5967 {
5968         struct trace_iterator *iter = filp->private_data;
5969         ssize_t sret;
5970
5971         /*
5972          * Avoid more than one consumer on a single file descriptor.
5973          * This is just a matter of trace coherency; the ring buffer itself
5974          * is protected.
5975          */
5976         mutex_lock(&iter->mutex);
5977
5978         /* return any leftover data */
5979         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5980         if (sret != -EBUSY)
5981                 goto out;
5982
5983         trace_seq_init(&iter->seq);
5984
5985         if (iter->trace->read) {
5986                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5987                 if (sret)
5988                         goto out;
5989         }
5990
5991 waitagain:
5992         sret = tracing_wait_pipe(filp);
5993         if (sret <= 0)
5994                 goto out;
5995
5996         /* stop when tracing is finished */
5997         if (trace_empty(iter)) {
5998                 sret = 0;
5999                 goto out;
6000         }
6001
6002         if (cnt >= PAGE_SIZE)
6003                 cnt = PAGE_SIZE - 1;
6004
6005         /* reset all but tr, trace, and overruns */
6006         memset(&iter->seq, 0,
6007                sizeof(struct trace_iterator) -
6008                offsetof(struct trace_iterator, seq));
6009         cpumask_clear(iter->started);
6010         iter->pos = -1;
6011
6012         trace_event_read_lock();
6013         trace_access_lock(iter->cpu_file);
6014         while (trace_find_next_entry_inc(iter) != NULL) {
6015                 enum print_line_t ret;
6016                 int save_len = iter->seq.seq.len;
6017
6018                 ret = print_trace_line(iter);
6019                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6020                         /* don't print partial lines */
6021                         iter->seq.seq.len = save_len;
6022                         break;
6023                 }
6024                 if (ret != TRACE_TYPE_NO_CONSUME)
6025                         trace_consume(iter);
6026
6027                 if (trace_seq_used(&iter->seq) >= cnt)
6028                         break;
6029
6030                 /*
6031                  * Setting the full flag means we reached the trace_seq buffer
6032                  * size and we should have left via the partial-output condition
6033                  * above.  One of the trace_seq_* functions is not used properly.
6034                  */
6035                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6036                           iter->ent->type);
6037         }
6038         trace_access_unlock(iter->cpu_file);
6039         trace_event_read_unlock();
6040
6041         /* Now copy what we have to the user */
6042         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6043         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6044                 trace_seq_init(&iter->seq);
6045
6046         /*
6047          * If there was nothing to send to user, in spite of consuming trace
6048          * entries, go back to wait for more entries.
6049          */
6050         if (sret == -EBUSY)
6051                 goto waitagain;
6052
6053 out:
6054         mutex_unlock(&iter->mutex);
6055
6056         return sret;
6057 }
6058
6059 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6060                                      unsigned int idx)
6061 {
6062         __free_page(spd->pages[idx]);
6063 }
6064
6065 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6066         .confirm                = generic_pipe_buf_confirm,
6067         .release                = generic_pipe_buf_release,
6068         .steal                  = generic_pipe_buf_steal,
6069         .get                    = generic_pipe_buf_get,
6070 };
6071
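/*
 * Fill iter->seq with as many complete trace lines as fit in one splice
 * page.  Returns how many of the requested bytes are still outstanding;
 * 0 means the request is satisfied or there is nothing left to read.
 */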
6072 static size_t
6073 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6074 {
6075         size_t count;
6076         int save_len;
6077         int ret;
6078
6079         /* Seq buffer is page-sized, exactly what we need. */
6080         for (;;) {
6081                 save_len = iter->seq.seq.len;
6082                 ret = print_trace_line(iter);
6083
6084                 if (trace_seq_has_overflowed(&iter->seq)) {
6085                         iter->seq.seq.len = save_len;
6086                         break;
6087                 }
6088
6089                 /*
6090                  * This should not be hit, because it should only
6091                  * be set if the iter->seq overflowed. But check it
6092                  * anyway to be safe.
6093                  */
6094                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6095                         iter->seq.seq.len = save_len;
6096                         break;
6097                 }
6098
6099                 count = trace_seq_used(&iter->seq) - save_len;
6100                 if (rem < count) {
6101                         rem = 0;
6102                         iter->seq.seq.len = save_len;
6103                         break;
6104                 }
6105
6106                 if (ret != TRACE_TYPE_NO_CONSUME)
6107                         trace_consume(iter);
6108                 rem -= count;
6109                 if (!trace_find_next_entry_inc(iter))   {
6110                         rem = 0;
6111                         iter->ent = NULL;
6112                         break;
6113                 }
6114         }
6115
6116         return rem;
6117 }
6118
6119 static ssize_t tracing_splice_read_pipe(struct file *filp,
6120                                         loff_t *ppos,
6121                                         struct pipe_inode_info *pipe,
6122                                         size_t len,
6123                                         unsigned int flags)
6124 {
6125         struct page *pages_def[PIPE_DEF_BUFFERS];
6126         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6127         struct trace_iterator *iter = filp->private_data;
6128         struct splice_pipe_desc spd = {
6129                 .pages          = pages_def,
6130                 .partial        = partial_def,
6131                 .nr_pages       = 0, /* This gets updated below. */
6132                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6133                 .ops            = &tracing_pipe_buf_ops,
6134                 .spd_release    = tracing_spd_release_pipe,
6135         };
6136         ssize_t ret;
6137         size_t rem;
6138         unsigned int i;
6139
6140         if (splice_grow_spd(pipe, &spd))
6141                 return -ENOMEM;
6142
6143         mutex_lock(&iter->mutex);
6144
6145         if (iter->trace->splice_read) {
6146                 ret = iter->trace->splice_read(iter, filp,
6147                                                ppos, pipe, len, flags);
6148                 if (ret)
6149                         goto out_err;
6150         }
6151
6152         ret = tracing_wait_pipe(filp);
6153         if (ret <= 0)
6154                 goto out_err;
6155
6156         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6157                 ret = -EFAULT;
6158                 goto out_err;
6159         }
6160
6161         trace_event_read_lock();
6162         trace_access_lock(iter->cpu_file);
6163
6164         /* Fill as many pages as possible. */
6165         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6166                 spd.pages[i] = alloc_page(GFP_KERNEL);
6167                 if (!spd.pages[i])
6168                         break;
6169
6170                 rem = tracing_fill_pipe_page(rem, iter);
6171
6172                 /* Copy the data into the page, so we can start over. */
6173                 ret = trace_seq_to_buffer(&iter->seq,
6174                                           page_address(spd.pages[i]),
6175                                           trace_seq_used(&iter->seq));
6176                 if (ret < 0) {
6177                         __free_page(spd.pages[i]);
6178                         break;
6179                 }
6180                 spd.partial[i].offset = 0;
6181                 spd.partial[i].len = trace_seq_used(&iter->seq);
6182
6183                 trace_seq_init(&iter->seq);
6184         }
6185
6186         trace_access_unlock(iter->cpu_file);
6187         trace_event_read_unlock();
6188         mutex_unlock(&iter->mutex);
6189
6190         spd.nr_pages = i;
6191
6192         if (i)
6193                 ret = splice_to_pipe(pipe, &spd);
6194         else
6195                 ret = 0;
6196 out:
6197         splice_shrink_spd(&spd);
6198         return ret;
6199
6200 out_err:
6201         mutex_unlock(&iter->mutex);
6202         goto out;
6203 }
6204
6205 static ssize_t
6206 tracing_entries_read(struct file *filp, char __user *ubuf,
6207                      size_t cnt, loff_t *ppos)
6208 {
6209         struct inode *inode = file_inode(filp);
6210         struct trace_array *tr = inode->i_private;
6211         int cpu = tracing_get_cpu(inode);
6212         char buf[64];
6213         int r = 0;
6214         ssize_t ret;
6215
6216         mutex_lock(&trace_types_lock);
6217
6218         if (cpu == RING_BUFFER_ALL_CPUS) {
6219                 int cpu, buf_size_same;
6220                 unsigned long size;
6221
6222                 size = 0;
6223                 buf_size_same = 1;
6224                 /* check if all cpu sizes are same */
6225                 for_each_tracing_cpu(cpu) {
6226                         /* fill in the size from first enabled cpu */
6227                         if (size == 0)
6228                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6229                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6230                                 buf_size_same = 0;
6231                                 break;
6232                         }
6233                 }
6234
6235                 if (buf_size_same) {
6236                         if (!ring_buffer_expanded)
6237                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6238                                             size >> 10,
6239                                             trace_buf_size >> 10);
6240                         else
6241                                 r = sprintf(buf, "%lu\n", size >> 10);
6242                 } else
6243                         r = sprintf(buf, "X\n");
6244         } else
6245                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6246
6247         mutex_unlock(&trace_types_lock);
6248
6249         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6250         return ret;
6251 }
6252
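/*
 * Write handler for the "buffer_size_kb" files.  The value is taken in
 * kilobytes (shifted left by 10 below); e.g. "echo 4096 > buffer_size_kb"
 * asks for 4 MB of ring buffer per selected CPU.
 */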
6253 static ssize_t
6254 tracing_entries_write(struct file *filp, const char __user *ubuf,
6255                       size_t cnt, loff_t *ppos)
6256 {
6257         struct inode *inode = file_inode(filp);
6258         struct trace_array *tr = inode->i_private;
6259         unsigned long val;
6260         int ret;
6261
6262         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6263         if (ret)
6264                 return ret;
6265
6266         /* must have at least 1 entry */
6267         if (!val)
6268                 return -EINVAL;
6269
6270         /* value is in KB */
6271         val <<= 10;
6272         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6273         if (ret < 0)
6274                 return ret;
6275
6276         *ppos += cnt;
6277
6278         return cnt;
6279 }
6280
6281 static ssize_t
6282 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6283                                 size_t cnt, loff_t *ppos)
6284 {
6285         struct trace_array *tr = filp->private_data;
6286         char buf[64];
6287         int r, cpu;
6288         unsigned long size = 0, expanded_size = 0;
6289
6290         mutex_lock(&trace_types_lock);
6291         for_each_tracing_cpu(cpu) {
6292                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6293                 if (!ring_buffer_expanded)
6294                         expanded_size += trace_buf_size >> 10;
6295         }
6296         if (ring_buffer_expanded)
6297                 r = sprintf(buf, "%lu\n", size);
6298         else
6299                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6300         mutex_unlock(&trace_types_lock);
6301
6302         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6303 }
6304
6305 static ssize_t
6306 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6307                           size_t cnt, loff_t *ppos)
6308 {
6309         /*
6310          * There is no need to read what the user has written; this function
6311          * exists only so that "echo" into this file does not return an error.
6312          */
6313
6314         *ppos += cnt;
6315
6316         return cnt;
6317 }
6318
6319 static int
6320 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6321 {
6322         struct trace_array *tr = inode->i_private;
6323
6324         /* disable tracing ? */
6325         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6326                 tracer_tracing_off(tr);
6327         /* resize the ring buffer to 0 */
6328         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6329
6330         trace_array_put(tr);
6331
6332         return 0;
6333 }
6334
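/*
 * Write handler for "trace_marker" (see readme_msg above): the user text
 * is copied into a TRACE_PRINT event, truncated to TRACE_BUF_SIZE, and a
 * trailing newline is appended if the write did not end with one.
 */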
6335 static ssize_t
6336 tracing_mark_write(struct file *filp, const char __user *ubuf,
6337                                         size_t cnt, loff_t *fpos)
6338 {
6339         struct trace_array *tr = filp->private_data;
6340         struct ring_buffer_event *event;
6341         enum event_trigger_type tt = ETT_NONE;
6342         struct ring_buffer *buffer;
6343         struct print_entry *entry;
6344         unsigned long irq_flags;
6345         const char faulted[] = "<faulted>";
6346         ssize_t written;
6347         int size;
6348         int len;
6349
6350 /* Used in tracing_mark_raw_write() as well */
6351 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6352
6353         if (tracing_disabled)
6354                 return -EINVAL;
6355
6356         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6357                 return -EINVAL;
6358
6359         if (cnt > TRACE_BUF_SIZE)
6360                 cnt = TRACE_BUF_SIZE;
6361
6362         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6363
6364         local_save_flags(irq_flags);
6365         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6366
6367         /* If less than "<faulted>", then make sure we can still add that */
6368         if (cnt < FAULTED_SIZE)
6369                 size += FAULTED_SIZE - cnt;
6370
6371         buffer = tr->trace_buffer.buffer;
6372         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6373                                             irq_flags, preempt_count());
6374         if (unlikely(!event))
6375                 /* Ring buffer disabled, return as if not open for write */
6376                 return -EBADF;
6377
6378         entry = ring_buffer_event_data(event);
6379         entry->ip = _THIS_IP_;
6380
6381         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6382         if (len) {
6383                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6384                 cnt = FAULTED_SIZE;
6385                 written = -EFAULT;
6386         } else
6387                 written = cnt;
6388         len = cnt;
6389
6390         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6391                 /* do not add \n before testing triggers, but add \0 */
6392                 entry->buf[cnt] = '\0';
6393                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6394         }
6395
6396         if (entry->buf[cnt - 1] != '\n') {
6397                 entry->buf[cnt] = '\n';
6398                 entry->buf[cnt + 1] = '\0';
6399         } else
6400                 entry->buf[cnt] = '\0';
6401
6402         __buffer_unlock_commit(buffer, event);
6403
6404         if (tt)
6405                 event_triggers_post_call(tr->trace_marker_file, tt);
6406
6407         if (written > 0)
6408                 *fpos += written;
6409
6410         return written;
6411 }
6412
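/*
 * Illustrative usage sketch (not part of this file): user space can inject
 * annotations into the trace by writing plain text to trace_marker; a
 * trailing newline is appended if the write does not supply one, and writes
 * larger than TRACE_BUF_SIZE are truncated.  Assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   echo "hit checkpoint A" > /sys/kernel/tracing/trace_marker
 */
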
6413 /* Limit it for now to 3K (including tag) */
6414 #define RAW_DATA_MAX_SIZE (1024*3)
6415
6416 static ssize_t
6417 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6418                                         size_t cnt, loff_t *fpos)
6419 {
6420         struct trace_array *tr = filp->private_data;
6421         struct ring_buffer_event *event;
6422         struct ring_buffer *buffer;
6423         struct raw_data_entry *entry;
6424         const char faulted[] = "<faulted>";
6425         unsigned long irq_flags;
6426         ssize_t written;
6427         int size;
6428         int len;
6429
6430 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6431
6432         if (tracing_disabled)
6433                 return -EINVAL;
6434
6435         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6436                 return -EINVAL;
6437
6438         /* The marker must at least have a tag id */
6439         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6440                 return -EINVAL;
6441
6442         if (cnt > TRACE_BUF_SIZE)
6443                 cnt = TRACE_BUF_SIZE;
6444
6445         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6446
6447         local_save_flags(irq_flags);
6448         size = sizeof(*entry) + cnt;
6449         if (cnt < FAULT_SIZE_ID)
6450                 size += FAULT_SIZE_ID - cnt;
6451
6452         buffer = tr->trace_buffer.buffer;
6453         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6454                                             irq_flags, preempt_count());
6455         if (!event)
6456                 /* Ring buffer disabled, return as if not open for write */
6457                 return -EBADF;
6458
6459         entry = ring_buffer_event_data(event);
6460
6461         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6462         if (len) {
6463                 entry->id = -1;
6464                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6465                 written = -EFAULT;
6466         } else
6467                 written = cnt;
6468
6469         __buffer_unlock_commit(buffer, event);
6470
6471         if (written > 0)
6472                 *fpos += written;
6473
6474         return written;
6475 }
6476
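/*
 * Illustrative usage sketch (not part of this file): trace_marker_raw expects
 * a binary payload whose first sizeof(int) bytes are a tag id chosen by the
 * writer, followed by raw data, up to RAW_DATA_MAX_SIZE bytes in total.
 * A minimal user-space writer (paths and sizes are assumptions) might do:
 *
 *   struct { int id; char payload[32]; } rec = { .id = 42 };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *   memcpy(rec.payload, "raw sample", 11);
 *   write(fd, &rec, sizeof(rec));
 *   close(fd);
 *
 * Writes shorter than the tag id or larger than RAW_DATA_MAX_SIZE are
 * rejected with -EINVAL.
 */
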
6477 static int tracing_clock_show(struct seq_file *m, void *v)
6478 {
6479         struct trace_array *tr = m->private;
6480         int i;
6481
6482         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6483                 seq_printf(m,
6484                         "%s%s%s%s", i ? " " : "",
6485                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6486                         i == tr->clock_id ? "]" : "");
6487         seq_putc(m, '\n');
6488
6489         return 0;
6490 }
6491
6492 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6493 {
6494         int i;
6495
6496         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6497                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6498                         break;
6499         }
6500         if (i == ARRAY_SIZE(trace_clocks))
6501                 return -EINVAL;
6502
6503         mutex_lock(&trace_types_lock);
6504
6505         tr->clock_id = i;
6506
6507         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6508
6509         /*
6510          * New clock may not be consistent with the previous clock.
6511          * Reset the buffer so that it doesn't have incomparable timestamps.
6512          */
6513         tracing_reset_online_cpus(&tr->trace_buffer);
6514
6515 #ifdef CONFIG_TRACER_MAX_TRACE
6516         if (tr->max_buffer.buffer)
6517                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6518         tracing_reset_online_cpus(&tr->max_buffer);
6519 #endif
6520
6521         mutex_unlock(&trace_types_lock);
6522
6523         return 0;
6524 }
6525
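/*
 * Illustrative usage sketch (not part of this file): the trace clock can be
 * switched at runtime through the trace_clock file.  Reading it lists the
 * available clocks with the current one in brackets; writing a name selects
 * it and, as noted above, resets the existing buffer contents.  Assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 */
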
6526 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6527                                    size_t cnt, loff_t *fpos)
6528 {
6529         struct seq_file *m = filp->private_data;
6530         struct trace_array *tr = m->private;
6531         char buf[64];
6532         const char *clockstr;
6533         int ret;
6534
6535         if (cnt >= sizeof(buf))
6536                 return -EINVAL;
6537
6538         if (copy_from_user(buf, ubuf, cnt))
6539                 return -EFAULT;
6540
6541         buf[cnt] = 0;
6542
6543         clockstr = strstrip(buf);
6544
6545         ret = tracing_set_clock(tr, clockstr);
6546         if (ret)
6547                 return ret;
6548
6549         *fpos += cnt;
6550
6551         return cnt;
6552 }
6553
6554 static int tracing_clock_open(struct inode *inode, struct file *file)
6555 {
6556         struct trace_array *tr = inode->i_private;
6557         int ret;
6558
6559         if (tracing_disabled)
6560                 return -ENODEV;
6561
6562         if (trace_array_get(tr))
6563                 return -ENODEV;
6564
6565         ret = single_open(file, tracing_clock_show, inode->i_private);
6566         if (ret < 0)
6567                 trace_array_put(tr);
6568
6569         return ret;
6570 }
6571
6572 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6573 {
6574         struct trace_array *tr = m->private;
6575
6576         mutex_lock(&trace_types_lock);
6577
6578         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6579                 seq_puts(m, "delta [absolute]\n");
6580         else
6581                 seq_puts(m, "[delta] absolute\n");
6582
6583         mutex_unlock(&trace_types_lock);
6584
6585         return 0;
6586 }
6587
6588 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6589 {
6590         struct trace_array *tr = inode->i_private;
6591         int ret;
6592
6593         if (tracing_disabled)
6594                 return -ENODEV;
6595
6596         if (trace_array_get(tr))
6597                 return -ENODEV;
6598
6599         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6600         if (ret < 0)
6601                 trace_array_put(tr);
6602
6603         return ret;
6604 }
6605
6606 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6607 {
6608         int ret = 0;
6609
6610         mutex_lock(&trace_types_lock);
6611
6612         if (abs && tr->time_stamp_abs_ref++)
6613                 goto out;
6614
6615         if (!abs) {
6616                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6617                         ret = -EINVAL;
6618                         goto out;
6619                 }
6620
6621                 if (--tr->time_stamp_abs_ref)
6622                         goto out;
6623         }
6624
6625         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6626
6627 #ifdef CONFIG_TRACER_MAX_TRACE
6628         if (tr->max_buffer.buffer)
6629                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6630 #endif
6631  out:
6632         mutex_unlock(&trace_types_lock);
6633
6634         return ret;
6635 }
6636
6637 struct ftrace_buffer_info {
6638         struct trace_iterator   iter;
6639         void                    *spare;
6640         unsigned int            spare_cpu;
6641         unsigned int            read;
6642 };
6643
6644 #ifdef CONFIG_TRACER_SNAPSHOT
6645 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6646 {
6647         struct trace_array *tr = inode->i_private;
6648         struct trace_iterator *iter;
6649         struct seq_file *m;
6650         int ret = 0;
6651
6652         if (trace_array_get(tr) < 0)
6653                 return -ENODEV;
6654
6655         if (file->f_mode & FMODE_READ) {
6656                 iter = __tracing_open(inode, file, true);
6657                 if (IS_ERR(iter))
6658                         ret = PTR_ERR(iter);
6659         } else {
6660                 /* Writes still need the seq_file to hold the private data */
6661                 ret = -ENOMEM;
6662                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6663                 if (!m)
6664                         goto out;
6665                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6666                 if (!iter) {
6667                         kfree(m);
6668                         goto out;
6669                 }
6670                 ret = 0;
6671
6672                 iter->tr = tr;
6673                 iter->trace_buffer = &tr->max_buffer;
6674                 iter->cpu_file = tracing_get_cpu(inode);
6675                 m->private = iter;
6676                 file->private_data = m;
6677         }
6678 out:
6679         if (ret < 0)
6680                 trace_array_put(tr);
6681
6682         return ret;
6683 }
6684
6685 static ssize_t
6686 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6687                        loff_t *ppos)
6688 {
6689         struct seq_file *m = filp->private_data;
6690         struct trace_iterator *iter = m->private;
6691         struct trace_array *tr = iter->tr;
6692         unsigned long val;
6693         int ret;
6694
6695         ret = tracing_update_buffers();
6696         if (ret < 0)
6697                 return ret;
6698
6699         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6700         if (ret)
6701                 return ret;
6702
6703         mutex_lock(&trace_types_lock);
6704
6705         if (tr->current_trace->use_max_tr) {
6706                 ret = -EBUSY;
6707                 goto out;
6708         }
6709
6710         arch_spin_lock(&tr->max_lock);
6711         if (tr->cond_snapshot)
6712                 ret = -EBUSY;
6713         arch_spin_unlock(&tr->max_lock);
6714         if (ret)
6715                 goto out;
6716
6717         switch (val) {
6718         case 0:
6719                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6720                         ret = -EINVAL;
6721                         break;
6722                 }
6723                 if (tr->allocated_snapshot)
6724                         free_snapshot(tr);
6725                 break;
6726         case 1:
6727 /* Only allow per-cpu swap if the ring buffer supports it */
6728 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6729                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6730                         ret = -EINVAL;
6731                         break;
6732                 }
6733 #endif
6734                 if (!tr->allocated_snapshot) {
6735                         ret = tracing_alloc_snapshot_instance(tr);
6736                         if (ret < 0)
6737                                 break;
6738                 }
6739                 local_irq_disable();
6740                 /* Now, we're going to swap */
6741                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6742                         update_max_tr(tr, current, smp_processor_id(), NULL);
6743                 else
6744                         update_max_tr_single(tr, current, iter->cpu_file);
6745                 local_irq_enable();
6746                 break;
6747         default:
6748                 if (tr->allocated_snapshot) {
6749                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6750                                 tracing_reset_online_cpus(&tr->max_buffer);
6751                         else
6752                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6753                 }
6754                 break;
6755         }
6756
6757         if (ret >= 0) {
6758                 *ppos += cnt;
6759                 ret = cnt;
6760         }
6761 out:
6762         mutex_unlock(&trace_types_lock);
6763         return ret;
6764 }
6765
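/*
 * Illustrative usage sketch (not part of this file), mirroring the switch
 * statement above.  Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, take a snapshot
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear the snapshot, keep the buffer
 *
 * The per-cpu snapshot files accept "1" only when the ring buffer supports
 * per-cpu swap (CONFIG_RING_BUFFER_ALLOW_SWAP).
 */
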
6766 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6767 {
6768         struct seq_file *m = file->private_data;
6769         int ret;
6770
6771         ret = tracing_release(inode, file);
6772
6773         if (file->f_mode & FMODE_READ)
6774                 return ret;
6775
6776         /* If write only, the seq_file is just a stub */
6777         if (m)
6778                 kfree(m->private);
6779         kfree(m);
6780
6781         return 0;
6782 }
6783
6784 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6785 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6786                                     size_t count, loff_t *ppos);
6787 static int tracing_buffers_release(struct inode *inode, struct file *file);
6788 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6789                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6790
6791 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6792 {
6793         struct ftrace_buffer_info *info;
6794         int ret;
6795
6796         ret = tracing_buffers_open(inode, filp);
6797         if (ret < 0)
6798                 return ret;
6799
6800         info = filp->private_data;
6801
6802         if (info->iter.trace->use_max_tr) {
6803                 tracing_buffers_release(inode, filp);
6804                 return -EBUSY;
6805         }
6806
6807         info->iter.snapshot = true;
6808         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6809
6810         return ret;
6811 }
6812
6813 #endif /* CONFIG_TRACER_SNAPSHOT */
6814
6815
6816 static const struct file_operations tracing_thresh_fops = {
6817         .open           = tracing_open_generic,
6818         .read           = tracing_thresh_read,
6819         .write          = tracing_thresh_write,
6820         .llseek         = generic_file_llseek,
6821 };
6822
6823 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6824 static const struct file_operations tracing_max_lat_fops = {
6825         .open           = tracing_open_generic,
6826         .read           = tracing_max_lat_read,
6827         .write          = tracing_max_lat_write,
6828         .llseek         = generic_file_llseek,
6829 };
6830 #endif
6831
6832 static const struct file_operations set_tracer_fops = {
6833         .open           = tracing_open_generic,
6834         .read           = tracing_set_trace_read,
6835         .write          = tracing_set_trace_write,
6836         .llseek         = generic_file_llseek,
6837 };
6838
6839 static const struct file_operations tracing_pipe_fops = {
6840         .open           = tracing_open_pipe,
6841         .poll           = tracing_poll_pipe,
6842         .read           = tracing_read_pipe,
6843         .splice_read    = tracing_splice_read_pipe,
6844         .release        = tracing_release_pipe,
6845         .llseek         = no_llseek,
6846 };
6847
6848 static const struct file_operations tracing_entries_fops = {
6849         .open           = tracing_open_generic_tr,
6850         .read           = tracing_entries_read,
6851         .write          = tracing_entries_write,
6852         .llseek         = generic_file_llseek,
6853         .release        = tracing_release_generic_tr,
6854 };
6855
6856 static const struct file_operations tracing_total_entries_fops = {
6857         .open           = tracing_open_generic_tr,
6858         .read           = tracing_total_entries_read,
6859         .llseek         = generic_file_llseek,
6860         .release        = tracing_release_generic_tr,
6861 };
6862
6863 static const struct file_operations tracing_free_buffer_fops = {
6864         .open           = tracing_open_generic_tr,
6865         .write          = tracing_free_buffer_write,
6866         .release        = tracing_free_buffer_release,
6867 };
6868
6869 static const struct file_operations tracing_mark_fops = {
6870         .open           = tracing_open_generic_tr,
6871         .write          = tracing_mark_write,
6872         .llseek         = generic_file_llseek,
6873         .release        = tracing_release_generic_tr,
6874 };
6875
6876 static const struct file_operations tracing_mark_raw_fops = {
6877         .open           = tracing_open_generic_tr,
6878         .write          = tracing_mark_raw_write,
6879         .llseek         = generic_file_llseek,
6880         .release        = tracing_release_generic_tr,
6881 };
6882
6883 static const struct file_operations trace_clock_fops = {
6884         .open           = tracing_clock_open,
6885         .read           = seq_read,
6886         .llseek         = seq_lseek,
6887         .release        = tracing_single_release_tr,
6888         .write          = tracing_clock_write,
6889 };
6890
6891 static const struct file_operations trace_time_stamp_mode_fops = {
6892         .open           = tracing_time_stamp_mode_open,
6893         .read           = seq_read,
6894         .llseek         = seq_lseek,
6895         .release        = tracing_single_release_tr,
6896 };
6897
6898 #ifdef CONFIG_TRACER_SNAPSHOT
6899 static const struct file_operations snapshot_fops = {
6900         .open           = tracing_snapshot_open,
6901         .read           = seq_read,
6902         .write          = tracing_snapshot_write,
6903         .llseek         = tracing_lseek,
6904         .release        = tracing_snapshot_release,
6905 };
6906
6907 static const struct file_operations snapshot_raw_fops = {
6908         .open           = snapshot_raw_open,
6909         .read           = tracing_buffers_read,
6910         .release        = tracing_buffers_release,
6911         .splice_read    = tracing_buffers_splice_read,
6912         .llseek         = no_llseek,
6913 };
6914
6915 #endif /* CONFIG_TRACER_SNAPSHOT */
6916
6917 #define TRACING_LOG_ERRS_MAX    8
6918 #define TRACING_LOG_LOC_MAX     128
6919
6920 #define CMD_PREFIX "  Command: "
6921
6922 struct err_info {
6923         const char      **errs; /* ptr to loc-specific array of err strings */
6924         u8              type;   /* index into errs -> specific err string */
6925         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
6926         u64             ts;
6927 };
6928
6929 struct tracing_log_err {
6930         struct list_head        list;
6931         struct err_info         info;
6932         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
6933         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6934 };
6935
6936 static DEFINE_MUTEX(tracing_err_log_lock);
6937
6938 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6939 {
6940         struct tracing_log_err *err;
6941
6942         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6943                 err = kzalloc(sizeof(*err), GFP_KERNEL);
6944                 if (!err)
6945                         return ERR_PTR(-ENOMEM);
6946                 tr->n_err_log_entries++;
6947
6948                 return err;
6949         }
6950
6951         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6952         list_del(&err->list);
6953
6954         return err;
6955 }
6956
6957 /**
6958  * err_pos - find the position of a string within a command for error careting
6959  * @cmd: The tracing command that caused the error
6960  * @str: The string to position the caret at within @cmd
6961  *
6962  * Finds the position of the first occurrence of @str within @cmd.  The
6963  * return value can be passed to tracing_log_err() for caret placement
6964  * within @cmd.
6965  *
6966  * Returns the index within @cmd of the first occurrence of @str or 0
6967  * if @str was not found.
6968  */
6969 unsigned int err_pos(char *cmd, const char *str)
6970 {
6971         char *found;
6972
6973         if (WARN_ON(!strlen(cmd)))
6974                 return 0;
6975
6976         found = strstr(cmd, str);
6977         if (found)
6978                 return found - cmd;
6979
6980         return 0;
6981 }
6982
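/*
 * Illustrative usage sketch (not part of this file); the command below is
 * hypothetical.  Given the command string and the token that caused the
 * failure, err_pos() yields the caret offset to pass to tracing_log_err():
 *
 *   char *cmd = "hist:keys=bogus_field";
 *   unsigned int pos = err_pos(cmd, "bogus_field");  // pos == 10
 *
 * If the token is not found, 0 is returned and the caret lands on the first
 * character of the command.
 */
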
6983 /**
6984  * tracing_log_err - write an error to the tracing error log
6985  * @tr: The associated trace array for the error (NULL for top level array)
6986  * @loc: A string describing where the error occurred
6987  * @cmd: The tracing command that caused the error
6988  * @errs: The array of loc-specific static error strings
6989  * @type: The index into errs[], which produces the specific static err string
6990  * @pos: The position the caret should be placed in the cmd
6991  *
6992  * Writes an error into tracing/error_log of the form:
6993  *
6994  * <loc>: error: <text>
6995  *   Command: <cmd>
6996  *              ^
6997  *
6998  * tracing/error_log is a small log file containing the last
6999  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7000  * unless there has been a tracing error, and the error log can be
7001  * cleared and have its memory freed by writing the empty string in
7002  * truncation mode to it i.e. echo > tracing/error_log.
7003  * truncation mode to it, i.e. echo > tracing/error_log.
7004  * NOTE: the @errs array along with the @type param are used to
7005  * produce a static error string - this string is not copied and saved
7006  * when the error is logged - only a pointer to it is saved.  See
7007  * existing callers for examples of how static strings are typically
7008  * defined for use with tracing_log_err().
7009  */
7010 void tracing_log_err(struct trace_array *tr,
7011                      const char *loc, const char *cmd,
7012                      const char **errs, u8 type, u8 pos)
7013 {
7014         struct tracing_log_err *err;
7015
7016         if (!tr)
7017                 tr = &global_trace;
7018
7019         mutex_lock(&tracing_err_log_lock);
7020         err = get_tracing_log_err(tr);
7021         if (PTR_ERR(err) == -ENOMEM) {
7022                 mutex_unlock(&tracing_err_log_lock);
7023                 return;
7024         }
7025
7026         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7027         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7028
7029         err->info.errs = errs;
7030         err->info.type = type;
7031         err->info.pos = pos;
7032         err->info.ts = local_clock();
7033
7034         list_add_tail(&err->list, &tr->err_log);
7035         mutex_unlock(&tracing_err_log_lock);
7036 }
7037
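/*
 * Illustrative caller sketch (not part of this file); the names below are
 * hypothetical.  A subsystem defines its static error strings once and
 * selects one of them with @type:
 *
 *   static const char *foo_errs[] = {
 *           "Unknown keyword",
 *           "Missing value",
 *   };
 *
 *   tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *                   0, err_pos(cmd, bad_token));
 *
 * Per the NOTE above, only the pointer to the selected string is stored, so
 * the strings must have static storage duration.
 */
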
7038 static void clear_tracing_err_log(struct trace_array *tr)
7039 {
7040         struct tracing_log_err *err, *next;
7041
7042         mutex_lock(&tracing_err_log_lock);
7043         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7044                 list_del(&err->list);
7045                 kfree(err);
7046         }
7047
7048         tr->n_err_log_entries = 0;
7049         mutex_unlock(&tracing_err_log_lock);
7050 }
7051
7052 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7053 {
7054         struct trace_array *tr = m->private;
7055
7056         mutex_lock(&tracing_err_log_lock);
7057
7058         return seq_list_start(&tr->err_log, *pos);
7059 }
7060
7061 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7062 {
7063         struct trace_array *tr = m->private;
7064
7065         return seq_list_next(v, &tr->err_log, pos);
7066 }
7067
7068 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7069 {
7070         mutex_unlock(&tracing_err_log_lock);
7071 }
7072
7073 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7074 {
7075         u8 i;
7076
7077         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7078                 seq_putc(m, ' ');
7079         for (i = 0; i < pos; i++)
7080                 seq_putc(m, ' ');
7081         seq_puts(m, "^\n");
7082 }
7083
7084 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7085 {
7086         struct tracing_log_err *err = v;
7087
7088         if (err) {
7089                 const char *err_text = err->info.errs[err->info.type];
7090                 u64 sec = err->info.ts;
7091                 u32 nsec;
7092
7093                 nsec = do_div(sec, NSEC_PER_SEC);
7094                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7095                            err->loc, err_text);
7096                 seq_printf(m, "%s", err->cmd);
7097                 tracing_err_log_show_pos(m, err->info.pos);
7098         }
7099
7100         return 0;
7101 }
7102
7103 static const struct seq_operations tracing_err_log_seq_ops = {
7104         .start  = tracing_err_log_seq_start,
7105         .next   = tracing_err_log_seq_next,
7106         .stop   = tracing_err_log_seq_stop,
7107         .show   = tracing_err_log_seq_show
7108 };
7109
7110 static int tracing_err_log_open(struct inode *inode, struct file *file)
7111 {
7112         struct trace_array *tr = inode->i_private;
7113         int ret = 0;
7114
7115         if (trace_array_get(tr) < 0)
7116                 return -ENODEV;
7117
7118         /* If this file was opened for write, then erase contents */
7119         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7120                 clear_tracing_err_log(tr);
7121
7122         if (file->f_mode & FMODE_READ) {
7123                 ret = seq_open(file, &tracing_err_log_seq_ops);
7124                 if (!ret) {
7125                         struct seq_file *m = file->private_data;
7126                         m->private = tr;
7127                 } else {
7128                         trace_array_put(tr);
7129                 }
7130         }
7131         return ret;
7132 }
7133
7134 static ssize_t tracing_err_log_write(struct file *file,
7135                                      const char __user *buffer,
7136                                      size_t count, loff_t *ppos)
7137 {
7138         return count;
7139 }
7140
7141 static const struct file_operations tracing_err_log_fops = {
7142         .open           = tracing_err_log_open,
7143         .write          = tracing_err_log_write,
7144         .read           = seq_read,
7145         .llseek         = seq_lseek,
7146         .release        = tracing_release_generic_tr,
7147 };
7148
7149 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7150 {
7151         struct trace_array *tr = inode->i_private;
7152         struct ftrace_buffer_info *info;
7153         int ret;
7154
7155         if (tracing_disabled)
7156                 return -ENODEV;
7157
7158         if (trace_array_get(tr) < 0)
7159                 return -ENODEV;
7160
7161         info = kzalloc(sizeof(*info), GFP_KERNEL);
7162         if (!info) {
7163                 trace_array_put(tr);
7164                 return -ENOMEM;
7165         }
7166
7167         mutex_lock(&trace_types_lock);
7168
7169         info->iter.tr           = tr;
7170         info->iter.cpu_file     = tracing_get_cpu(inode);
7171         info->iter.trace        = tr->current_trace;
7172         info->iter.trace_buffer = &tr->trace_buffer;
7173         info->spare             = NULL;
7174         /* Force reading ring buffer for first read */
7175         info->read              = (unsigned int)-1;
7176
7177         filp->private_data = info;
7178
7179         tr->current_trace->ref++;
7180
7181         mutex_unlock(&trace_types_lock);
7182
7183         ret = nonseekable_open(inode, filp);
7184         if (ret < 0)
7185                 trace_array_put(tr);
7186
7187         return ret;
7188 }
7189
7190 static __poll_t
7191 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7192 {
7193         struct ftrace_buffer_info *info = filp->private_data;
7194         struct trace_iterator *iter = &info->iter;
7195
7196         return trace_poll(iter, filp, poll_table);
7197 }
7198
7199 static ssize_t
7200 tracing_buffers_read(struct file *filp, char __user *ubuf,
7201                      size_t count, loff_t *ppos)
7202 {
7203         struct ftrace_buffer_info *info = filp->private_data;
7204         struct trace_iterator *iter = &info->iter;
7205         ssize_t ret = 0;
7206         ssize_t size;
7207
7208         if (!count)
7209                 return 0;
7210
7211 #ifdef CONFIG_TRACER_MAX_TRACE
7212         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7213                 return -EBUSY;
7214 #endif
7215
7216         if (!info->spare) {
7217                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7218                                                           iter->cpu_file);
7219                 if (IS_ERR(info->spare)) {
7220                         ret = PTR_ERR(info->spare);
7221                         info->spare = NULL;
7222                 } else {
7223                         info->spare_cpu = iter->cpu_file;
7224                 }
7225         }
7226         if (!info->spare)
7227                 return ret;
7228
7229         /* Do we have previous read data to read? */
7230         if (info->read < PAGE_SIZE)
7231                 goto read;
7232
7233  again:
7234         trace_access_lock(iter->cpu_file);
7235         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7236                                     &info->spare,
7237                                     count,
7238                                     iter->cpu_file, 0);
7239         trace_access_unlock(iter->cpu_file);
7240
7241         if (ret < 0) {
7242                 if (trace_empty(iter)) {
7243                         if ((filp->f_flags & O_NONBLOCK))
7244                                 return -EAGAIN;
7245
7246                         ret = wait_on_pipe(iter, 0);
7247                         if (ret)
7248                                 return ret;
7249
7250                         goto again;
7251                 }
7252                 return 0;
7253         }
7254
7255         info->read = 0;
7256  read:
7257         size = PAGE_SIZE - info->read;
7258         if (size > count)
7259                 size = count;
7260
7261         ret = copy_to_user(ubuf, info->spare + info->read, size);
7262         if (ret == size)
7263                 return -EFAULT;
7264
7265         size -= ret;
7266
7267         *ppos += size;
7268         info->read += size;
7269
7270         return size;
7271 }
7272
7273 static int tracing_buffers_release(struct inode *inode, struct file *file)
7274 {
7275         struct ftrace_buffer_info *info = file->private_data;
7276         struct trace_iterator *iter = &info->iter;
7277
7278         mutex_lock(&trace_types_lock);
7279
7280         iter->tr->current_trace->ref--;
7281
7282         __trace_array_put(iter->tr);
7283
7284         if (info->spare)
7285                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7286                                            info->spare_cpu, info->spare);
7287         kfree(info);
7288
7289         mutex_unlock(&trace_types_lock);
7290
7291         return 0;
7292 }
7293
7294 struct buffer_ref {
7295         struct ring_buffer      *buffer;
7296         void                    *page;
7297         int                     cpu;
7298         int                     ref;
7299 };
7300
7301 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7302                                     struct pipe_buffer *buf)
7303 {
7304         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7305
7306         if (--ref->ref)
7307                 return;
7308
7309         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7310         kfree(ref);
7311         buf->private = 0;
7312 }
7313
7314 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7315                                 struct pipe_buffer *buf)
7316 {
7317         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7318
7319         ref->ref++;
7320 }
7321
7322 /* Pipe buffer operations for a buffer. */
7323 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7324         .confirm                = generic_pipe_buf_confirm,
7325         .release                = buffer_pipe_buf_release,
7326         .steal                  = generic_pipe_buf_steal,
7327         .get                    = buffer_pipe_buf_get,
7328 };
7329
7330 /*
7331  * Callback from splice_to_pipe(), if we need to release some pages
7332  * at the end of the spd in case we errored out while filling the pipe.
7333  */
7334 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7335 {
7336         struct buffer_ref *ref =
7337                 (struct buffer_ref *)spd->partial[i].private;
7338
7339         if (--ref->ref)
7340                 return;
7341
7342         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7343         kfree(ref);
7344         spd->partial[i].private = 0;
7345 }
7346
7347 static ssize_t
7348 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7349                             struct pipe_inode_info *pipe, size_t len,
7350                             unsigned int flags)
7351 {
7352         struct ftrace_buffer_info *info = file->private_data;
7353         struct trace_iterator *iter = &info->iter;
7354         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7355         struct page *pages_def[PIPE_DEF_BUFFERS];
7356         struct splice_pipe_desc spd = {
7357                 .pages          = pages_def,
7358                 .partial        = partial_def,
7359                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7360                 .ops            = &buffer_pipe_buf_ops,
7361                 .spd_release    = buffer_spd_release,
7362         };
7363         struct buffer_ref *ref;
7364         int entries, i;
7365         ssize_t ret = 0;
7366
7367 #ifdef CONFIG_TRACER_MAX_TRACE
7368         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7369                 return -EBUSY;
7370 #endif
7371
7372         if (*ppos & (PAGE_SIZE - 1))
7373                 return -EINVAL;
7374
7375         if (len & (PAGE_SIZE - 1)) {
7376                 if (len < PAGE_SIZE)
7377                         return -EINVAL;
7378                 len &= PAGE_MASK;
7379         }
7380
7381         if (splice_grow_spd(pipe, &spd))
7382                 return -ENOMEM;
7383
7384  again:
7385         trace_access_lock(iter->cpu_file);
7386         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7387
7388         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7389                 struct page *page;
7390                 int r;
7391
7392                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7393                 if (!ref) {
7394                         ret = -ENOMEM;
7395                         break;
7396                 }
7397
7398                 ref->ref = 1;
7399                 ref->buffer = iter->trace_buffer->buffer;
7400                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7401                 if (IS_ERR(ref->page)) {
7402                         ret = PTR_ERR(ref->page);
7403                         ref->page = NULL;
7404                         kfree(ref);
7405                         break;
7406                 }
7407                 ref->cpu = iter->cpu_file;
7408
7409                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7410                                           len, iter->cpu_file, 1);
7411                 if (r < 0) {
7412                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7413                                                    ref->page);
7414                         kfree(ref);
7415                         break;
7416                 }
7417
7418                 page = virt_to_page(ref->page);
7419
7420                 spd.pages[i] = page;
7421                 spd.partial[i].len = PAGE_SIZE;
7422                 spd.partial[i].offset = 0;
7423                 spd.partial[i].private = (unsigned long)ref;
7424                 spd.nr_pages++;
7425                 *ppos += PAGE_SIZE;
7426
7427                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7428         }
7429
7430         trace_access_unlock(iter->cpu_file);
7431         spd.nr_pages = i;
7432
7433         /* did we read anything? */
7434         if (!spd.nr_pages) {
7435                 if (ret)
7436                         goto out;
7437
7438                 ret = -EAGAIN;
7439                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7440                         goto out;
7441
7442                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7443                 if (ret)
7444                         goto out;
7445
7446                 goto again;
7447         }
7448
7449         ret = splice_to_pipe(pipe, &spd);
7450 out:
7451         splice_shrink_spd(&spd);
7452
7453         return ret;
7454 }
7455
7456 static const struct file_operations tracing_buffers_fops = {
7457         .open           = tracing_buffers_open,
7458         .read           = tracing_buffers_read,
7459         .poll           = tracing_buffers_poll,
7460         .release        = tracing_buffers_release,
7461         .splice_read    = tracing_buffers_splice_read,
7462         .llseek         = no_llseek,
7463 };
7464
7465 static ssize_t
7466 tracing_stats_read(struct file *filp, char __user *ubuf,
7467                    size_t count, loff_t *ppos)
7468 {
7469         struct inode *inode = file_inode(filp);
7470         struct trace_array *tr = inode->i_private;
7471         struct trace_buffer *trace_buf = &tr->trace_buffer;
7472         int cpu = tracing_get_cpu(inode);
7473         struct trace_seq *s;
7474         unsigned long cnt;
7475         unsigned long long t;
7476         unsigned long usec_rem;
7477
7478         s = kmalloc(sizeof(*s), GFP_KERNEL);
7479         if (!s)
7480                 return -ENOMEM;
7481
7482         trace_seq_init(s);
7483
7484         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7485         trace_seq_printf(s, "entries: %ld\n", cnt);
7486
7487         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7488         trace_seq_printf(s, "overrun: %ld\n", cnt);
7489
7490         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7491         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7492
7493         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7494         trace_seq_printf(s, "bytes: %ld\n", cnt);
7495
7496         if (trace_clocks[tr->clock_id].in_ns) {
7497                 /* local or global for trace_clock */
7498                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7499                 usec_rem = do_div(t, USEC_PER_SEC);
7500                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7501                                                                 t, usec_rem);
7502
7503                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7504                 usec_rem = do_div(t, USEC_PER_SEC);
7505                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7506         } else {
7507                 /* counter or tsc mode for trace_clock */
7508                 trace_seq_printf(s, "oldest event ts: %llu\n",
7509                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7510
7511                 trace_seq_printf(s, "now ts: %llu\n",
7512                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7513         }
7514
7515         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7516         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7517
7518         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7519         trace_seq_printf(s, "read events: %ld\n", cnt);
7520
7521         count = simple_read_from_buffer(ubuf, count, ppos,
7522                                         s->buffer, trace_seq_used(s));
7523
7524         kfree(s);
7525
7526         return count;
7527 }
7528
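/*
 * Illustrative sample of the per_cpu/cpuN/stats output assembled above
 * (the values are made up):
 *
 *   entries: 5124
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 225732
 *   oldest event ts: 52304.105234
 *   now ts: 52306.275142
 *   dropped events: 0
 *   read events: 107
 */
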
7529 static const struct file_operations tracing_stats_fops = {
7530         .open           = tracing_open_generic_tr,
7531         .read           = tracing_stats_read,
7532         .llseek         = generic_file_llseek,
7533         .release        = tracing_release_generic_tr,
7534 };
7535
7536 #ifdef CONFIG_DYNAMIC_FTRACE
7537
7538 static ssize_t
7539 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7540                   size_t cnt, loff_t *ppos)
7541 {
7542         unsigned long *p = filp->private_data;
7543         char buf[64]; /* Not too big for a shallow stack */
7544         int r;
7545
7546         r = scnprintf(buf, 63, "%ld", *p);
7547         buf[r++] = '\n';
7548
7549         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7550 }
7551
7552 static const struct file_operations tracing_dyn_info_fops = {
7553         .open           = tracing_open_generic,
7554         .read           = tracing_read_dyn_info,
7555         .llseek         = generic_file_llseek,
7556 };
7557 #endif /* CONFIG_DYNAMIC_FTRACE */
7558
7559 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7560 static void
7561 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7562                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7563                 void *data)
7564 {
7565         tracing_snapshot_instance(tr);
7566 }
7567
7568 static void
7569 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7570                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7571                       void *data)
7572 {
7573         struct ftrace_func_mapper *mapper = data;
7574         long *count = NULL;
7575
7576         if (mapper)
7577                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7578
7579         if (count) {
7580
7581                 if (*count <= 0)
7582                         return;
7583
7584                 (*count)--;
7585         }
7586
7587         tracing_snapshot_instance(tr);
7588 }
7589
7590 static int
7591 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7592                       struct ftrace_probe_ops *ops, void *data)
7593 {
7594         struct ftrace_func_mapper *mapper = data;
7595         long *count = NULL;
7596
7597         seq_printf(m, "%ps:", (void *)ip);
7598
7599         seq_puts(m, "snapshot");
7600
7601         if (mapper)
7602                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7603
7604         if (count)
7605                 seq_printf(m, ":count=%ld\n", *count);
7606         else
7607                 seq_puts(m, ":unlimited\n");
7608
7609         return 0;
7610 }
7611
7612 static int
7613 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7614                      unsigned long ip, void *init_data, void **data)
7615 {
7616         struct ftrace_func_mapper *mapper = *data;
7617
7618         if (!mapper) {
7619                 mapper = allocate_ftrace_func_mapper();
7620                 if (!mapper)
7621                         return -ENOMEM;
7622                 *data = mapper;
7623         }
7624
7625         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7626 }
7627
7628 static void
7629 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7630                      unsigned long ip, void *data)
7631 {
7632         struct ftrace_func_mapper *mapper = data;
7633
7634         if (!ip) {
7635                 if (!mapper)
7636                         return;
7637                 free_ftrace_func_mapper(mapper, NULL);
7638                 return;
7639         }
7640
7641         ftrace_func_mapper_remove_ip(mapper, ip);
7642 }
7643
7644 static struct ftrace_probe_ops snapshot_probe_ops = {
7645         .func                   = ftrace_snapshot,
7646         .print                  = ftrace_snapshot_print,
7647 };
7648
7649 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7650         .func                   = ftrace_count_snapshot,
7651         .print                  = ftrace_snapshot_print,
7652         .init                   = ftrace_snapshot_init,
7653         .free                   = ftrace_snapshot_free,
7654 };
7655
7656 static int
7657 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7658                                char *glob, char *cmd, char *param, int enable)
7659 {
7660         struct ftrace_probe_ops *ops;
7661         void *count = (void *)-1;
7662         char *number;
7663         int ret;
7664
7665         if (!tr)
7666                 return -ENODEV;
7667
7668         /* hash funcs only work with set_ftrace_filter */
7669         if (!enable)
7670                 return -EINVAL;
7671
7672         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7673
7674         if (glob[0] == '!')
7675                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7676
7677         if (!param)
7678                 goto out_reg;
7679
7680         number = strsep(&param, ":");
7681
7682         if (!strlen(number))
7683                 goto out_reg;
7684
7685         /*
7686          * We use the callback data field (which is a pointer)
7687          * as our counter.
7688          */
7689         ret = kstrtoul(number, 0, (unsigned long *)&count);
7690         if (ret)
7691                 return ret;
7692
7693  out_reg:
7694         ret = tracing_alloc_snapshot_instance(tr);
7695         if (ret < 0)
7696                 goto out;
7697
7698         ret = register_ftrace_function_probe(glob, tr, ops, count);
7699
7700  out:
7701         return ret < 0 ? ret : 0;
7702 }
7703
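/*
 * Illustrative usage sketch (not part of this file), matching the callback
 * above.  Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # take a snapshot every time do_page_fault is hit
 *   echo 'do_page_fault:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 *   # only for the first 5 hits
 *   echo 'do_page_fault:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *
 *   # remove the probe again
 *   echo '!do_page_fault:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */
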
7704 static struct ftrace_func_command ftrace_snapshot_cmd = {
7705         .name                   = "snapshot",
7706         .func                   = ftrace_trace_snapshot_callback,
7707 };
7708
7709 static __init int register_snapshot_cmd(void)
7710 {
7711         return register_ftrace_command(&ftrace_snapshot_cmd);
7712 }
7713 #else
7714 static inline __init int register_snapshot_cmd(void) { return 0; }
7715 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7716
7717 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7718 {
7719         if (WARN_ON(!tr->dir))
7720                 return ERR_PTR(-ENODEV);
7721
7722         /* Top directory uses NULL as the parent */
7723         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7724                 return NULL;
7725
7726         /* All sub buffers have a descriptor */
7727         return tr->dir;
7728 }
7729
7730 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7731 {
7732         struct dentry *d_tracer;
7733
7734         if (tr->percpu_dir)
7735                 return tr->percpu_dir;
7736
7737         d_tracer = tracing_get_dentry(tr);
7738         if (IS_ERR(d_tracer))
7739                 return NULL;
7740
7741         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7742
7743         WARN_ONCE(!tr->percpu_dir,
7744                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7745
7746         return tr->percpu_dir;
7747 }
7748
7749 static struct dentry *
7750 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7751                       void *data, long cpu, const struct file_operations *fops)
7752 {
7753         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7754
7755         if (ret) /* See tracing_get_cpu() */
7756                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7757         return ret;
7758 }
7759
7760 static void
7761 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7762 {
7763         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7764         struct dentry *d_cpu;
7765         char cpu_dir[30]; /* 30 characters should be more than enough */
7766
7767         if (!d_percpu)
7768                 return;
7769
7770         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7771         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7772         if (!d_cpu) {
7773                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7774                 return;
7775         }
7776
7777         /* per cpu trace_pipe */
7778         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7779                                 tr, cpu, &tracing_pipe_fops);
7780
7781         /* per cpu trace */
7782         trace_create_cpu_file("trace", 0644, d_cpu,
7783                                 tr, cpu, &tracing_fops);
7784
7785         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7786                                 tr, cpu, &tracing_buffers_fops);
7787
7788         trace_create_cpu_file("stats", 0444, d_cpu,
7789                                 tr, cpu, &tracing_stats_fops);
7790
7791         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7792                                 tr, cpu, &tracing_entries_fops);
7793
7794 #ifdef CONFIG_TRACER_SNAPSHOT
7795         trace_create_cpu_file("snapshot", 0644, d_cpu,
7796                                 tr, cpu, &snapshot_fops);
7797
7798         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7799                                 tr, cpu, &snapshot_raw_fops);
7800 #endif
7801 }
7802
7803 #ifdef CONFIG_FTRACE_SELFTEST
7804 /* Let selftest have access to static functions in this file */
7805 #include "trace_selftest.c"
7806 #endif
7807
7808 static ssize_t
7809 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7810                         loff_t *ppos)
7811 {
7812         struct trace_option_dentry *topt = filp->private_data;
7813         char *buf;
7814
7815         if (topt->flags->val & topt->opt->bit)
7816                 buf = "1\n";
7817         else
7818                 buf = "0\n";
7819
7820         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7821 }
7822
7823 static ssize_t
7824 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7825                          loff_t *ppos)
7826 {
7827         struct trace_option_dentry *topt = filp->private_data;
7828         unsigned long val;
7829         int ret;
7830
7831         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7832         if (ret)
7833                 return ret;
7834
7835         if (val != 0 && val != 1)
7836                 return -EINVAL;
7837
7838         if (!!(topt->flags->val & topt->opt->bit) != val) {
7839                 mutex_lock(&trace_types_lock);
7840                 ret = __set_tracer_option(topt->tr, topt->flags,
7841                                           topt->opt, !val);
7842                 mutex_unlock(&trace_types_lock);
7843                 if (ret)
7844                         return ret;
7845         }
7846
7847         *ppos += cnt;
7848
7849         return cnt;
7850 }
7851
7852
7853 static const struct file_operations trace_options_fops = {
7854         .open = tracing_open_generic,
7855         .read = trace_options_read,
7856         .write = trace_options_write,
7857         .llseek = generic_file_llseek,
7858 };
7859
7860 /*
7861  * In order to pass in both the trace_array descriptor and the index
7862  * to the flag that the trace option file represents, the trace_array
7863  * has a character array of trace_flags_index[], which holds the index
7864  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7865  * The address of this character array is passed to the flag option file
7866  * read/write callbacks.
7867  *
7868  * In order to extract both the index and the trace_array descriptor,
7869  * get_tr_index() uses the following algorithm.
7870  *
7871  *   idx = *ptr;
7872  *
7873  * The pointer itself contains the address of the index (remember that
7874  * index[1] == 1), so dereferencing it yields the flag's bit index.
7875  *
7876  * Then to get the trace_array descriptor, by subtracting that index
7877  * from the ptr, we get to the start of the index itself.
7878  *
7879  *   ptr - idx == &index[0]
7880  *
7881  * Then a simple container_of() from that pointer gets us to the
7882  * trace_array descriptor.
7883  */
7884 static void get_tr_index(void *data, struct trace_array **ptr,
7885                          unsigned int *pindex)
7886 {
7887         *pindex = *(unsigned char *)data;
7888
7889         *ptr = container_of(data - *pindex, struct trace_array,
7890                             trace_flags_index);
7891 }
7892
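/*
 * Worked example of the arithmetic above (illustrative only): if this file
 * represents flag bit 5, then data == &tr->trace_flags_index[5] and
 * *data == 5.  Subtracting 5 from data gives &tr->trace_flags_index[0], and
 * container_of() on that address recovers the enclosing trace_array.
 */
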
7893 static ssize_t
7894 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7895                         loff_t *ppos)
7896 {
7897         void *tr_index = filp->private_data;
7898         struct trace_array *tr;
7899         unsigned int index;
7900         char *buf;
7901
7902         get_tr_index(tr_index, &tr, &index);
7903
7904         if (tr->trace_flags & (1 << index))
7905                 buf = "1\n";
7906         else
7907                 buf = "0\n";
7908
7909         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7910 }
7911
7912 static ssize_t
7913 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7914                          loff_t *ppos)
7915 {
7916         void *tr_index = filp->private_data;
7917         struct trace_array *tr;
7918         unsigned int index;
7919         unsigned long val;
7920         int ret;
7921
7922         get_tr_index(tr_index, &tr, &index);
7923
7924         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7925         if (ret)
7926                 return ret;
7927
7928         if (val != 0 && val != 1)
7929                 return -EINVAL;
7930
7931         mutex_lock(&trace_types_lock);
7932         ret = set_tracer_flag(tr, 1 << index, val);
7933         mutex_unlock(&trace_types_lock);
7934
7935         if (ret < 0)
7936                 return ret;
7937
7938         *ppos += cnt;
7939
7940         return cnt;
7941 }
7942
7943 static const struct file_operations trace_options_core_fops = {
7944         .open = tracing_open_generic,
7945         .read = trace_options_core_read,
7946         .write = trace_options_core_write,
7947         .llseek = generic_file_llseek,
7948 };
7949
7950 struct dentry *trace_create_file(const char *name,
7951                                  umode_t mode,
7952                                  struct dentry *parent,
7953                                  void *data,
7954                                  const struct file_operations *fops)
7955 {
7956         struct dentry *ret;
7957
7958         ret = tracefs_create_file(name, mode, parent, data, fops);
7959         if (!ret)
7960                 pr_warn("Could not create tracefs '%s' entry\n", name);
7961
7962         return ret;
7963 }
7964
7965
7966 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7967 {
7968         struct dentry *d_tracer;
7969
7970         if (tr->options)
7971                 return tr->options;
7972
7973         d_tracer = tracing_get_dentry(tr);
7974         if (IS_ERR(d_tracer))
7975                 return NULL;
7976
7977         tr->options = tracefs_create_dir("options", d_tracer);
7978         if (!tr->options) {
7979                 pr_warn("Could not create tracefs directory 'options'\n");
7980                 return NULL;
7981         }
7982
7983         return tr->options;
7984 }
7985
7986 static void
7987 create_trace_option_file(struct trace_array *tr,
7988                          struct trace_option_dentry *topt,
7989                          struct tracer_flags *flags,
7990                          struct tracer_opt *opt)
7991 {
7992         struct dentry *t_options;
7993
7994         t_options = trace_options_init_dentry(tr);
7995         if (!t_options)
7996                 return;
7997
7998         topt->flags = flags;
7999         topt->opt = opt;
8000         topt->tr = tr;
8001
8002         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8003                                     &trace_options_fops);
8004
8005 }
8006
8007 static void
8008 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8009 {
8010         struct trace_option_dentry *topts;
8011         struct trace_options *tr_topts;
8012         struct tracer_flags *flags;
8013         struct tracer_opt *opts;
8014         int cnt;
8015         int i;
8016
8017         if (!tracer)
8018                 return;
8019
8020         flags = tracer->flags;
8021
8022         if (!flags || !flags->opts)
8023                 return;
8024
8025         /*
8026          * If this is an instance, only create flags for tracers
8027          * the instance may have.
8028          */
8029         if (!trace_ok_for_array(tracer, tr))
8030                 return;
8031
8032         for (i = 0; i < tr->nr_topts; i++) {
8033                 /* Make sure there are no duplicate flags. */
8034                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8035                         return;
8036         }
8037
8038         opts = flags->opts;
8039
8040         for (cnt = 0; opts[cnt].name; cnt++)
8041                 ;
8042
8043         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8044         if (!topts)
8045                 return;
8046
8047         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8048                             GFP_KERNEL);
8049         if (!tr_topts) {
8050                 kfree(topts);
8051                 return;
8052         }
8053
8054         tr->topts = tr_topts;
8055         tr->topts[tr->nr_topts].tracer = tracer;
8056         tr->topts[tr->nr_topts].topts = topts;
8057         tr->nr_topts++;
8058
8059         for (cnt = 0; opts[cnt].name; cnt++) {
8060                 create_trace_option_file(tr, &topts[cnt], flags,
8061                                          &opts[cnt]);
8062                 WARN_ONCE(topts[cnt].entry == NULL,
8063                           "Failed to create trace option: %s",
8064                           opts[cnt].name);
8065         }
8066 }
8067
8068 static struct dentry *
8069 create_trace_option_core_file(struct trace_array *tr,
8070                               const char *option, long index)
8071 {
8072         struct dentry *t_options;
8073
8074         t_options = trace_options_init_dentry(tr);
8075         if (!t_options)
8076                 return NULL;
8077
8078         return trace_create_file(option, 0644, t_options,
8079                                  (void *)&tr->trace_flags_index[index],
8080                                  &trace_options_core_fops);
8081 }
8082
8083 static void create_trace_options_dir(struct trace_array *tr)
8084 {
8085         struct dentry *t_options;
8086         bool top_level = tr == &global_trace;
8087         int i;
8088
8089         t_options = trace_options_init_dentry(tr);
8090         if (!t_options)
8091                 return;
8092
8093         for (i = 0; trace_options[i]; i++) {
8094                 if (top_level ||
8095                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8096                         create_trace_option_core_file(tr, trace_options[i], i);
8097         }
8098 }
8099
8100 static ssize_t
8101 rb_simple_read(struct file *filp, char __user *ubuf,
8102                size_t cnt, loff_t *ppos)
8103 {
8104         struct trace_array *tr = filp->private_data;
8105         char buf[64];
8106         int r;
8107
8108         r = tracer_tracing_is_on(tr);
8109         r = sprintf(buf, "%d\n", r);
8110
8111         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8112 }
8113
8114 static ssize_t
8115 rb_simple_write(struct file *filp, const char __user *ubuf,
8116                 size_t cnt, loff_t *ppos)
8117 {
8118         struct trace_array *tr = filp->private_data;
8119         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8120         unsigned long val;
8121         int ret;
8122
8123         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8124         if (ret)
8125                 return ret;
8126
8127         if (buffer) {
8128                 mutex_lock(&trace_types_lock);
8129                 if (!!val == tracer_tracing_is_on(tr)) {
8130                         val = 0; /* do nothing */
8131                 } else if (val) {
8132                         tracer_tracing_on(tr);
8133                         if (tr->current_trace->start)
8134                                 tr->current_trace->start(tr);
8135                 } else {
8136                         tracer_tracing_off(tr);
8137                         if (tr->current_trace->stop)
8138                                 tr->current_trace->stop(tr);
8139                 }
8140                 mutex_unlock(&trace_types_lock);
8141         }
8142
8143         (*ppos)++;
8144
8145         return cnt;
8146 }
8147
8148 static const struct file_operations rb_simple_fops = {
8149         .open           = tracing_open_generic_tr,
8150         .read           = rb_simple_read,
8151         .write          = rb_simple_write,
8152         .release        = tracing_release_generic_tr,
8153         .llseek         = default_llseek,
8154 };
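/*
 * The rb_simple_* handlers back the per-instance "tracing_on" file.
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *
 * Writing the value that is already set is treated as a no-op above.
 */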
8155
8156 static ssize_t
8157 buffer_percent_read(struct file *filp, char __user *ubuf,
8158                     size_t cnt, loff_t *ppos)
8159 {
8160         struct trace_array *tr = filp->private_data;
8161         char buf[64];
8162         int r;
8163
8164         r = tr->buffer_percent;
8165         r = sprintf(buf, "%d\n", r);
8166
8167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8168 }
8169
8170 static ssize_t
8171 buffer_percent_write(struct file *filp, const char __user *ubuf,
8172                      size_t cnt, loff_t *ppos)
8173 {
8174         struct trace_array *tr = filp->private_data;
8175         unsigned long val;
8176         int ret;
8177
8178         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8179         if (ret)
8180                 return ret;
8181
8182         if (val > 100)
8183                 return -EINVAL;
8184
8185         if (!val)
8186                 val = 1;
8187
8188         tr->buffer_percent = val;
8189
8190         (*ppos)++;
8191
8192         return cnt;
8193 }
8194
8195 static const struct file_operations buffer_percent_fops = {
8196         .open           = tracing_open_generic_tr,
8197         .read           = buffer_percent_read,
8198         .write          = buffer_percent_write,
8199         .release        = tracing_release_generic_tr,
8200         .llseek         = default_llseek,
8201 };
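/*
 * buffer_percent controls how full the ring buffer must be before a
 * blocked reader is woken. A sketch of the user-visible behaviour,
 * based on the write handler above: values above 100 are rejected,
 * a write of 0 is clamped up to 1, and anything else is stored
 * verbatim in tr->buffer_percent.
 */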
8202
8203 struct dentry *trace_instance_dir;
8204
8205 static void
8206 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8207
8208 static int
8209 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8210 {
8211         enum ring_buffer_flags rb_flags;
8212
8213         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8214
8215         buf->tr = tr;
8216
8217         buf->buffer = ring_buffer_alloc(size, rb_flags);
8218         if (!buf->buffer)
8219                 return -ENOMEM;
8220
8221         buf->data = alloc_percpu(struct trace_array_cpu);
8222         if (!buf->data) {
8223                 ring_buffer_free(buf->buffer);
8224                 buf->buffer = NULL;
8225                 return -ENOMEM;
8226         }
8227
8228         /* Allocate the first page for all buffers */
8229         set_buffer_entries(&tr->trace_buffer,
8230                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8231
8232         return 0;
8233 }
8234
8235 static int allocate_trace_buffers(struct trace_array *tr, int size)
8236 {
8237         int ret;
8238
8239         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8240         if (ret)
8241                 return ret;
8242
8243 #ifdef CONFIG_TRACER_MAX_TRACE
8244         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8245                                     allocate_snapshot ? size : 1);
8246         if (WARN_ON(ret)) {
8247                 ring_buffer_free(tr->trace_buffer.buffer);
8248                 tr->trace_buffer.buffer = NULL;
8249                 free_percpu(tr->trace_buffer.data);
8250                 tr->trace_buffer.data = NULL;
8251                 return -ENOMEM;
8252         }
8253         tr->allocated_snapshot = allocate_snapshot;
8254
8255         /*
8256          * Only the top level trace array gets its snapshot allocated
8257          * from the kernel command line.
8258          */
8259         allocate_snapshot = false;
8260 #endif
8261         return 0;
8262 }
8263
8264 static void free_trace_buffer(struct trace_buffer *buf)
8265 {
8266         if (buf->buffer) {
8267                 ring_buffer_free(buf->buffer);
8268                 buf->buffer = NULL;
8269                 free_percpu(buf->data);
8270                 buf->data = NULL;
8271         }
8272 }
8273
8274 static void free_trace_buffers(struct trace_array *tr)
8275 {
8276         if (!tr)
8277                 return;
8278
8279         free_trace_buffer(&tr->trace_buffer);
8280
8281 #ifdef CONFIG_TRACER_MAX_TRACE
8282         free_trace_buffer(&tr->max_buffer);
8283 #endif
8284 }
8285
8286 static void init_trace_flags_index(struct trace_array *tr)
8287 {
8288         int i;
8289
8290         /* Used by the trace options files */
8291         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8292                 tr->trace_flags_index[i] = i;
8293 }
8294
8295 static void __update_tracer_options(struct trace_array *tr)
8296 {
8297         struct tracer *t;
8298
8299         for (t = trace_types; t; t = t->next)
8300                 add_tracer_options(tr, t);
8301 }
8302
8303 static void update_tracer_options(struct trace_array *tr)
8304 {
8305         mutex_lock(&trace_types_lock);
8306         __update_tracer_options(tr);
8307         mutex_unlock(&trace_types_lock);
8308 }
8309
8310 struct trace_array *trace_array_create(const char *name)
8311 {
8312         struct trace_array *tr;
8313         int ret;
8314
8315         mutex_lock(&event_mutex);
8316         mutex_lock(&trace_types_lock);
8317
8318         ret = -EEXIST;
8319         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8320                 if (tr->name && strcmp(tr->name, name) == 0)
8321                         goto out_unlock;
8322         }
8323
8324         ret = -ENOMEM;
8325         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8326         if (!tr)
8327                 goto out_unlock;
8328
8329         tr->name = kstrdup(name, GFP_KERNEL);
8330         if (!tr->name)
8331                 goto out_free_tr;
8332
8333         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8334                 goto out_free_tr;
8335
8336         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8337
8338         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8339
8340         raw_spin_lock_init(&tr->start_lock);
8341
8342         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8343
8344         tr->current_trace = &nop_trace;
8345
8346         INIT_LIST_HEAD(&tr->systems);
8347         INIT_LIST_HEAD(&tr->events);
8348         INIT_LIST_HEAD(&tr->hist_vars);
8349         INIT_LIST_HEAD(&tr->err_log);
8350
8351         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8352                 goto out_free_tr;
8353
8354         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8355         if (!tr->dir)
8356                 goto out_free_tr;
8357
8358         ret = event_trace_add_tracer(tr->dir, tr);
8359         if (ret) {
8360                 tracefs_remove_recursive(tr->dir);
8361                 goto out_free_tr;
8362         }
8363
8364         ftrace_init_trace_array(tr);
8365
8366         init_tracer_tracefs(tr, tr->dir);
8367         init_trace_flags_index(tr);
8368         __update_tracer_options(tr);
8369
8370         list_add(&tr->list, &ftrace_trace_arrays);
8371
8372         mutex_unlock(&trace_types_lock);
8373         mutex_unlock(&event_mutex);
8374
8375         return tr;
8376
8377  out_free_tr:
8378         free_trace_buffers(tr);
8379         free_cpumask_var(tr->tracing_cpumask);
8380         kfree(tr->name);
8381         kfree(tr);
8382
8383  out_unlock:
8384         mutex_unlock(&trace_types_lock);
8385         mutex_unlock(&event_mutex);
8386
8387         return ERR_PTR(ret);
8388 }
8389 EXPORT_SYMBOL_GPL(trace_array_create);
8390
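/*
 * Sketch of in-kernel use of the exported API (error handling elided,
 * the instance name is purely illustrative):
 *
 *   struct trace_array *tr = trace_array_create("my_instance");
 *   if (!IS_ERR(tr))
 *           trace_array_destroy(tr);
 *
 * This creates and tears down the same kind of instance that user
 * space gets by making a directory under tracefs "instances".
 */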
8391 static int instance_mkdir(const char *name)
8392 {
8393         return PTR_ERR_OR_ZERO(trace_array_create(name));
8394 }
8395
8396 static int __remove_instance(struct trace_array *tr)
8397 {
8398         int i;
8399
8400         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8401                 return -EBUSY;
8402
8403         list_del(&tr->list);
8404
8405         /* Disable all the flags that were enabled coming in */
8406         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8407                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8408                         set_tracer_flag(tr, 1 << i, 0);
8409         }
8410
8411         tracing_set_nop(tr);
8412         clear_ftrace_function_probes(tr);
8413         event_trace_del_tracer(tr);
8414         ftrace_clear_pids(tr);
8415         ftrace_destroy_function_files(tr);
8416         tracefs_remove_recursive(tr->dir);
8417         free_trace_buffers(tr);
8418
8419         for (i = 0; i < tr->nr_topts; i++) {
8420                 kfree(tr->topts[i].topts);
8421         }
8422         kfree(tr->topts);
8423
8424         free_cpumask_var(tr->tracing_cpumask);
8425         kfree(tr->name);
8426         kfree(tr);
8427         tr = NULL;
8428
8429         return 0;
8430 }
8431
8432 int trace_array_destroy(struct trace_array *tr)
8433 {
8434         int ret;
8435
8436         if (!tr)
8437                 return -EINVAL;
8438
8439         mutex_lock(&event_mutex);
8440         mutex_lock(&trace_types_lock);
8441
8442         ret = __remove_instance(tr);
8443
8444         mutex_unlock(&trace_types_lock);
8445         mutex_unlock(&event_mutex);
8446
8447         return ret;
8448 }
8449 EXPORT_SYMBOL_GPL(trace_array_destroy);
8450
8451 static int instance_rmdir(const char *name)
8452 {
8453         struct trace_array *tr;
8454         int ret;
8455
8456         mutex_lock(&event_mutex);
8457         mutex_lock(&trace_types_lock);
8458
8459         ret = -ENODEV;
8460         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8461                 if (tr->name && strcmp(tr->name, name) == 0) {
8462                         ret = __remove_instance(tr);
8463                         break;
8464                 }
8465         }
8466
8467         mutex_unlock(&trace_types_lock);
8468         mutex_unlock(&event_mutex);
8469
8470         return ret;
8471 }
8472
8473 static __init void create_trace_instances(struct dentry *d_tracer)
8474 {
8475         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8476                                                          instance_mkdir,
8477                                                          instance_rmdir);
8478         if (WARN_ON(!trace_instance_dir))
8479                 return;
8480 }
8481
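/*
 * Instances are normally managed from user space (sketch; mount point
 * may differ):
 *
 *   mkdir /sys/kernel/tracing/instances/foo   -> instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo   -> instance_rmdir("foo")
 *
 * Each instance gets its own ring buffer and most of the control files
 * created by init_tracer_tracefs() below.
 */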
8482 static void
8483 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8484 {
8485         struct trace_event_file *file;
8486         int cpu;
8487
8488         trace_create_file("available_tracers", 0444, d_tracer,
8489                         tr, &show_traces_fops);
8490
8491         trace_create_file("current_tracer", 0644, d_tracer,
8492                         tr, &set_tracer_fops);
8493
8494         trace_create_file("tracing_cpumask", 0644, d_tracer,
8495                           tr, &tracing_cpumask_fops);
8496
8497         trace_create_file("trace_options", 0644, d_tracer,
8498                           tr, &tracing_iter_fops);
8499
8500         trace_create_file("trace", 0644, d_tracer,
8501                           tr, &tracing_fops);
8502
8503         trace_create_file("trace_pipe", 0444, d_tracer,
8504                           tr, &tracing_pipe_fops);
8505
8506         trace_create_file("buffer_size_kb", 0644, d_tracer,
8507                           tr, &tracing_entries_fops);
8508
8509         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8510                           tr, &tracing_total_entries_fops);
8511
8512         trace_create_file("free_buffer", 0200, d_tracer,
8513                           tr, &tracing_free_buffer_fops);
8514
8515         trace_create_file("trace_marker", 0220, d_tracer,
8516                           tr, &tracing_mark_fops);
8517
8518         file = __find_event_file(tr, "ftrace", "print");
8519         if (file && file->dir)
8520                 trace_create_file("trigger", 0644, file->dir, file,
8521                                   &event_trigger_fops);
8522         tr->trace_marker_file = file;
8523
8524         trace_create_file("trace_marker_raw", 0220, d_tracer,
8525                           tr, &tracing_mark_raw_fops);
8526
8527         trace_create_file("trace_clock", 0644, d_tracer, tr,
8528                           &trace_clock_fops);
8529
8530         trace_create_file("tracing_on", 0644, d_tracer,
8531                           tr, &rb_simple_fops);
8532
8533         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8534                           &trace_time_stamp_mode_fops);
8535
8536         tr->buffer_percent = 50;
8537
8538         trace_create_file("buffer_percent", 0444, d_tracer,
8539                         tr, &buffer_percent_fops);
8540
8541         create_trace_options_dir(tr);
8542
8543 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8544         trace_create_file("tracing_max_latency", 0644, d_tracer,
8545                         &tr->max_latency, &tracing_max_lat_fops);
8546 #endif
8547
8548         if (ftrace_create_function_files(tr, d_tracer))
8549                 WARN(1, "Could not allocate function filter files");
8550
8551 #ifdef CONFIG_TRACER_SNAPSHOT
8552         trace_create_file("snapshot", 0644, d_tracer,
8553                           tr, &snapshot_fops);
8554 #endif
8555
8556         trace_create_file("error_log", 0644, d_tracer,
8557                           tr, &tracing_err_log_fops);
8558
8559         for_each_tracing_cpu(cpu)
8560                 tracing_init_tracefs_percpu(tr, cpu);
8561
8562         ftrace_init_tracefs(tr, d_tracer);
8563 }
8564
8565 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8566 {
8567         struct vfsmount *mnt;
8568         struct file_system_type *type;
8569
8570         /*
8571          * To maintain backward compatibility for tools that mount
8572          * debugfs to get to the tracing facility, tracefs is automatically
8573          * mounted to the debugfs/tracing directory.
8574          */
8575         type = get_fs_type("tracefs");
8576         if (!type)
8577                 return NULL;
8578         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8579         put_filesystem(type);
8580         if (IS_ERR(mnt))
8581                 return NULL;
8582         mntget(mnt);
8583
8584         return mnt;
8585 }
8586
8587 /**
8588  * tracing_init_dentry - initialize top level trace array
8589  *
8590  * This is called when creating files or directories in the tracing
8591  * directory. It is called via fs_initcall() by any of the boot up code
8592  * and expects to return the dentry of the top level tracing directory.
8593  */
8594 struct dentry *tracing_init_dentry(void)
8595 {
8596         struct trace_array *tr = &global_trace;
8597
8598         /* The top level trace array uses NULL as parent */
8599         if (tr->dir)
8600                 return NULL;
8601
8602         if (WARN_ON(!tracefs_initialized()) ||
8603                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8604                  WARN_ON(!debugfs_initialized())))
8605                 return ERR_PTR(-ENODEV);
8606
8607         /*
8608          * As there may still be users that expect the tracing
8609          * files to exist in debugfs/tracing, we must automount
8610          * the tracefs file system there, so older tools still
8611          * work with the newer kernel.
8612          */
8613         tr->dir = debugfs_create_automount("tracing", NULL,
8614                                            trace_automount, NULL);
8615         if (!tr->dir) {
8616                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8617                 return ERR_PTR(-ENOMEM);
8618         }
8619
8620         return NULL;
8621 }
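/*
 * Practical effect (sketch): with debugfs mounted at /sys/kernel/debug,
 * the automount created above means that entering
 * /sys/kernel/debug/tracing transparently mounts tracefs there, so
 * tools written before tracefs existed keep working unchanged.
 */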
8622
8623 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8624 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8625
8626 static void __init trace_eval_init(void)
8627 {
8628         int len;
8629
8630         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8631         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8632 }
8633
8634 #ifdef CONFIG_MODULES
8635 static void trace_module_add_evals(struct module *mod)
8636 {
8637         if (!mod->num_trace_evals)
8638                 return;
8639
8640         /*
8641          * Modules with bad taint do not have events created;
8642          * do not bother with enums either.
8643          */
8644         if (trace_module_has_bad_taint(mod))
8645                 return;
8646
8647         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8648 }
8649
8650 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8651 static void trace_module_remove_evals(struct module *mod)
8652 {
8653         union trace_eval_map_item *map;
8654         union trace_eval_map_item **last = &trace_eval_maps;
8655
8656         if (!mod->num_trace_evals)
8657                 return;
8658
8659         mutex_lock(&trace_eval_mutex);
8660
8661         map = trace_eval_maps;
8662
8663         while (map) {
8664                 if (map->head.mod == mod)
8665                         break;
8666                 map = trace_eval_jmp_to_tail(map);
8667                 last = &map->tail.next;
8668                 map = map->tail.next;
8669         }
8670         if (!map)
8671                 goto out;
8672
8673         *last = trace_eval_jmp_to_tail(map)->tail.next;
8674         kfree(map);
8675  out:
8676         mutex_unlock(&trace_eval_mutex);
8677 }
8678 #else
8679 static inline void trace_module_remove_evals(struct module *mod) { }
8680 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8681
8682 static int trace_module_notify(struct notifier_block *self,
8683                                unsigned long val, void *data)
8684 {
8685         struct module *mod = data;
8686
8687         switch (val) {
8688         case MODULE_STATE_COMING:
8689                 trace_module_add_evals(mod);
8690                 break;
8691         case MODULE_STATE_GOING:
8692                 trace_module_remove_evals(mod);
8693                 break;
8694         }
8695
8696         return 0;
8697 }
8698
8699 static struct notifier_block trace_module_nb = {
8700         .notifier_call = trace_module_notify,
8701         .priority = 0,
8702 };
8703 #endif /* CONFIG_MODULES */
8704
8705 static __init int tracer_init_tracefs(void)
8706 {
8707         struct dentry *d_tracer;
8708
8709         trace_access_lock_init();
8710
8711         d_tracer = tracing_init_dentry();
8712         if (IS_ERR(d_tracer))
8713                 return 0;
8714
8715         event_trace_init();
8716
8717         init_tracer_tracefs(&global_trace, d_tracer);
8718         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8719
8720         trace_create_file("tracing_thresh", 0644, d_tracer,
8721                         &global_trace, &tracing_thresh_fops);
8722
8723         trace_create_file("README", 0444, d_tracer,
8724                         NULL, &tracing_readme_fops);
8725
8726         trace_create_file("saved_cmdlines", 0444, d_tracer,
8727                         NULL, &tracing_saved_cmdlines_fops);
8728
8729         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8730                           NULL, &tracing_saved_cmdlines_size_fops);
8731
8732         trace_create_file("saved_tgids", 0444, d_tracer,
8733                         NULL, &tracing_saved_tgids_fops);
8734
8735         trace_eval_init();
8736
8737         trace_create_eval_file(d_tracer);
8738
8739 #ifdef CONFIG_MODULES
8740         register_module_notifier(&trace_module_nb);
8741 #endif
8742
8743 #ifdef CONFIG_DYNAMIC_FTRACE
8744         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8745                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8746 #endif
8747
8748         create_trace_instances(d_tracer);
8749
8750         update_tracer_options(&global_trace);
8751
8752         return 0;
8753 }
8754
8755 static int trace_panic_handler(struct notifier_block *this,
8756                                unsigned long event, void *unused)
8757 {
8758         if (ftrace_dump_on_oops)
8759                 ftrace_dump(ftrace_dump_on_oops);
8760         return NOTIFY_OK;
8761 }
8762
8763 static struct notifier_block trace_panic_notifier = {
8764         .notifier_call  = trace_panic_handler,
8765         .next           = NULL,
8766         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8767 };
8768
8769 static int trace_die_handler(struct notifier_block *self,
8770                              unsigned long val,
8771                              void *data)
8772 {
8773         switch (val) {
8774         case DIE_OOPS:
8775                 if (ftrace_dump_on_oops)
8776                         ftrace_dump(ftrace_dump_on_oops);
8777                 break;
8778         default:
8779                 break;
8780         }
8781         return NOTIFY_OK;
8782 }
8783
8784 static struct notifier_block trace_die_notifier = {
8785         .notifier_call = trace_die_handler,
8786         .priority = 200
8787 };
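/*
 * Note: ftrace_dump_on_oops is typically enabled with the
 * "ftrace_dump_on_oops" kernel command line parameter or the matching
 * sysctl, which makes the panic/die notifiers above dump the ftrace
 * ring buffer via ftrace_dump().
 */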
8788
8789 /*
8790  * printk is limited to a maximum of 1024 bytes; we really don't need
8791  * it that big. Nothing should be printing 1000 characters anyway.
8792  */
8793 #define TRACE_MAX_PRINT         1000
8794
8795 /*
8796  * Define here KERN_TRACE so that we have one place to modify
8797  * it if we decide to change what log level the ftrace dump
8798  * should be at.
8799  */
8800 #define KERN_TRACE              KERN_EMERG
8801
8802 void
8803 trace_printk_seq(struct trace_seq *s)
8804 {
8805         /* Probably should print a warning here. */
8806         if (s->seq.len >= TRACE_MAX_PRINT)
8807                 s->seq.len = TRACE_MAX_PRINT;
8808
8809         /*
8810          * More paranoid code. Although the buffer size is set to
8811          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8812          * an extra layer of protection.
8813          */
8814         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8815                 s->seq.len = s->seq.size - 1;
8816
8817         /* The buffer should already be nul-terminated, but we are paranoid. */
8818         s->buffer[s->seq.len] = 0;
8819
8820         printk(KERN_TRACE "%s", s->buffer);
8821
8822         trace_seq_init(s);
8823 }
8824
8825 void trace_init_global_iter(struct trace_iterator *iter)
8826 {
8827         iter->tr = &global_trace;
8828         iter->trace = iter->tr->current_trace;
8829         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8830         iter->trace_buffer = &global_trace.trace_buffer;
8831
8832         if (iter->trace && iter->trace->open)
8833                 iter->trace->open(iter);
8834
8835         /* Annotate start of buffers if we had overruns */
8836         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8837                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8838
8839         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8840         if (trace_clocks[iter->tr->clock_id].in_ns)
8841                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8842 }
8843
8844 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8845 {
8846         /* use static because iter can be a bit big for the stack */
8847         static struct trace_iterator iter;
8848         static atomic_t dump_running;
8849         struct trace_array *tr = &global_trace;
8850         unsigned int old_userobj;
8851         unsigned long flags;
8852         int cnt = 0, cpu;
8853
8854         /* Only allow one dump user at a time. */
8855         if (atomic_inc_return(&dump_running) != 1) {
8856                 atomic_dec(&dump_running);
8857                 return;
8858         }
8859
8860         /*
8861          * Always turn off tracing when we dump.
8862          * We don't need to show trace output of what happens
8863          * between multiple crashes.
8864          *
8865          * If the user does a sysrq-z, then they can re-enable
8866          * tracing with echo 1 > tracing_on.
8867          */
8868         tracing_off();
8869
8870         local_irq_save(flags);
8871         printk_nmi_direct_enter();
8872
8873         /* Simulate the iterator */
8874         trace_init_global_iter(&iter);
8875
8876         for_each_tracing_cpu(cpu) {
8877                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8878         }
8879
8880         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8881
8882         /* don't look at user memory in panic mode */
8883         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8884
8885         switch (oops_dump_mode) {
8886         case DUMP_ALL:
8887                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8888                 break;
8889         case DUMP_ORIG:
8890                 iter.cpu_file = raw_smp_processor_id();
8891                 break;
8892         case DUMP_NONE:
8893                 goto out_enable;
8894         default:
8895                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8896                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8897         }
8898
8899         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8900
8901         /* Did function tracer already get disabled? */
8902         if (ftrace_is_dead()) {
8903                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8904                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8905         }
8906
8907         /*
8908          * We need to stop all tracing on all CPUs to read
8909          * the next buffer. This is a bit expensive, but is
8910          * not done often. We fill what we can read,
8911          * and then release the locks again.
8912          */
8913
8914         while (!trace_empty(&iter)) {
8915
8916                 if (!cnt)
8917                         printk(KERN_TRACE "---------------------------------\n");
8918
8919                 cnt++;
8920
8921                 /* reset all but tr, trace, and overruns */
8922                 memset(&iter.seq, 0,
8923                        sizeof(struct trace_iterator) -
8924                        offsetof(struct trace_iterator, seq));
8925                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8926                 iter.pos = -1;
8927
8928                 if (trace_find_next_entry_inc(&iter) != NULL) {
8929                         int ret;
8930
8931                         ret = print_trace_line(&iter);
8932                         if (ret != TRACE_TYPE_NO_CONSUME)
8933                                 trace_consume(&iter);
8934                 }
8935                 touch_nmi_watchdog();
8936
8937                 trace_printk_seq(&iter.seq);
8938         }
8939
8940         if (!cnt)
8941                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8942         else
8943                 printk(KERN_TRACE "---------------------------------\n");
8944
8945  out_enable:
8946         tr->trace_flags |= old_userobj;
8947
8948         for_each_tracing_cpu(cpu) {
8949                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8950         }
8951         atomic_dec(&dump_running);
8952         printk_nmi_direct_exit();
8953         local_irq_restore(flags);
8954 }
8955 EXPORT_SYMBOL_GPL(ftrace_dump);
8956
8957 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8958 {
8959         char **argv;
8960         int argc, ret;
8961
8962         argc = 0;
8963         ret = 0;
8964         argv = argv_split(GFP_KERNEL, buf, &argc);
8965         if (!argv)
8966                 return -ENOMEM;
8967
8968         if (argc)
8969                 ret = createfn(argc, argv);
8970
8971         argv_free(argv);
8972
8973         return ret;
8974 }
8975
8976 #define WRITE_BUFSIZE  4096
8977
8978 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8979                                 size_t count, loff_t *ppos,
8980                                 int (*createfn)(int, char **))
8981 {
8982         char *kbuf, *buf, *tmp;
8983         int ret = 0;
8984         size_t done = 0;
8985         size_t size;
8986
8987         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8988         if (!kbuf)
8989                 return -ENOMEM;
8990
8991         while (done < count) {
8992                 size = count - done;
8993
8994                 if (size >= WRITE_BUFSIZE)
8995                         size = WRITE_BUFSIZE - 1;
8996
8997                 if (copy_from_user(kbuf, buffer + done, size)) {
8998                         ret = -EFAULT;
8999                         goto out;
9000                 }
9001                 kbuf[size] = '\0';
9002                 buf = kbuf;
9003                 do {
9004                         tmp = strchr(buf, '\n');
9005                         if (tmp) {
9006                                 *tmp = '\0';
9007                                 size = tmp - buf + 1;
9008                         } else {
9009                                 size = strlen(buf);
9010                                 if (done + size < count) {
9011                                         if (buf != kbuf)
9012                                                 break;
9013                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9014                                         pr_warn("Line length is too long: Should be less than %d\n",
9015                                                 WRITE_BUFSIZE - 2);
9016                                         ret = -EINVAL;
9017                                         goto out;
9018                                 }
9019                         }
9020                         done += size;
9021
9022                         /* Remove comments */
9023                         tmp = strchr(buf, '#');
9024
9025                         if (tmp)
9026                                 *tmp = '\0';
9027
9028                         ret = trace_run_command(buf, createfn);
9029                         if (ret)
9030                                 goto out;
9031                         buf += size;
9032
9033                 } while (done < count);
9034         }
9035         ret = done;
9036
9037 out:
9038         kfree(kbuf);
9039
9040         return ret;
9041 }
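/*
 * Example of a user of this helper (sketch): writes to the
 * kprobe_events file are parsed here, so
 *
 *   echo 'p:myprobe do_sys_open' > kprobe_events
 *
 * ends up invoking createfn(argc, argv) with argv split on whitespace,
 * i.e. {"p:myprobe", "do_sys_open"}. Comments after '#' and trailing
 * newlines are stripped before the callback runs.
 */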
9042
9043 __init static int tracer_alloc_buffers(void)
9044 {
9045         int ring_buf_size;
9046         int ret = -ENOMEM;
9047
9048         /*
9049          * Make sure we don't accidentally add more trace options
9050          * than we have bits for.
9051          */
9052         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9053
9054         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9055                 goto out;
9056
9057         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9058                 goto out_free_buffer_mask;
9059
9060         /* Only allocate trace_printk buffers if a trace_printk exists */
9061         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9062                 /* Must be called before global_trace.buffer is allocated */
9063                 trace_printk_init_buffers();
9064
9065         /* To save memory, keep the ring buffer size to its minimum */
9066         if (ring_buffer_expanded)
9067                 ring_buf_size = trace_buf_size;
9068         else
9069                 ring_buf_size = 1;
9070
9071         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9072         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9073
9074         raw_spin_lock_init(&global_trace.start_lock);
9075
9076         /*
9077          * The prepare callbacks allocate some memory for the ring buffer. We
9078          * don't free the buffer if the CPU goes down. If we were to free
9079          * the buffer, then the user would lose any trace that was in the
9080          * buffer. The memory will be removed once the "instance" is removed.
9081          */
9082         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9083                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9084                                       NULL);
9085         if (ret < 0)
9086                 goto out_free_cpumask;
9087         /* Used for event triggers */
9088         ret = -ENOMEM;
9089         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9090         if (!temp_buffer)
9091                 goto out_rm_hp_state;
9092
9093         if (trace_create_savedcmd() < 0)
9094                 goto out_free_temp_buffer;
9095
9096         /* TODO: make the number of buffers hot pluggable with CPUs */
9097         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9098                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9099                 WARN_ON(1);
9100                 goto out_free_savedcmd;
9101         }
9102
9103         if (global_trace.buffer_disabled)
9104                 tracing_off();
9105
9106         if (trace_boot_clock) {
9107                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9108                 if (ret < 0)
9109                         pr_warn("Trace clock %s not defined, going back to default\n",
9110                                 trace_boot_clock);
9111         }
9112
9113         /*
9114          * register_tracer() might reference current_trace, so it
9115          * needs to be set before we register anything. This is
9116          * just a bootstrap of current_trace anyway.
9117          */
9118         global_trace.current_trace = &nop_trace;
9119
9120         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9121
9122         ftrace_init_global_array_ops(&global_trace);
9123
9124         init_trace_flags_index(&global_trace);
9125
9126         register_tracer(&nop_trace);
9127
9128         /* Function tracing may start here (via kernel command line) */
9129         init_function_trace();
9130
9131         /* All seems OK, enable tracing */
9132         tracing_disabled = 0;
9133
9134         atomic_notifier_chain_register(&panic_notifier_list,
9135                                        &trace_panic_notifier);
9136
9137         register_die_notifier(&trace_die_notifier);
9138
9139         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9140
9141         INIT_LIST_HEAD(&global_trace.systems);
9142         INIT_LIST_HEAD(&global_trace.events);
9143         INIT_LIST_HEAD(&global_trace.hist_vars);
9144         INIT_LIST_HEAD(&global_trace.err_log);
9145         list_add(&global_trace.list, &ftrace_trace_arrays);
9146
9147         apply_trace_boot_options();
9148
9149         register_snapshot_cmd();
9150
9151         return 0;
9152
9153 out_free_savedcmd:
9154         free_saved_cmdlines_buffer(savedcmd);
9155 out_free_temp_buffer:
9156         ring_buffer_free(temp_buffer);
9157 out_rm_hp_state:
9158         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9159 out_free_cpumask:
9160         free_cpumask_var(global_trace.tracing_cpumask);
9161 out_free_buffer_mask:
9162         free_cpumask_var(tracing_buffer_mask);
9163 out:
9164         return ret;
9165 }
9166
9167 void __init early_trace_init(void)
9168 {
9169         if (tracepoint_printk) {
9170                 tracepoint_print_iter =
9171                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9172                 if (WARN_ON(!tracepoint_print_iter))
9173                         tracepoint_printk = 0;
9174                 else
9175                         static_key_enable(&tracepoint_printk_key.key);
9176         }
9177         tracer_alloc_buffers();
9178 }
9179
9180 void __init trace_init(void)
9181 {
9182         trace_event_init();
9183 }
9184
9185 __init static int clear_boot_tracer(void)
9186 {
9187         /*
9188          * The buffer holding the default bootup tracer name is in an init
9189          * section. This function is called at late init. If we did not
9190          * find the boot tracer, then clear it out, to prevent
9191          * later registration from accessing the buffer that is
9192          * about to be freed.
9193          */
9194         if (!default_bootup_tracer)
9195                 return 0;
9196
9197         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9198                default_bootup_tracer);
9199         default_bootup_tracer = NULL;
9200
9201         return 0;
9202 }
9203
9204 fs_initcall(tracer_init_tracefs);
9205 late_initcall_sync(clear_boot_tracer);
9206
9207 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9208 __init static int tracing_set_default_clock(void)
9209 {
9210         /* sched_clock_stable() is determined in late_initcall */
9211         if (!trace_boot_clock && !sched_clock_stable()) {
9212                 printk(KERN_WARNING
9213                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9214                        "If you want to keep using the local clock, then add:\n"
9215                        "  \"trace_clock=local\"\n"
9216                        "on the kernel command line\n");
9217                 tracing_set_clock(&global_trace, "global");
9218         }
9219
9220         return 0;
9221 }
9222 late_initcall_sync(tracing_set_default_clock);
9223 #endif