1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring-buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
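/* Default set_flag callback for tracers that don't implement custom flags */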
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1, but will turn to zero if the initialization
101  * of the tracer is successful. That is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 if you want to dump the buffers of all CPUs.
121  * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL, as it must be different
141          * from "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
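
/*
 * Illustrative layout (editor's sketch, not from the original source): for a
 * module that registers N eval maps, the saved array has N + 2 items:
 *
 *   item[0]        uses "head" (head.mod, head.length = N)
 *   item[1..N]     use  "map"  (the saved trace_eval_map entries)
 *   item[N + 1]    uses "tail" (tail.next -> next saved array, or NULL)
 */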
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct ring_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
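/*
 * Convert nanoseconds to microseconds, rounding to the nearest microsecond
 * (the +500 below is the rounding term applied before the divide by 1000).
 */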
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct ring_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
390 {
391         /*
392          * Return false, because if filtered_pids does not exist,
393          * all pids are good to trace.
394          */
395         if (!filtered_pids)
396                 return false;
397
398         return !trace_find_filtered_pid(filtered_pids, task->pid);
399 }
400
401 /**
402  * trace_filter_add_remove_task - Add or remove a task from a pid_list
403  * @pid_list: The list to modify
404  * @self: The current task for fork or NULL for exit
405  * @task: The task to add or remove
406  *
407  * If adding a task, if @self is defined, the task is only added if @self
408  * is also included in @pid_list. This happens on fork and tasks should
409  * only be added when the parent is listed. If @self is NULL, then the
410  * @task pid will be removed from the list, which would happen on exit
411  * of a task.
412  */
413 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
414                                   struct task_struct *self,
415                                   struct task_struct *task)
416 {
417         if (!pid_list)
418                 return;
419
420         /* For forks, we only add if the forking task is listed */
421         if (self) {
422                 if (!trace_find_filtered_pid(pid_list, self->pid))
423                         return;
424         }
425
426         /* Sorry, but we don't support pid_max changing after setting */
427         if (task->pid >= pid_list->pid_max)
428                 return;
429
430         /* "self" is set for forks, and NULL for exits */
431         if (self)
432                 set_bit(task->pid, pid_list->pids);
433         else
434                 clear_bit(task->pid, pid_list->pids);
435 }
436
437 /**
438  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
439  * @pid_list: The pid list to show
440  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
441  * @pos: The position of the file
442  *
443  * This is used by the seq_file "next" operation to iterate the pids
444  * listed in a trace_pid_list structure.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
450 {
451         unsigned long pid = (unsigned long)v;
452
453         (*pos)++;
454
455         /* pid is already +1 of the actual previous bit */
456         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
457
458         /* Return pid + 1 to allow zero to be represented */
459         if (pid < pid_list->pid_max)
460                 return (void *)(pid + 1);
461
462         return NULL;
463 }
464
465 /**
466  * trace_pid_start - Used for seq_file to start reading pid lists
467  * @pid_list: The pid list to show
468  * @pos: The position of the file
469  *
470  * This is used by seq_file "start" operation to start the iteration
471  * of listing pids.
472  *
473  * Returns the pid+1 as we want to display pid of zero, but NULL would
474  * stop the iteration.
475  */
476 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
477 {
478         unsigned long pid;
479         loff_t l = 0;
480
481         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
482         if (pid >= pid_list->pid_max)
483                 return NULL;
484
485         /* Return pid + 1 so that zero can be the exit value */
486         for (pid++; pid && l < *pos;
487              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
488                 ;
489         return (void *)pid;
490 }
491
492 /**
493  * trace_pid_show - show the current pid in seq_file processing
494  * @m: The seq_file structure to write into
495  * @v: A void pointer of the pid (+1) value to display
496  *
497  * Can be directly used by seq_file operations to display the current
498  * pid value.
499  */
500 int trace_pid_show(struct seq_file *m, void *v)
501 {
502         unsigned long pid = (unsigned long)v - 1;
503
504         seq_printf(m, "%lu\n", pid);
505         return 0;
506 }
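
/*
 * Usage sketch (editor's illustration, not part of the original file): the
 * three helpers above are meant to back a seq_file. A hypothetical user would
 * wire them up roughly like this, where "some_pid_list", p_start(), p_next()
 * and p_stop() are placeholders and real callers take their own locks in
 * .start and drop them in .stop:
 *
 *   static void *p_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(some_pid_list, pos);
 *   }
 *
 *   static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(some_pid_list, v, pos);
 *   }
 *
 *   static const struct seq_operations pid_seq_ops = {
 *           .start = p_start,
 *           .next  = p_next,
 *           .stop  = p_stop,
 *           .show  = trace_pid_show,
 *   };
 */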
507
508 /* 128 should be much more than enough (the parser is sized to PID_BUF_SIZE + 1) */
509 #define PID_BUF_SIZE            127
510
511 int trace_pid_write(struct trace_pid_list *filtered_pids,
512                     struct trace_pid_list **new_pid_list,
513                     const char __user *ubuf, size_t cnt)
514 {
515         struct trace_pid_list *pid_list;
516         struct trace_parser parser;
517         unsigned long val;
518         int nr_pids = 0;
519         ssize_t read = 0;
520         ssize_t ret = 0;
521         loff_t pos;
522         pid_t pid;
523
524         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
525                 return -ENOMEM;
526
527         /*
528          * Always recreate a new array. The write is an all-or-nothing
529          * operation: a new array is always created when the user adds
530          * new pids, and if the operation fails, the current list is
531          * not modified.
532          */
533         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
534         if (!pid_list) {
535                 trace_parser_put(&parser);
536                 return -ENOMEM;
537         }
538
539         pid_list->pid_max = READ_ONCE(pid_max);
540
541         /* Only truncating will shrink pid_max */
542         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
543                 pid_list->pid_max = filtered_pids->pid_max;
544
545         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
546         if (!pid_list->pids) {
547                 trace_parser_put(&parser);
548                 kfree(pid_list);
549                 return -ENOMEM;
550         }
551
552         if (filtered_pids) {
553                 /* copy the current bits to the new max */
554                 for_each_set_bit(pid, filtered_pids->pids,
555                                  filtered_pids->pid_max) {
556                         set_bit(pid, pid_list->pids);
557                         nr_pids++;
558                 }
559         }
560
561         while (cnt > 0) {
562
563                 pos = 0;
564
565                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
566                 if (ret < 0 || !trace_parser_loaded(&parser))
567                         break;
568
569                 read += ret;
570                 ubuf += ret;
571                 cnt -= ret;
572
573                 ret = -EINVAL;
574                 if (kstrtoul(parser.buffer, 0, &val))
575                         break;
576                 if (val >= pid_list->pid_max)
577                         break;
578
579                 pid = (pid_t)val;
580
581                 set_bit(pid, pid_list->pids);
582                 nr_pids++;
583
584                 trace_parser_clear(&parser);
585                 ret = 0;
586         }
587         trace_parser_put(&parser);
588
589         if (ret < 0) {
590                 trace_free_pid_list(pid_list);
591                 return ret;
592         }
593
594         if (!nr_pids) {
595                 /* Cleared the list of pids */
596                 trace_free_pid_list(pid_list);
597                 read = ret;
598                 pid_list = NULL;
599         }
600
601         *new_pid_list = pid_list;
602
603         return read;
604 }
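
/*
 * Editor's note (illustrative, not in the original source): this helper backs
 * the pid-filter files in tracefs (e.g. set_event_pid). From user space the
 * interaction looks roughly like:
 *
 *   echo 123 456 > set_event_pid     # replace the filter with pids 123 and 456
 *   echo > set_event_pid             # nothing parsed -> the filter is cleared
 *
 * The exact file names depend on the caller; what this function provides is
 * the parsing and the all-or-nothing replacement described in the comment
 * inside it.
 */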
605
606 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
607 {
608         u64 ts;
609
610         /* Early boot up does not have a buffer yet */
611         if (!buf->buffer)
612                 return trace_clock_local();
613
614         ts = ring_buffer_time_stamp(buf->buffer, cpu);
615         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
616
617         return ts;
618 }
619
620 u64 ftrace_now(int cpu)
621 {
622         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
623 }
624
625 /**
626  * tracing_is_enabled - Show if global_trace has been disabled
627  *
628  * Shows if the global trace has been enabled or not. It uses the
629  * mirror flag "buffer_disabled" to be used in fast paths such as for
630  * the irqsoff tracer. But it may be inaccurate due to races. If you
631  * need to know the accurate state, use tracing_is_on() which is a little
632  * slower, but accurate.
633  */
634 int tracing_is_enabled(void)
635 {
636         /*
637          * For quick access (irqsoff uses this in fast path), just
638          * return the mirror variable of the state of the ring buffer.
639          * It's a little racy, but we don't really care.
640          */
641         smp_rmb();
642         return !global_trace.buffer_disabled;
643 }
644
645 /*
646  * trace_buf_size is the size in bytes that is allocated
647  * for a buffer. Note, the number of bytes is always rounded
648  * to page size.
649  *
650  * This number is purposely set to a low number of 16384.
651  * If a dump on oops happens, it is much appreciated not to
652  * have to wait for all that output. Anyway, this is
653  * configurable at both boot time and run time.
654  */
655 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
656
657 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
658
659 /* trace_types holds a link list of available tracers. */
660 static struct tracer            *trace_types __read_mostly;
661
662 /*
663  * trace_types_lock is used to protect the trace_types list.
664  */
665 DEFINE_MUTEX(trace_types_lock);
666
667 /*
668  * serialize the access of the ring buffer
669  *
670  * The ring buffer serializes readers, but that is only low-level protection.
671  * The validity of the events (returned by ring_buffer_peek() etc.)
672  * is not protected by the ring buffer.
673  *
674  * The content of events may become garbage if we allow another process to
675  * consume these events concurrently:
676  *   A) the page of the consumed events may become a normal page
677  *      (not a reader page) in the ring buffer, and this page will be
678  *      rewritten by the event producer.
679  *   B) The page of the consumed events may become a page for splice_read,
680  *      and this page will be returned to the system.
681  *
682  * These primitives allow multiple processes to access different per-cpu
683  * ring buffers concurrently.
684  *
685  * These primitives don't distinguish read-only and read-consume access.
686  * Multiple read-only accesses are also serialized.
687  */
688
689 #ifdef CONFIG_SMP
690 static DECLARE_RWSEM(all_cpu_access_lock);
691 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         if (cpu == RING_BUFFER_ALL_CPUS) {
696                 /* gain it for accessing the whole ring buffer. */
697                 down_write(&all_cpu_access_lock);
698         } else {
699                 /* gain it for accessing a cpu ring buffer. */
700
701                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
702                 down_read(&all_cpu_access_lock);
703
704                 /* Secondly block other access to this @cpu ring buffer. */
705                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
706         }
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         if (cpu == RING_BUFFER_ALL_CPUS) {
712                 up_write(&all_cpu_access_lock);
713         } else {
714                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
715                 up_read(&all_cpu_access_lock);
716         }
717 }
718
719 static inline void trace_access_lock_init(void)
720 {
721         int cpu;
722
723         for_each_possible_cpu(cpu)
724                 mutex_init(&per_cpu(cpu_access_lock, cpu));
725 }
726
727 #else
728
729 static DEFINE_MUTEX(access_lock);
730
731 static inline void trace_access_lock(int cpu)
732 {
733         (void)cpu;
734         mutex_lock(&access_lock);
735 }
736
737 static inline void trace_access_unlock(int cpu)
738 {
739         (void)cpu;
740         mutex_unlock(&access_lock);
741 }
742
743 static inline void trace_access_lock_init(void)
744 {
745 }
746
747 #endif
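
/*
 * Usage sketch (editor's illustration, not part of the original file): readers
 * that consume events bracket the ring-buffer access with these helpers,
 * roughly:
 *
 *   trace_access_lock(cpu_file);
 *   ... peek at / consume events for cpu_file ...
 *   trace_access_unlock(cpu_file);
 *
 * where cpu_file is either a specific CPU or RING_BUFFER_ALL_CPUS, matching
 * the serialization rules described in the comment block above.
 */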
748
749 #ifdef CONFIG_STACKTRACE
750 static void __ftrace_trace_stack(struct ring_buffer *buffer,
751                                  unsigned long flags,
752                                  int skip, int pc, struct pt_regs *regs);
753 static inline void ftrace_trace_stack(struct trace_array *tr,
754                                       struct ring_buffer *buffer,
755                                       unsigned long flags,
756                                       int skip, int pc, struct pt_regs *regs);
757
758 #else
759 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
760                                         unsigned long flags,
761                                         int skip, int pc, struct pt_regs *regs)
762 {
763 }
764 static inline void ftrace_trace_stack(struct trace_array *tr,
765                                       struct ring_buffer *buffer,
766                                       unsigned long flags,
767                                       int skip, int pc, struct pt_regs *regs)
768 {
769 }
770
771 #endif
772
773 static __always_inline void
774 trace_event_setup(struct ring_buffer_event *event,
775                   int type, unsigned long flags, int pc)
776 {
777         struct trace_entry *ent = ring_buffer_event_data(event);
778
779         tracing_generic_entry_update(ent, type, flags, pc);
780 }
781
782 static __always_inline struct ring_buffer_event *
783 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
784                           int type,
785                           unsigned long len,
786                           unsigned long flags, int pc)
787 {
788         struct ring_buffer_event *event;
789
790         event = ring_buffer_lock_reserve(buffer, len);
791         if (event != NULL)
792                 trace_event_setup(event, type, flags, pc);
793
794         return event;
795 }
796
797 void tracer_tracing_on(struct trace_array *tr)
798 {
799         if (tr->trace_buffer.buffer)
800                 ring_buffer_record_on(tr->trace_buffer.buffer);
801         /*
802          * This flag is looked at when buffers haven't been allocated
803          * yet, or by some tracers (like irqsoff), that just want to
804          * know if the ring buffer has been disabled, but it can handle
805          * races of where it gets disabled but we still do a record.
806          * As the check is in the fast path of the tracers, it is more
807          * important to be fast than accurate.
808          */
809         tr->buffer_disabled = 0;
810         /* Make the flag seen by readers */
811         smp_wmb();
812 }
813
814 /**
815  * tracing_on - enable tracing buffers
816  *
817  * This function enables tracing buffers that may have been
818  * disabled with tracing_off.
819  */
820 void tracing_on(void)
821 {
822         tracer_tracing_on(&global_trace);
823 }
824 EXPORT_SYMBOL_GPL(tracing_on);
825
826
827 static __always_inline void
828 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
829 {
830         __this_cpu_write(trace_taskinfo_save, true);
831
832         /* If this is the temp buffer, we need to commit fully */
833         if (this_cpu_read(trace_buffered_event) == event) {
834                 /* Length is in event->array[0] */
835                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
836                 /* Release the temp buffer */
837                 this_cpu_dec(trace_buffered_event_cnt);
838         } else
839                 ring_buffer_unlock_commit(buffer, event);
840 }
841
842 /**
843  * __trace_puts - write a constant string into the trace buffer.
844  * @ip:    The address of the caller
845  * @str:   The constant string to write
846  * @size:  The size of the string.
847  */
848 int __trace_puts(unsigned long ip, const char *str, int size)
849 {
850         struct ring_buffer_event *event;
851         struct ring_buffer *buffer;
852         struct print_entry *entry;
853         unsigned long irq_flags;
854         int alloc;
855         int pc;
856
857         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
858                 return 0;
859
860         pc = preempt_count();
861
862         if (unlikely(tracing_selftest_running || tracing_disabled))
863                 return 0;
864
865         alloc = sizeof(*entry) + size + 2; /* possible \n added */
866
867         local_save_flags(irq_flags);
868         buffer = global_trace.trace_buffer.buffer;
869         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
870                                             irq_flags, pc);
871         if (!event)
872                 return 0;
873
874         entry = ring_buffer_event_data(event);
875         entry->ip = ip;
876
877         memcpy(&entry->buf, str, size);
878
879         /* Add a newline if necessary */
880         if (entry->buf[size - 1] != '\n') {
881                 entry->buf[size] = '\n';
882                 entry->buf[size + 1] = '\0';
883         } else
884                 entry->buf[size] = '\0';
885
886         __buffer_unlock_commit(buffer, event);
887         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
888
889         return size;
890 }
891 EXPORT_SYMBOL_GPL(__trace_puts);
892
893 /**
894  * __trace_bputs - write the pointer to a constant string into trace buffer
895  * @ip:    The address of the caller
896  * @str:   The constant string to write into the buffer
897  */
898 int __trace_bputs(unsigned long ip, const char *str)
899 {
900         struct ring_buffer_event *event;
901         struct ring_buffer *buffer;
902         struct bputs_entry *entry;
903         unsigned long irq_flags;
904         int size = sizeof(struct bputs_entry);
905         int pc;
906
907         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
908                 return 0;
909
910         pc = preempt_count();
911
912         if (unlikely(tracing_selftest_running || tracing_disabled))
913                 return 0;
914
915         local_save_flags(irq_flags);
916         buffer = global_trace.trace_buffer.buffer;
917         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
918                                             irq_flags, pc);
919         if (!event)
920                 return 0;
921
922         entry = ring_buffer_event_data(event);
923         entry->ip                       = ip;
924         entry->str                      = str;
925
926         __buffer_unlock_commit(buffer, event);
927         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
928
929         return 1;
930 }
931 EXPORT_SYMBOL_GPL(__trace_bputs);
932
933 #ifdef CONFIG_TRACER_SNAPSHOT
934 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
935 {
936         struct tracer *tracer = tr->current_trace;
937         unsigned long flags;
938
939         if (in_nmi()) {
940                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
941                 internal_trace_puts("*** snapshot is being ignored        ***\n");
942                 return;
943         }
944
945         if (!tr->allocated_snapshot) {
946                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
947                 internal_trace_puts("*** stopping trace here!   ***\n");
948                 tracing_off();
949                 return;
950         }
951
952         /* Note, snapshot can not be used when the tracer uses it */
953         if (tracer->use_max_tr) {
954                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
955                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
956                 return;
957         }
958
959         local_irq_save(flags);
960         update_max_tr(tr, current, smp_processor_id(), cond_data);
961         local_irq_restore(flags);
962 }
963
964 void tracing_snapshot_instance(struct trace_array *tr)
965 {
966         tracing_snapshot_instance_cond(tr, NULL);
967 }
968
969 /**
970  * tracing_snapshot - take a snapshot of the current buffer.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  *
976  * Note, make sure to allocate the snapshot either with
977  * tracing_snapshot_alloc(), or by doing it manually
978  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
979  *
980  * If the snapshot buffer is not allocated, it will stop tracing,
981  * basically making a permanent snapshot.
982  */
983 void tracing_snapshot(void)
984 {
985         struct trace_array *tr = &global_trace;
986
987         tracing_snapshot_instance(tr);
988 }
989 EXPORT_SYMBOL_GPL(tracing_snapshot);
990
991 /**
992  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
993  * @tr:         The tracing instance to snapshot
994  * @cond_data:  The data to be tested conditionally, and possibly saved
995  *
996  * This is the same as tracing_snapshot() except that the snapshot is
997  * conditional - the snapshot will only happen if the
998  * cond_snapshot.update() implementation receiving the cond_data
999  * returns true, which means that the trace array's cond_snapshot
1000  * update() operation used the cond_data to determine whether the
1001  * snapshot should be taken, and if it was, presumably saved it along
1002  * with the snapshot.
1003  */
1004 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1005 {
1006         tracing_snapshot_instance_cond(tr, cond_data);
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1009
1010 /**
1011  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1012  * @tr:         The tracing instance
1013  *
1014  * When the user enables a conditional snapshot using
1015  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1016  * with the snapshot.  This accessor is used to retrieve it.
1017  *
1018  * Should not be called from cond_snapshot.update(), since it takes
1019  * the tr->max_lock lock, which the code calling
1020  * cond_snapshot.update() has already taken.
1021  *
1022  * Returns the cond_data associated with the trace array's snapshot.
1023  */
1024 void *tracing_cond_snapshot_data(struct trace_array *tr)
1025 {
1026         void *cond_data = NULL;
1027
1028         arch_spin_lock(&tr->max_lock);
1029
1030         if (tr->cond_snapshot)
1031                 cond_data = tr->cond_snapshot->cond_data;
1032
1033         arch_spin_unlock(&tr->max_lock);
1034
1035         return cond_data;
1036 }
1037 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1038
1039 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1040                                         struct trace_buffer *size_buf, int cpu_id);
1041 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1042
1043 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1044 {
1045         int ret;
1046
1047         if (!tr->allocated_snapshot) {
1048
1049                 /* allocate spare buffer */
1050                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1051                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1052                 if (ret < 0)
1053                         return ret;
1054
1055                 tr->allocated_snapshot = true;
1056         }
1057
1058         return 0;
1059 }
1060
1061 static void free_snapshot(struct trace_array *tr)
1062 {
1063         /*
1064          * We don't free the ring buffer. Instead, we resize it because
1065          * the max_tr ring buffer has some state (e.g. ring->clock) and
1066          * we want to preserve it.
1067          */
1068         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1069         set_buffer_entries(&tr->max_buffer, 1);
1070         tracing_reset_online_cpus(&tr->max_buffer);
1071         tr->allocated_snapshot = false;
1072 }
1073
1074 /**
1075  * tracing_alloc_snapshot - allocate snapshot buffer.
1076  *
1077  * This only allocates the snapshot buffer if it isn't already
1078  * allocated - it doesn't also take a snapshot.
1079  *
1080  * This is meant to be used in cases where the snapshot buffer needs
1081  * to be set up for events that can't sleep but need to be able to
1082  * trigger a snapshot.
1083  */
1084 int tracing_alloc_snapshot(void)
1085 {
1086         struct trace_array *tr = &global_trace;
1087         int ret;
1088
1089         ret = tracing_alloc_snapshot_instance(tr);
1090         WARN_ON(ret < 0);
1091
1092         return ret;
1093 }
1094 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1095
1096 /**
1097  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1098  *
1099  * This is similar to tracing_snapshot(), but it will allocate the
1100  * snapshot buffer if it isn't already allocated. Use this only
1101  * where it is safe to sleep, as the allocation may sleep.
1102  *
1103  * This causes a swap between the snapshot buffer and the current live
1104  * tracing buffer. You can use this to take snapshots of the live
1105  * trace when some condition is triggered, but continue to trace.
1106  */
1107 void tracing_snapshot_alloc(void)
1108 {
1109         int ret;
1110
1111         ret = tracing_alloc_snapshot();
1112         if (ret < 0)
1113                 return;
1114
1115         tracing_snapshot();
1116 }
1117 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
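
/*
 * Editor's illustration (not from the original source): a kernel user that
 * wants to capture the trace when it detects a problem could, per the
 * comments above, do something like
 *
 *   tracing_snapshot_alloc();        - where sleeping is allowed
 *
 * or split the two steps:
 *
 *   tracing_alloc_snapshot();        - setup, may sleep
 *   ...
 *   tracing_snapshot();              - later, from a context that cannot sleep
 */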
1118
1119 /**
1120  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1121  * @tr:         The tracing instance
1122  * @cond_data:  User data to associate with the snapshot
1123  * @update:     Implementation of the cond_snapshot update function
1124  *
1125  * Check whether the conditional snapshot for the given instance has
1126  * already been enabled, or if the current tracer is already using a
1127  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1128  * save the cond_data and update function inside.
1129  *
1130  * Returns 0 if successful, error otherwise.
1131  */
1132 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1133                                  cond_update_fn_t update)
1134 {
1135         struct cond_snapshot *cond_snapshot;
1136         int ret = 0;
1137
1138         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1139         if (!cond_snapshot)
1140                 return -ENOMEM;
1141
1142         cond_snapshot->cond_data = cond_data;
1143         cond_snapshot->update = update;
1144
1145         mutex_lock(&trace_types_lock);
1146
1147         ret = tracing_alloc_snapshot_instance(tr);
1148         if (ret)
1149                 goto fail_unlock;
1150
1151         if (tr->current_trace->use_max_tr) {
1152                 ret = -EBUSY;
1153                 goto fail_unlock;
1154         }
1155
1156         /*
1157          * The cond_snapshot can only change to NULL without the
1158          * trace_types_lock. We don't care if we race with it going
1159          * to NULL, but we want to make sure that it's not set to
1160          * something other than NULL when we get here, which we can
1161          * do safely with only holding the trace_types_lock and not
1162          * having to take the max_lock.
1163          */
1164         if (tr->cond_snapshot) {
1165                 ret = -EBUSY;
1166                 goto fail_unlock;
1167         }
1168
1169         arch_spin_lock(&tr->max_lock);
1170         tr->cond_snapshot = cond_snapshot;
1171         arch_spin_unlock(&tr->max_lock);
1172
1173         mutex_unlock(&trace_types_lock);
1174
1175         return ret;
1176
1177  fail_unlock:
1178         mutex_unlock(&trace_types_lock);
1179         kfree(cond_snapshot);
1180         return ret;
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1183
1184 /**
1185  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1186  * @tr:         The tracing instance
1187  *
1188  * Check whether the conditional snapshot for the given instance is
1189  * enabled; if so, free the cond_snapshot associated with it,
1190  * otherwise return -EINVAL.
1191  *
1192  * Returns 0 if successful, error otherwise.
1193  */
1194 int tracing_snapshot_cond_disable(struct trace_array *tr)
1195 {
1196         int ret = 0;
1197
1198         arch_spin_lock(&tr->max_lock);
1199
1200         if (!tr->cond_snapshot)
1201                 ret = -EINVAL;
1202         else {
1203                 kfree(tr->cond_snapshot);
1204                 tr->cond_snapshot = NULL;
1205         }
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return ret;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1212 #else
1213 void tracing_snapshot(void)
1214 {
1215         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1216 }
1217 EXPORT_SYMBOL_GPL(tracing_snapshot);
1218 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1219 {
1220         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1221 }
1222 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1223 int tracing_alloc_snapshot(void)
1224 {
1225         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1226         return -ENODEV;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1229 void tracing_snapshot_alloc(void)
1230 {
1231         /* Give warning */
1232         tracing_snapshot();
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1235 void *tracing_cond_snapshot_data(struct trace_array *tr)
1236 {
1237         return NULL;
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1240 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1241 {
1242         return -ENODEV;
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1245 int tracing_snapshot_cond_disable(struct trace_array *tr)
1246 {
1247         return false;
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1250 #endif /* CONFIG_TRACER_SNAPSHOT */
1251
1252 void tracer_tracing_off(struct trace_array *tr)
1253 {
1254         if (tr->trace_buffer.buffer)
1255                 ring_buffer_record_off(tr->trace_buffer.buffer);
1256         /*
1257          * This flag is looked at when buffers haven't been allocated
1258          * yet, or by some tracers (like irqsoff), that just want to
1259          * know if the ring buffer has been disabled, but it can handle
1260          * races of where it gets disabled but we still do a record.
1261          * As the check is in the fast path of the tracers, it is more
1262          * important to be fast than accurate.
1263          */
1264         tr->buffer_disabled = 1;
1265         /* Make the flag seen by readers */
1266         smp_wmb();
1267 }
1268
1269 /**
1270  * tracing_off - turn off tracing buffers
1271  *
1272  * This function stops the tracing buffers from recording data.
1273  * It does not disable any overhead the tracers themselves may
1274  * be causing. This function simply causes all recording to
1275  * the ring buffers to fail.
1276  */
1277 void tracing_off(void)
1278 {
1279         tracer_tracing_off(&global_trace);
1280 }
1281 EXPORT_SYMBOL_GPL(tracing_off);
1282
1283 void disable_trace_on_warning(void)
1284 {
1285         if (__disable_trace_on_warning)
1286                 tracing_off();
1287 }
1288
1289 /**
1290  * tracer_tracing_is_on - show real state of ring buffer enabled
1291  * @tr : the trace array to know if ring buffer is enabled
1292  *
1293  * Shows real state of the ring buffer if it is enabled or not.
1294  */
1295 bool tracer_tracing_is_on(struct trace_array *tr)
1296 {
1297         if (tr->trace_buffer.buffer)
1298                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1299         return !tr->buffer_disabled;
1300 }
1301
1302 /**
1303  * tracing_is_on - show state of ring buffers enabled
1304  */
1305 int tracing_is_on(void)
1306 {
1307         return tracer_tracing_is_on(&global_trace);
1308 }
1309 EXPORT_SYMBOL_GPL(tracing_is_on);
1310
1311 static int __init set_buf_size(char *str)
1312 {
1313         unsigned long buf_size;
1314
1315         if (!str)
1316                 return 0;
1317         buf_size = memparse(str, &str);
1318         /* nr_entries can not be zero */
1319         if (buf_size == 0)
1320                 return 0;
1321         trace_buf_size = buf_size;
1322         return 1;
1323 }
1324 __setup("trace_buf_size=", set_buf_size);
1325
1326 static int __init set_tracing_thresh(char *str)
1327 {
1328         unsigned long threshold;
1329         int ret;
1330
1331         if (!str)
1332                 return 0;
1333         ret = kstrtoul(str, 0, &threshold);
1334         if (ret < 0)
1335                 return 0;
1336         tracing_thresh = threshold * 1000;
1337         return 1;
1338 }
1339 __setup("tracing_thresh=", set_tracing_thresh);
1340
1341 unsigned long nsecs_to_usecs(unsigned long nsecs)
1342 {
1343         return nsecs / 1000;
1344 }
1345
1346 /*
1347  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1348  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1349  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1350  * of strings in the order that the evals (enum) were defined.
1351  */
1352 #undef C
1353 #define C(a, b) b
1354
1355 /* These must match the bit positions in trace_iterator_flags */
1356 static const char *trace_options[] = {
1357         TRACE_FLAGS
1358         NULL
1359 };
1360
1361 static struct {
1362         u64 (*func)(void);
1363         const char *name;
1364         int in_ns;              /* is this clock in nanoseconds? */
1365 } trace_clocks[] = {
1366         { trace_clock_local,            "local",        1 },
1367         { trace_clock_global,           "global",       1 },
1368         { trace_clock_counter,          "counter",      0 },
1369         { trace_clock_jiffies,          "uptime",       0 },
1370         { trace_clock,                  "perf",         1 },
1371         { ktime_get_mono_fast_ns,       "mono",         1 },
1372         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1373         { ktime_get_boot_fast_ns,       "boot",         1 },
1374         ARCH_TRACE_CLOCKS
1375 };
1376
1377 bool trace_clock_in_ns(struct trace_array *tr)
1378 {
1379         if (trace_clocks[tr->clock_id].in_ns)
1380                 return true;
1381
1382         return false;
1383 }
1384
1385 /*
1386  * trace_parser_get_init - gets the buffer for trace parser
1387  */
1388 int trace_parser_get_init(struct trace_parser *parser, int size)
1389 {
1390         memset(parser, 0, sizeof(*parser));
1391
1392         parser->buffer = kmalloc(size, GFP_KERNEL);
1393         if (!parser->buffer)
1394                 return 1;
1395
1396         parser->size = size;
1397         return 0;
1398 }
1399
1400 /*
1401  * trace_parser_put - frees the buffer for trace parser
1402  */
1403 void trace_parser_put(struct trace_parser *parser)
1404 {
1405         kfree(parser->buffer);
1406         parser->buffer = NULL;
1407 }
1408
1409 /*
1410  * trace_get_user - reads the user input string separated by space
1411  * (matched by isspace(ch))
1412  *
1413  * For each string found the 'struct trace_parser' is updated,
1414  * and the function returns.
1415  *
1416  * Returns number of bytes read.
1417  *
1418  * See kernel/trace/trace.h for 'struct trace_parser' details.
1419  */
1420 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1421         size_t cnt, loff_t *ppos)
1422 {
1423         char ch;
1424         size_t read = 0;
1425         ssize_t ret;
1426
1427         if (!*ppos)
1428                 trace_parser_clear(parser);
1429
1430         ret = get_user(ch, ubuf++);
1431         if (ret)
1432                 goto out;
1433
1434         read++;
1435         cnt--;
1436
1437         /*
1438          * The parser is not finished with the last write,
1439          * continue reading the user input without skipping spaces.
1440          */
1441         if (!parser->cont) {
1442                 /* skip white space */
1443                 while (cnt && isspace(ch)) {
1444                         ret = get_user(ch, ubuf++);
1445                         if (ret)
1446                                 goto out;
1447                         read++;
1448                         cnt--;
1449                 }
1450
1451                 parser->idx = 0;
1452
1453                 /* only spaces were written */
1454                 if (isspace(ch) || !ch) {
1455                         *ppos += read;
1456                         ret = read;
1457                         goto out;
1458                 }
1459         }
1460
1461         /* read the non-space input */
1462         while (cnt && !isspace(ch) && ch) {
1463                 if (parser->idx < parser->size - 1)
1464                         parser->buffer[parser->idx++] = ch;
1465                 else {
1466                         ret = -EINVAL;
1467                         goto out;
1468                 }
1469                 ret = get_user(ch, ubuf++);
1470                 if (ret)
1471                         goto out;
1472                 read++;
1473                 cnt--;
1474         }
1475
1476         /* We either got finished input or we have to wait for another call. */
1477         if (isspace(ch) || !ch) {
1478                 parser->buffer[parser->idx] = 0;
1479                 parser->cont = false;
1480         } else if (parser->idx < parser->size - 1) {
1481                 parser->cont = true;
1482                 parser->buffer[parser->idx++] = ch;
1483                 /* Make sure the parsed string always terminates with '\0'. */
1484                 parser->buffer[parser->idx] = 0;
1485         } else {
1486                 ret = -EINVAL;
1487                 goto out;
1488         }
1489
1490         *ppos += read;
1491         ret = read;
1492
1493 out:
1494         return ret;
1495 }
1496
1497 /* TODO add a seq_buf_to_buffer() */
1498 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1499 {
1500         int len;
1501
1502         if (trace_seq_used(s) <= s->seq.readpos)
1503                 return -EBUSY;
1504
1505         len = trace_seq_used(s) - s->seq.readpos;
1506         if (cnt > len)
1507                 cnt = len;
1508         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1509
1510         s->seq.readpos += cnt;
1511         return cnt;
1512 }
1513
1514 unsigned long __read_mostly     tracing_thresh;
1515 static const struct file_operations tracing_max_lat_fops;
1516
1517 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1518         defined(CONFIG_FSNOTIFY)
1519
1520 static struct workqueue_struct *fsnotify_wq;
1521
1522 static void latency_fsnotify_workfn(struct work_struct *work)
1523 {
1524         struct trace_array *tr = container_of(work, struct trace_array,
1525                                               fsnotify_work);
1526         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1527                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1528 }
1529
1530 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1531 {
1532         struct trace_array *tr = container_of(iwork, struct trace_array,
1533                                               fsnotify_irqwork);
1534         queue_work(fsnotify_wq, &tr->fsnotify_work);
1535 }
1536
1537 static void trace_create_maxlat_file(struct trace_array *tr,
1538                                      struct dentry *d_tracer)
1539 {
1540         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1541         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1542         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1543                                               d_tracer, &tr->max_latency,
1544                                               &tracing_max_lat_fops);
1545 }
1546
1547 __init static int latency_fsnotify_init(void)
1548 {
1549         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1550                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1551         if (!fsnotify_wq) {
1552                 pr_err("Unable to allocate tr_max_lat_wq\n");
1553                 return -ENOMEM;
1554         }
1555         return 0;
1556 }
1557
1558 late_initcall_sync(latency_fsnotify_init);
1559
1560 void latency_fsnotify(struct trace_array *tr)
1561 {
1562         if (!fsnotify_wq)
1563                 return;
1564         /*
1565          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1566          * possible that we are called from __schedule() or do_idle(), which
1567          * could cause a deadlock.
1568          */
1569         irq_work_queue(&tr->fsnotify_irqwork);
1570 }
1571
1572 /*
1573  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1574  *  defined(CONFIG_FSNOTIFY)
1575  */
1576 #else
1577
1578 #define trace_create_maxlat_file(tr, d_tracer)                          \
1579         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1580                           &tr->max_latency, &tracing_max_lat_fops)
1581
1582 #endif
1583
1584 #ifdef CONFIG_TRACER_MAX_TRACE
1585 /*
1586  * Copy the new maximum trace into the separate maximum-trace
1587  * structure. (this way the maximum trace is permanently saved,
1588  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1589  */
1590 static void
1591 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1592 {
1593         struct trace_buffer *trace_buf = &tr->trace_buffer;
1594         struct trace_buffer *max_buf = &tr->max_buffer;
1595         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1596         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1597
1598         max_buf->cpu = cpu;
1599         max_buf->time_start = data->preempt_timestamp;
1600
1601         max_data->saved_latency = tr->max_latency;
1602         max_data->critical_start = data->critical_start;
1603         max_data->critical_end = data->critical_end;
1604
1605         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1606         max_data->pid = tsk->pid;
1607         /*
1608          * If tsk == current, then use current_uid(), as that does not use
1609          * RCU. The irq tracer can be called out of RCU scope.
1610          */
1611         if (tsk == current)
1612                 max_data->uid = current_uid();
1613         else
1614                 max_data->uid = task_uid(tsk);
1615
1616         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1617         max_data->policy = tsk->policy;
1618         max_data->rt_priority = tsk->rt_priority;
1619
1620         /* record this task's comm */
1621         tracing_record_cmdline(tsk);
1622         latency_fsnotify(tr);
1623 }
1624
1625 /**
1626  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1627  * @tr: tracer
1628  * @tsk: the task with the latency
1629  * @cpu: The cpu that initiated the trace.
1630  * @cond_data: User data associated with a conditional snapshot
1631  *
1632  * Flip the buffers between the @tr and the max_tr and record information
1633  * about which task was the cause of this latency.
1634  */
1635 void
1636 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1637               void *cond_data)
1638 {
1639         if (tr->stop_count)
1640                 return;
1641
1642         WARN_ON_ONCE(!irqs_disabled());
1643
1644         if (!tr->allocated_snapshot) {
1645                 /* Only the nop tracer should hit this when disabling */
1646                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1647                 return;
1648         }
1649
1650         arch_spin_lock(&tr->max_lock);
1651
1652         /* Inherit the recordable setting from trace_buffer */
1653         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1654                 ring_buffer_record_on(tr->max_buffer.buffer);
1655         else
1656                 ring_buffer_record_off(tr->max_buffer.buffer);
1657
1658 #ifdef CONFIG_TRACER_SNAPSHOT
1659         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1660                 goto out_unlock;
1661 #endif
1662         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1663
1664         __update_max_tr(tr, tsk, cpu);
1665
1666  out_unlock:
1667         arch_spin_unlock(&tr->max_lock);
1668 }
1669
1670 /**
1671  * update_max_tr_single - only copy one trace over, and reset the rest
1672  * @tr: tracer
1673  * @tsk: task with the latency
1674  * @cpu: the cpu of the buffer to copy.
1675  *
1676  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1677  */
1678 void
1679 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1680 {
1681         int ret;
1682
1683         if (tr->stop_count)
1684                 return;
1685
1686         WARN_ON_ONCE(!irqs_disabled());
1687         if (!tr->allocated_snapshot) {
1688                 /* Only the nop tracer should hit this when disabling */
1689                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1690                 return;
1691         }
1692
1693         arch_spin_lock(&tr->max_lock);
1694
1695         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1696
1697         if (ret == -EBUSY) {
1698                 /*
1699                  * We failed to swap the buffer due to a commit taking
1700                  * place on this CPU. We fail to record, but we reset
1701                  * the max trace buffer (no one writes directly to it)
1702                  * and flag that it failed.
1703                  */
1704                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1705                         "Failed to swap buffers due to commit in progress\n");
1706         }
1707
1708         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1709
1710         __update_max_tr(tr, tsk, cpu);
1711         arch_spin_unlock(&tr->max_lock);
1712 }
1713 #endif /* CONFIG_TRACER_MAX_TRACE */
1714
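/*
 * Wait for data on the iterator's per-CPU ring buffer. Static iterators
 * (trace_buffer_iter() set) are already filled or empty, so there is
 * nothing to wait for and we return immediately.
 */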
1715 static int wait_on_pipe(struct trace_iterator *iter, int full)
1716 {
1717         /* Iterators are static, they should be filled or empty */
1718         if (trace_buffer_iter(iter, iter->cpu_file))
1719                 return 0;
1720
1721         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1722                                 full);
1723 }
1724
1725 #ifdef CONFIG_FTRACE_STARTUP_TEST
1726 static bool selftests_can_run;
1727
1728 struct trace_selftests {
1729         struct list_head                list;
1730         struct tracer                   *type;
1731 };
1732
1733 static LIST_HEAD(postponed_selftests);
1734
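/*
 * Remember a tracer whose selftest must be postponed until selftests can
 * run; the list is drained later by init_trace_selftests().
 */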
1735 static int save_selftest(struct tracer *type)
1736 {
1737         struct trace_selftests *selftest;
1738
1739         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1740         if (!selftest)
1741                 return -ENOMEM;
1742
1743         selftest->type = type;
1744         list_add(&selftest->list, &postponed_selftests);
1745         return 0;
1746 }
1747
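/*
 * Run the tracer's selftest against the global trace array, or postpone
 * it if it is still too early in boot for selftests to run.
 */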
1748 static int run_tracer_selftest(struct tracer *type)
1749 {
1750         struct trace_array *tr = &global_trace;
1751         struct tracer *saved_tracer = tr->current_trace;
1752         int ret;
1753
1754         if (!type->selftest || tracing_selftest_disabled)
1755                 return 0;
1756
1757         /*
1758          * If a tracer registers early in boot up (before scheduling is
1759          * initialized and such), then do not run its selftests yet.
1760          * Instead, run it a little later in the boot process.
1761          */
1762         if (!selftests_can_run)
1763                 return save_selftest(type);
1764
1765         /*
1766          * Run a selftest on this tracer.
1767          * Here we reset the trace buffer, and set the current
1768          * tracer to be this tracer. The tracer can then run some
1769          * internal tracing to verify that everything is in order.
1770          * If we fail, we do not register this tracer.
1771          */
1772         tracing_reset_online_cpus(&tr->trace_buffer);
1773
1774         tr->current_trace = type;
1775
1776 #ifdef CONFIG_TRACER_MAX_TRACE
1777         if (type->use_max_tr) {
1778                 /* If we expanded the buffers, make sure the max is expanded too */
1779                 if (ring_buffer_expanded)
1780                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1781                                            RING_BUFFER_ALL_CPUS);
1782                 tr->allocated_snapshot = true;
1783         }
1784 #endif
1785
1786         /* the test is responsible for initializing and enabling */
1787         pr_info("Testing tracer %s: ", type->name);
1788         ret = type->selftest(type, tr);
1789         /* the test is responsible for resetting too */
1790         tr->current_trace = saved_tracer;
1791         if (ret) {
1792                 printk(KERN_CONT "FAILED!\n");
1793                 /* Add the warning after printing 'FAILED' */
1794                 WARN_ON(1);
1795                 return -1;
1796         }
1797         /* Only reset on passing, to avoid touching corrupted buffers */
1798         tracing_reset_online_cpus(&tr->trace_buffer);
1799
1800 #ifdef CONFIG_TRACER_MAX_TRACE
1801         if (type->use_max_tr) {
1802                 tr->allocated_snapshot = false;
1803
1804                 /* Shrink the max buffer again */
1805                 if (ring_buffer_expanded)
1806                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1807                                            RING_BUFFER_ALL_CPUS);
1808         }
1809 #endif
1810
1811         printk(KERN_CONT "PASSED\n");
1812         return 0;
1813 }
1814
1815 static __init int init_trace_selftests(void)
1816 {
1817         struct trace_selftests *p, *n;
1818         struct tracer *t, **last;
1819         int ret;
1820
1821         selftests_can_run = true;
1822
1823         mutex_lock(&trace_types_lock);
1824
1825         if (list_empty(&postponed_selftests))
1826                 goto out;
1827
1828         pr_info("Running postponed tracer tests:\n");
1829
1830         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1831                 /* This loop can take minutes when sanitizers are enabled, so
1832                  * let's make sure we allow RCU processing.
1833                  */
1834                 cond_resched();
1835                 ret = run_tracer_selftest(p->type);
1836                 /* If the test fails, then warn and remove from available_tracers */
1837                 if (ret < 0) {
1838                         WARN(1, "tracer: %s failed selftest, disabling\n",
1839                              p->type->name);
1840                         last = &trace_types;
1841                         for (t = trace_types; t; t = t->next) {
1842                                 if (t == p->type) {
1843                                         *last = t->next;
1844                                         break;
1845                                 }
1846                                 last = &t->next;
1847                         }
1848                 }
1849                 list_del(&p->list);
1850                 kfree(p);
1851         }
1852
1853  out:
1854         mutex_unlock(&trace_types_lock);
1855
1856         return 0;
1857 }
1858 core_initcall(init_trace_selftests);
1859 #else
1860 static inline int run_tracer_selftest(struct tracer *type)
1861 {
1862         return 0;
1863 }
1864 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1865
1866 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1867
1868 static void __init apply_trace_boot_options(void);
1869
1870 /**
1871  * register_tracer - register a tracer with the ftrace system.
1872  * @type: the plugin for the tracer
1873  *
1874  * Register a new plugin tracer.
1875  */
1876 int __init register_tracer(struct tracer *type)
1877 {
1878         struct tracer *t;
1879         int ret = 0;
1880
1881         if (!type->name) {
1882                 pr_info("Tracer must have a name\n");
1883                 return -1;
1884         }
1885
1886         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1887                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1888                 return -1;
1889         }
1890
1891         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1892                 pr_warn("Can not register tracer %s due to lockdown\n",
1893                            type->name);
1894                 return -EPERM;
1895         }
1896
1897         mutex_lock(&trace_types_lock);
1898
1899         tracing_selftest_running = true;
1900
1901         for (t = trace_types; t; t = t->next) {
1902                 if (strcmp(type->name, t->name) == 0) {
1903                         /* already found */
1904                         pr_info("Tracer %s already registered\n",
1905                                 type->name);
1906                         ret = -1;
1907                         goto out;
1908                 }
1909         }
1910
1911         if (!type->set_flag)
1912                 type->set_flag = &dummy_set_flag;
1913         if (!type->flags) {
1914                 /* allocate a dummy tracer_flags */
1915                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1916                 if (!type->flags) {
1917                         ret = -ENOMEM;
1918                         goto out;
1919                 }
1920                 type->flags->val = 0;
1921                 type->flags->opts = dummy_tracer_opt;
1922         } else
1923                 if (!type->flags->opts)
1924                         type->flags->opts = dummy_tracer_opt;
1925
1926         /* store the tracer for __set_tracer_option */
1927         type->flags->trace = type;
1928
1929         ret = run_tracer_selftest(type);
1930         if (ret < 0)
1931                 goto out;
1932
1933         type->next = trace_types;
1934         trace_types = type;
1935         add_tracer_options(&global_trace, type);
1936
1937  out:
1938         tracing_selftest_running = false;
1939         mutex_unlock(&trace_types_lock);
1940
1941         if (ret || !default_bootup_tracer)
1942                 goto out_unlock;
1943
1944         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1945                 goto out_unlock;
1946
1947         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1948         /* Do we want this tracer to start on bootup? */
1949         tracing_set_tracer(&global_trace, type->name);
1950         default_bootup_tracer = NULL;
1951
1952         apply_trace_boot_options();
1953
1954         /* disable other selftests, since this will break it. */
1955         tracing_selftest_disabled = true;
1956 #ifdef CONFIG_FTRACE_STARTUP_TEST
1957         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1958                type->name);
1959 #endif
1960
1961  out_unlock:
1962         return ret;
1963 }
1964
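/* Empty the ring buffer of a single CPU, waiting for pending commits first. */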
1965 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1966 {
1967         struct ring_buffer *buffer = buf->buffer;
1968
1969         if (!buffer)
1970                 return;
1971
1972         ring_buffer_record_disable(buffer);
1973
1974         /* Make sure all commits have finished */
1975         synchronize_rcu();
1976         ring_buffer_reset_cpu(buffer, cpu);
1977
1978         ring_buffer_record_enable(buffer);
1979 }
1980
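/* Empty the ring buffers of all online CPUs and restart the time_start stamp. */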
1981 void tracing_reset_online_cpus(struct trace_buffer *buf)
1982 {
1983         struct ring_buffer *buffer = buf->buffer;
1984         int cpu;
1985
1986         if (!buffer)
1987                 return;
1988
1989         ring_buffer_record_disable(buffer);
1990
1991         /* Make sure all commits have finished */
1992         synchronize_rcu();
1993
1994         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1995
1996         for_each_online_cpu(cpu)
1997                 ring_buffer_reset_cpu(buffer, cpu);
1998
1999         ring_buffer_record_enable(buffer);
2000 }
2001
2002 /* Must have trace_types_lock held */
2003 void tracing_reset_all_online_cpus(void)
2004 {
2005         struct trace_array *tr;
2006
2007         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2008                 if (!tr->clear_trace)
2009                         continue;
2010                 tr->clear_trace = false;
2011                 tracing_reset_online_cpus(&tr->trace_buffer);
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013                 tracing_reset_online_cpus(&tr->max_buffer);
2014 #endif
2015         }
2016 }
2017
2018 static int *tgid_map;
2019
2020 #define SAVED_CMDLINES_DEFAULT 128
2021 #define NO_CMDLINE_MAP UINT_MAX
2022 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2023 struct saved_cmdlines_buffer {
2024         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2025         unsigned *map_cmdline_to_pid;
2026         unsigned cmdline_num;
2027         int cmdline_idx;
2028         char *saved_cmdlines;
2029 };
2030 static struct saved_cmdlines_buffer *savedcmd;
2031
2032 /* temporarily disable recording */
2033 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2034
2035 static inline char *get_saved_cmdlines(int idx)
2036 {
2037         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2038 }
2039
2040 static inline void set_cmdline(int idx, const char *cmdline)
2041 {
2042         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2043 }
2044
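/*
 * Allocate storage for @val saved cmdlines plus the cmdline<->pid maps,
 * with every map slot starting out as NO_CMDLINE_MAP.
 */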
2045 static int allocate_cmdlines_buffer(unsigned int val,
2046                                     struct saved_cmdlines_buffer *s)
2047 {
2048         s->map_cmdline_to_pid = kmalloc_array(val,
2049                                               sizeof(*s->map_cmdline_to_pid),
2050                                               GFP_KERNEL);
2051         if (!s->map_cmdline_to_pid)
2052                 return -ENOMEM;
2053
2054         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2055         if (!s->saved_cmdlines) {
2056                 kfree(s->map_cmdline_to_pid);
2057                 return -ENOMEM;
2058         }
2059
2060         s->cmdline_idx = 0;
2061         s->cmdline_num = val;
2062         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2063                sizeof(s->map_pid_to_cmdline));
2064         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2065                val * sizeof(*s->map_cmdline_to_pid));
2066
2067         return 0;
2068 }
2069
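/* Allocate and initialize the global saved-cmdlines cache (savedcmd). */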
2070 static int trace_create_savedcmd(void)
2071 {
2072         int ret;
2073
2074         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2075         if (!savedcmd)
2076                 return -ENOMEM;
2077
2078         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2079         if (ret < 0) {
2080                 kfree(savedcmd);
2081                 savedcmd = NULL;
2082                 return -ENOMEM;
2083         }
2084
2085         return 0;
2086 }
2087
2088 int is_tracing_stopped(void)
2089 {
2090         return global_trace.stop_count;
2091 }
2092
2093 /**
2094  * tracing_start - quick start of the tracer
2095  *
2096  * If tracing is enabled but was stopped by tracing_stop,
2097  * this will start the tracer back up.
2098  */
2099 void tracing_start(void)
2100 {
2101         struct ring_buffer *buffer;
2102         unsigned long flags;
2103
2104         if (tracing_disabled)
2105                 return;
2106
2107         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2108         if (--global_trace.stop_count) {
2109                 if (global_trace.stop_count < 0) {
2110                         /* Someone screwed up their debugging */
2111                         WARN_ON_ONCE(1);
2112                         global_trace.stop_count = 0;
2113                 }
2114                 goto out;
2115         }
2116
2117         /* Prevent the buffers from switching */
2118         arch_spin_lock(&global_trace.max_lock);
2119
2120         buffer = global_trace.trace_buffer.buffer;
2121         if (buffer)
2122                 ring_buffer_record_enable(buffer);
2123
2124 #ifdef CONFIG_TRACER_MAX_TRACE
2125         buffer = global_trace.max_buffer.buffer;
2126         if (buffer)
2127                 ring_buffer_record_enable(buffer);
2128 #endif
2129
2130         arch_spin_unlock(&global_trace.max_lock);
2131
2132  out:
2133         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2134 }
2135
2136 static void tracing_start_tr(struct trace_array *tr)
2137 {
2138         struct ring_buffer *buffer;
2139         unsigned long flags;
2140
2141         if (tracing_disabled)
2142                 return;
2143
2144         /* If global, we need to also start the max tracer */
2145         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2146                 return tracing_start();
2147
2148         raw_spin_lock_irqsave(&tr->start_lock, flags);
2149
2150         if (--tr->stop_count) {
2151                 if (tr->stop_count < 0) {
2152                         /* Someone screwed up their debugging */
2153                         WARN_ON_ONCE(1);
2154                         tr->stop_count = 0;
2155                 }
2156                 goto out;
2157         }
2158
2159         buffer = tr->trace_buffer.buffer;
2160         if (buffer)
2161                 ring_buffer_record_enable(buffer);
2162
2163  out:
2164         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2165 }
2166
2167 /**
2168  * tracing_stop - quick stop of the tracer
2169  *
2170  * Lightweight way to stop tracing. Use in conjunction with
2171  * tracing_start.
2172  */
2173 void tracing_stop(void)
2174 {
2175         struct ring_buffer *buffer;
2176         unsigned long flags;
2177
2178         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2179         if (global_trace.stop_count++)
2180                 goto out;
2181
2182         /* Prevent the buffers from switching */
2183         arch_spin_lock(&global_trace.max_lock);
2184
2185         buffer = global_trace.trace_buffer.buffer;
2186         if (buffer)
2187                 ring_buffer_record_disable(buffer);
2188
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190         buffer = global_trace.max_buffer.buffer;
2191         if (buffer)
2192                 ring_buffer_record_disable(buffer);
2193 #endif
2194
2195         arch_spin_unlock(&global_trace.max_lock);
2196
2197  out:
2198         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2199 }
2200
2201 static void tracing_stop_tr(struct trace_array *tr)
2202 {
2203         struct ring_buffer *buffer;
2204         unsigned long flags;
2205
2206         /* If global, we need to also stop the max tracer */
2207         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2208                 return tracing_stop();
2209
2210         raw_spin_lock_irqsave(&tr->start_lock, flags);
2211         if (tr->stop_count++)
2212                 goto out;
2213
2214         buffer = tr->trace_buffer.buffer;
2215         if (buffer)
2216                 ring_buffer_record_disable(buffer);
2217
2218  out:
2219         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2220 }
2221
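/*
 * Save tsk->comm in the cmdline cache, reusing the next slot of the
 * circular buffer when the pid is not mapped yet. Returns 1 on success
 * (the idle task always counts) and 0 if the pid is out of range or the
 * cmdline lock could not be taken.
 */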
2222 static int trace_save_cmdline(struct task_struct *tsk)
2223 {
2224         unsigned pid, idx;
2225
2226         /* treat recording of idle task as a success */
2227         if (!tsk->pid)
2228                 return 1;
2229
2230         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2231                 return 0;
2232
2233         /*
2234          * It's not the end of the world if we don't get
2235          * the lock, but we also don't want to spin
2236          * nor do we want to disable interrupts,
2237          * so if we miss here, then better luck next time.
2238          */
2239         if (!arch_spin_trylock(&trace_cmdline_lock))
2240                 return 0;
2241
2242         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2243         if (idx == NO_CMDLINE_MAP) {
2244                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2245
2246                 /*
2247                  * Check whether the cmdline buffer at idx has a pid
2248                  * mapped. We are going to overwrite that entry so we
2249                  * need to clear the map_pid_to_cmdline. Otherwise we
2250                  * would read the new comm for the old pid.
2251                  */
2252                 pid = savedcmd->map_cmdline_to_pid[idx];
2253                 if (pid != NO_CMDLINE_MAP)
2254                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2255
2256                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2257                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2258
2259                 savedcmd->cmdline_idx = idx;
2260         }
2261
2262         set_cmdline(idx, tsk->comm);
2263
2264         arch_spin_unlock(&trace_cmdline_lock);
2265
2266         return 1;
2267 }
2268
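/*
 * Look up the comm saved for @pid, or a placeholder like "<...>" when
 * none is known. Callers must hold trace_cmdline_lock.
 */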
2269 static void __trace_find_cmdline(int pid, char comm[])
2270 {
2271         unsigned map;
2272
2273         if (!pid) {
2274                 strcpy(comm, "<idle>");
2275                 return;
2276         }
2277
2278         if (WARN_ON_ONCE(pid < 0)) {
2279                 strcpy(comm, "<XXX>");
2280                 return;
2281         }
2282
2283         if (pid > PID_MAX_DEFAULT) {
2284                 strcpy(comm, "<...>");
2285                 return;
2286         }
2287
2288         map = savedcmd->map_pid_to_cmdline[pid];
2289         if (map != NO_CMDLINE_MAP)
2290                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2291         else
2292                 strcpy(comm, "<...>");
2293 }
2294
2295 void trace_find_cmdline(int pid, char comm[])
2296 {
2297         preempt_disable();
2298         arch_spin_lock(&trace_cmdline_lock);
2299
2300         __trace_find_cmdline(pid, comm);
2301
2302         arch_spin_unlock(&trace_cmdline_lock);
2303         preempt_enable();
2304 }
2305
2306 int trace_find_tgid(int pid)
2307 {
2308         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2309                 return 0;
2310
2311         return tgid_map[pid];
2312 }
2313
2314 static int trace_save_tgid(struct task_struct *tsk)
2315 {
2316         /* treat recording of idle task as a success */
2317         if (!tsk->pid)
2318                 return 1;
2319
2320         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2321                 return 0;
2322
2323         tgid_map[tsk->pid] = tsk->tgid;
2324         return 1;
2325 }
2326
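/*
 * Return true if task info recording should be skipped: nothing was
 * requested, recording is temporarily disabled, tracing is off, or this
 * CPU has nothing new to save.
 */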
2327 static bool tracing_record_taskinfo_skip(int flags)
2328 {
2329         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2330                 return true;
2331         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2332                 return true;
2333         if (!__this_cpu_read(trace_taskinfo_save))
2334                 return true;
2335         return false;
2336 }
2337
2338 /**
2339  * tracing_record_taskinfo - record the task info of a task
2340  *
2341  * @task:  task to record
2342  * @flags: TRACE_RECORD_CMDLINE for recording comm
2343  *         TRACE_RECORD_TGID for recording tgid
2344  */
2345 void tracing_record_taskinfo(struct task_struct *task, int flags)
2346 {
2347         bool done;
2348
2349         if (tracing_record_taskinfo_skip(flags))
2350                 return;
2351
2352         /*
2353          * Record as much task information as possible. If some fail, continue
2354          * to try to record the others.
2355          */
2356         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2357         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2358
2359         /* If recording any information failed, retry again soon. */
2360         if (!done)
2361                 return;
2362
2363         __this_cpu_write(trace_taskinfo_save, false);
2364 }
2365
2366 /**
2367  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2368  *
2369  * @prev: previous task during sched_switch
2370  * @next: next task during sched_switch
2371  * @flags: TRACE_RECORD_CMDLINE for recording comm
2372  *         TRACE_RECORD_TGID for recording tgid
2373  */
2374 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2375                                           struct task_struct *next, int flags)
2376 {
2377         bool done;
2378
2379         if (tracing_record_taskinfo_skip(flags))
2380                 return;
2381
2382         /*
2383          * Record as much task information as possible. If some fail, continue
2384          * to try to record the others.
2385          */
2386         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2387         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2388         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2389         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2390
2391         /* If recording any information failed, retry again soon. */
2392         if (!done)
2393                 return;
2394
2395         __this_cpu_write(trace_taskinfo_save, false);
2396 }
2397
2398 /* Helpers to record specific task information */
2399 void tracing_record_cmdline(struct task_struct *task)
2400 {
2401         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2402 }
2403
2404 void tracing_record_tgid(struct task_struct *task)
2405 {
2406         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2407 }
2408
2409 /*
2410  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2411  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2412  * simplifies those functions and keeps them in sync.
2413  */
2414 enum print_line_t trace_handle_return(struct trace_seq *s)
2415 {
2416         return trace_seq_has_overflowed(s) ?
2417                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2418 }
2419 EXPORT_SYMBOL_GPL(trace_handle_return);
2420
2421 void
2422 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2423                              unsigned long flags, int pc)
2424 {
2425         struct task_struct *tsk = current;
2426
2427         entry->preempt_count            = pc & 0xff;
2428         entry->pid                      = (tsk) ? tsk->pid : 0;
2429         entry->type                     = type;
2430         entry->flags =
2431 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2432                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2433 #else
2434                 TRACE_FLAG_IRQS_NOSUPPORT |
2435 #endif
2436                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2437                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2438                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2439                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2440                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2441 }
2442 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2443
2444 struct ring_buffer_event *
2445 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2446                           int type,
2447                           unsigned long len,
2448                           unsigned long flags, int pc)
2449 {
2450         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2451 }
2452
2453 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2454 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2455 static int trace_buffered_event_ref;
2456
2457 /**
2458  * trace_buffered_event_enable - enable buffering events
2459  *
2460  * When events are being filtered, it is quicker to use a temporary
2461  * buffer to write the event data into if there's a likely chance
2462  * that it will not be committed. The discard of the ring buffer
2463  * is not as fast as committing, and is much slower than copying
2464  * a commit.
2465  *
2466  * When an event is to be filtered, allocate per cpu buffers to
2467  * write the event data into. If the event is then filtered and
2468  * discarded it is simply dropped; otherwise, the entire data is
2469  * committed in one shot.
2470  */
2471 void trace_buffered_event_enable(void)
2472 {
2473         struct ring_buffer_event *event;
2474         struct page *page;
2475         int cpu;
2476
2477         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2478
2479         if (trace_buffered_event_ref++)
2480                 return;
2481
2482         for_each_tracing_cpu(cpu) {
2483                 page = alloc_pages_node(cpu_to_node(cpu),
2484                                         GFP_KERNEL | __GFP_NORETRY, 0);
2485                 if (!page)
2486                         goto failed;
2487
2488                 event = page_address(page);
2489                 memset(event, 0, sizeof(*event));
2490
2491                 per_cpu(trace_buffered_event, cpu) = event;
2492
2493                 preempt_disable();
2494                 if (cpu == smp_processor_id() &&
2495                     this_cpu_read(trace_buffered_event) !=
2496                     per_cpu(trace_buffered_event, cpu))
2497                         WARN_ON_ONCE(1);
2498                 preempt_enable();
2499         }
2500
2501         return;
2502  failed:
2503         trace_buffered_event_disable();
2504 }
2505
2506 static void enable_trace_buffered_event(void *data)
2507 {
2508         /* Probably not needed, but do it anyway */
2509         smp_rmb();
2510         this_cpu_dec(trace_buffered_event_cnt);
2511 }
2512
2513 static void disable_trace_buffered_event(void *data)
2514 {
2515         this_cpu_inc(trace_buffered_event_cnt);
2516 }
2517
2518 /**
2519  * trace_buffered_event_disable - disable buffering events
2520  *
2521  * When a filter is removed, it is faster to not use the buffered
2522  * events, and to commit directly into the ring buffer. Free up
2523  * the temp buffers when there are no more users. This requires
2524  * special synchronization with current events.
2525  */
2526 void trace_buffered_event_disable(void)
2527 {
2528         int cpu;
2529
2530         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2531
2532         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2533                 return;
2534
2535         if (--trace_buffered_event_ref)
2536                 return;
2537
2538         preempt_disable();
2539         /* For each CPU, set the buffer as used. */
2540         smp_call_function_many(tracing_buffer_mask,
2541                                disable_trace_buffered_event, NULL, 1);
2542         preempt_enable();
2543
2544         /* Wait for all current users to finish */
2545         synchronize_rcu();
2546
2547         for_each_tracing_cpu(cpu) {
2548                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2549                 per_cpu(trace_buffered_event, cpu) = NULL;
2550         }
2551         /*
2552          * Make sure trace_buffered_event is NULL before clearing
2553          * trace_buffered_event_cnt.
2554          */
2555         smp_wmb();
2556
2557         preempt_disable();
2558         /* Do the work on each cpu */
2559         smp_call_function_many(tracing_buffer_mask,
2560                                enable_trace_buffered_event, NULL, 1);
2561         preempt_enable();
2562 }
2563
2564 static struct ring_buffer *temp_buffer;
2565
2566 struct ring_buffer_event *
2567 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2568                           struct trace_event_file *trace_file,
2569                           int type, unsigned long len,
2570                           unsigned long flags, int pc)
2571 {
2572         struct ring_buffer_event *entry;
2573         int val;
2574
2575         *current_rb = trace_file->tr->trace_buffer.buffer;
2576
2577         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2578              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2579             (entry = this_cpu_read(trace_buffered_event))) {
2580                 /* Try to use the per cpu buffer first */
2581                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2582                 if (val == 1) {
2583                         trace_event_setup(entry, type, flags, pc);
2584                         entry->array[0] = len;
2585                         return entry;
2586                 }
2587                 this_cpu_dec(trace_buffered_event_cnt);
2588         }
2589
2590         entry = __trace_buffer_lock_reserve(*current_rb,
2591                                             type, len, flags, pc);
2592         /*
2593          * If tracing is off, but we have triggers enabled
2594          * we still need to look at the event data. Use the temp_buffer
2595          * to store the trace event for the trigger to use. It's recursion
2596          * safe and will not be recorded anywhere.
2597          */
2598         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2599                 *current_rb = temp_buffer;
2600                 entry = __trace_buffer_lock_reserve(*current_rb,
2601                                                     type, len, flags, pc);
2602         }
2603         return entry;
2604 }
2605 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2606
2607 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2608 static DEFINE_MUTEX(tracepoint_printk_mutex);
2609
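/*
 * Render a trace event through tracepoint_print_iter and emit it via
 * printk(). Called from trace_event_buffer_commit() when the
 * tracepoint_printk static key is enabled.
 */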
2610 static void output_printk(struct trace_event_buffer *fbuffer)
2611 {
2612         struct trace_event_call *event_call;
2613         struct trace_event *event;
2614         unsigned long flags;
2615         struct trace_iterator *iter = tracepoint_print_iter;
2616
2617         /* We should never get here if iter is NULL */
2618         if (WARN_ON_ONCE(!iter))
2619                 return;
2620
2621         event_call = fbuffer->trace_file->event_call;
2622         if (!event_call || !event_call->event.funcs ||
2623             !event_call->event.funcs->trace)
2624                 return;
2625
2626         event = &fbuffer->trace_file->event_call->event;
2627
2628         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2629         trace_seq_init(&iter->seq);
2630         iter->ent = fbuffer->entry;
2631         event_call->event.funcs->trace(iter, 0, event);
2632         trace_seq_putc(&iter->seq, 0);
2633         printk("%s", iter->seq.buffer);
2634
2635         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2636 }
2637
2638 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2639                              void __user *buffer, size_t *lenp,
2640                              loff_t *ppos)
2641 {
2642         int save_tracepoint_printk;
2643         int ret;
2644
2645         mutex_lock(&tracepoint_printk_mutex);
2646         save_tracepoint_printk = tracepoint_printk;
2647
2648         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2649
2650         /*
2651          * This will force exiting early, as tracepoint_printk
2652          * is always zero when tracepoint_print_iter is not allocated.
2653          */
2654         if (!tracepoint_print_iter)
2655                 tracepoint_printk = 0;
2656
2657         if (save_tracepoint_printk == tracepoint_printk)
2658                 goto out;
2659
2660         if (tracepoint_printk)
2661                 static_key_enable(&tracepoint_printk_key.key);
2662         else
2663                 static_key_disable(&tracepoint_printk_key.key);
2664
2665  out:
2666         mutex_unlock(&tracepoint_printk_mutex);
2667
2668         return ret;
2669 }
2670
2671 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2672 {
2673         if (static_key_false(&tracepoint_printk_key.key))
2674                 output_printk(fbuffer);
2675
2676         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2677                                     fbuffer->event, fbuffer->entry,
2678                                     fbuffer->flags, fbuffer->pc);
2679 }
2680 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2681
2682 /*
2683  * Skip 3:
2684  *
2685  *   trace_buffer_unlock_commit_regs()
2686  *   trace_event_buffer_commit()
2687  *   trace_event_raw_event_xxx()
2688  */
2689 # define STACK_SKIP 3
2690
2691 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2692                                      struct ring_buffer *buffer,
2693                                      struct ring_buffer_event *event,
2694                                      unsigned long flags, int pc,
2695                                      struct pt_regs *regs)
2696 {
2697         __buffer_unlock_commit(buffer, event);
2698
2699         /*
2700          * If regs is not set, then skip the necessary functions.
2701          * Note, we can still get here via blktrace, wakeup tracer
2702          * and mmiotrace, but that's ok if they lose a function or
2703          * two. They are not that meaningful.
2704          */
2705         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2706         ftrace_trace_userstack(buffer, flags, pc);
2707 }
2708
2709 /*
2710  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2711  */
2712 void
2713 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2714                                    struct ring_buffer_event *event)
2715 {
2716         __buffer_unlock_commit(buffer, event);
2717 }
2718
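/* Hand one ring buffer event (its entry and length) to an exporter's write(). */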
2719 static void
2720 trace_process_export(struct trace_export *export,
2721                struct ring_buffer_event *event)
2722 {
2723         struct trace_entry *entry;
2724         unsigned int size = 0;
2725
2726         entry = ring_buffer_event_data(event);
2727         size = ring_buffer_event_length(event);
2728         export->write(export, entry, size);
2729 }
2730
2731 static DEFINE_MUTEX(ftrace_export_lock);
2732
2733 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2734
2735 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2736
2737 static inline void ftrace_exports_enable(void)
2738 {
2739         static_branch_enable(&ftrace_exports_enabled);
2740 }
2741
2742 static inline void ftrace_exports_disable(void)
2743 {
2744         static_branch_disable(&ftrace_exports_enabled);
2745 }
2746
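/*
 * Walk the RCU-protected list of exporters and pass the event to each
 * of them, with preemption disabled.
 */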
2747 static void ftrace_exports(struct ring_buffer_event *event)
2748 {
2749         struct trace_export *export;
2750
2751         preempt_disable_notrace();
2752
2753         export = rcu_dereference_raw_check(ftrace_exports_list);
2754         while (export) {
2755                 trace_process_export(export, event);
2756                 export = rcu_dereference_raw_check(export->next);
2757         }
2758
2759         preempt_enable_notrace();
2760 }
2761
2762 static inline void
2763 add_trace_export(struct trace_export **list, struct trace_export *export)
2764 {
2765         rcu_assign_pointer(export->next, *list);
2766         /*
2767          * We are entering export into the list but another
2768          * CPU might be walking that list. We need to make sure
2769          * the export->next pointer is valid before another CPU sees
2770          * the export pointer included into the list.
2771          */
2772         rcu_assign_pointer(*list, export);
2773 }
2774
2775 static inline int
2776 rm_trace_export(struct trace_export **list, struct trace_export *export)
2777 {
2778         struct trace_export **p;
2779
2780         for (p = list; *p != NULL; p = &(*p)->next)
2781                 if (*p == export)
2782                         break;
2783
2784         if (*p != export)
2785                 return -1;
2786
2787         rcu_assign_pointer(*p, (*p)->next);
2788
2789         return 0;
2790 }
2791
2792 static inline void
2793 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2794 {
2795         if (*list == NULL)
2796                 ftrace_exports_enable();
2797
2798         add_trace_export(list, export);
2799 }
2800
2801 static inline int
2802 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804         int ret;
2805
2806         ret = rm_trace_export(list, export);
2807         if (*list == NULL)
2808                 ftrace_exports_disable();
2809
2810         return ret;
2811 }
2812
2813 int register_ftrace_export(struct trace_export *export)
2814 {
2815         if (WARN_ON_ONCE(!export->write))
2816                 return -1;
2817
2818         mutex_lock(&ftrace_export_lock);
2819
2820         add_ftrace_export(&ftrace_exports_list, export);
2821
2822         mutex_unlock(&ftrace_export_lock);
2823
2824         return 0;
2825 }
2826 EXPORT_SYMBOL_GPL(register_ftrace_export);
2827
2828 int unregister_ftrace_export(struct trace_export *export)
2829 {
2830         int ret;
2831
2832         mutex_lock(&ftrace_export_lock);
2833
2834         ret = rm_ftrace_export(&ftrace_exports_list, export);
2835
2836         mutex_unlock(&ftrace_export_lock);
2837
2838         return ret;
2839 }
2840 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2841
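/*
 * Record a function entry (ip and parent_ip) in the trace buffer and,
 * if ftrace exporters are registered, hand the event to them as well.
 */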
2842 void
2843 trace_function(struct trace_array *tr,
2844                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2845                int pc)
2846 {
2847         struct trace_event_call *call = &event_function;
2848         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2849         struct ring_buffer_event *event;
2850         struct ftrace_entry *entry;
2851
2852         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2853                                             flags, pc);
2854         if (!event)
2855                 return;
2856         entry   = ring_buffer_event_data(event);
2857         entry->ip                       = ip;
2858         entry->parent_ip                = parent_ip;
2859
2860         if (!call_filter_check_discard(call, entry, buffer, event)) {
2861                 if (static_branch_unlikely(&ftrace_exports_enabled))
2862                         ftrace_exports(event);
2863                 __buffer_unlock_commit(buffer, event);
2864         }
2865 }
2866
2867 #ifdef CONFIG_STACKTRACE
2868
2869 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2870 #define FTRACE_KSTACK_NESTING   4
2871
2872 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2873
2874 struct ftrace_stack {
2875         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2876 };
2877
2878
2879 struct ftrace_stacks {
2880         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2881 };
2882
2883 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2884 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2885
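/*
 * Save a kernel stack trace into the trace buffer, using one of the
 * per-CPU, per-nesting-level ftrace_stack slots as temporary storage.
 */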
2886 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2887                                  unsigned long flags,
2888                                  int skip, int pc, struct pt_regs *regs)
2889 {
2890         struct trace_event_call *call = &event_kernel_stack;
2891         struct ring_buffer_event *event;
2892         unsigned int size, nr_entries;
2893         struct ftrace_stack *fstack;
2894         struct stack_entry *entry;
2895         int stackidx;
2896
2897         /*
2898          * Add one, for this function and the call to stack_trace_save().
2899          * If regs is set, then these functions will not be in the way.
2900          */
2901 #ifndef CONFIG_UNWINDER_ORC
2902         if (!regs)
2903                 skip++;
2904 #endif
2905
2906         /*
2907          * Since events can happen in NMIs there's no safe way to
2908          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2909          * or NMI comes in, it will just have to use the default
2910          * FTRACE_STACK_SIZE.
2911          */
2912         preempt_disable_notrace();
2913
2914         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2915
2916         /* This should never happen. If it does, yell once and skip */
2917         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2918                 goto out;
2919
2920         /*
2921          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2922          * interrupt will either see the value pre increment or post
2923          * increment. If the interrupt happens pre increment it will have
2924          * restored the counter when it returns.  We just need a barrier to
2925          * keep gcc from moving things around.
2926          */
2927         barrier();
2928
2929         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2930         size = ARRAY_SIZE(fstack->calls);
2931
2932         if (regs) {
2933                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2934                                                    size, skip);
2935         } else {
2936                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2937         }
2938
2939         size = nr_entries * sizeof(unsigned long);
2940         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2941                                             sizeof(*entry) + size, flags, pc);
2942         if (!event)
2943                 goto out;
2944         entry = ring_buffer_event_data(event);
2945
2946         memcpy(&entry->caller, fstack->calls, size);
2947         entry->size = nr_entries;
2948
2949         if (!call_filter_check_discard(call, entry, buffer, event))
2950                 __buffer_unlock_commit(buffer, event);
2951
2952  out:
2953         /* Again, don't let gcc optimize things here */
2954         barrier();
2955         __this_cpu_dec(ftrace_stack_reserve);
2956         preempt_enable_notrace();
2957
2958 }
2959
2960 static inline void ftrace_trace_stack(struct trace_array *tr,
2961                                       struct ring_buffer *buffer,
2962                                       unsigned long flags,
2963                                       int skip, int pc, struct pt_regs *regs)
2964 {
2965         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2966                 return;
2967
2968         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2969 }
2970
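/*
 * Record a kernel stack trace. If RCU is not watching, enter an
 * RCU-watching section first, except in NMI context where that is not
 * safe and the trace is skipped.
 */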
2971 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2972                    int pc)
2973 {
2974         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2975
2976         if (rcu_is_watching()) {
2977                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2978                 return;
2979         }
2980
2981         /*
2982          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2983          * but if the above rcu_is_watching() failed, then the NMI
2984          * triggered someplace critical, and rcu_irq_enter() should
2985          * not be called from NMI.
2986          */
2987         if (unlikely(in_nmi()))
2988                 return;
2989
2990         rcu_irq_enter_irqson();
2991         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2992         rcu_irq_exit_irqson();
2993 }
2994
2995 /**
2996  * trace_dump_stack - record a stack back trace in the trace buffer
2997  * @skip: Number of functions to skip (helper handlers)
2998  */
2999 void trace_dump_stack(int skip)
3000 {
3001         unsigned long flags;
3002
3003         if (tracing_disabled || tracing_selftest_running)
3004                 return;
3005
3006         local_save_flags(flags);
3007
3008 #ifndef CONFIG_UNWINDER_ORC
3009         /* Skip 1 to skip this function. */
3010         skip++;
3011 #endif
3012         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
3013                              flags, skip, preempt_count(), NULL);
3014 }
3015 EXPORT_SYMBOL_GPL(trace_dump_stack);
3016
3017 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3018 static DEFINE_PER_CPU(int, user_stack_count);
3019
3020 static void
3021 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
3022 {
3023         struct trace_event_call *call = &event_user_stack;
3024         struct ring_buffer_event *event;
3025         struct userstack_entry *entry;
3026
3027         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3028                 return;
3029
3030         /*
3031          * NMIs cannot handle page faults, even with fixups.
3032          * The save user stack can (and often does) fault.
3033          */
3034         if (unlikely(in_nmi()))
3035                 return;
3036
3037         /*
3038          * prevent recursion, since the user stack tracing may
3039          * trigger other kernel events.
3040          */
3041         preempt_disable();
3042         if (__this_cpu_read(user_stack_count))
3043                 goto out;
3044
3045         __this_cpu_inc(user_stack_count);
3046
3047         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3048                                             sizeof(*entry), flags, pc);
3049         if (!event)
3050                 goto out_drop_count;
3051         entry   = ring_buffer_event_data(event);
3052
3053         entry->tgid             = current->tgid;
3054         memset(&entry->caller, 0, sizeof(entry->caller));
3055
3056         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3057         if (!call_filter_check_discard(call, entry, buffer, event))
3058                 __buffer_unlock_commit(buffer, event);
3059
3060  out_drop_count:
3061         __this_cpu_dec(user_stack_count);
3062  out:
3063         preempt_enable();
3064 }
3065 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3066 static void ftrace_trace_userstack(struct ring_buffer *buffer,
3067                                    unsigned long flags, int pc)
3068 {
3069 }
3070 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3071
3072 #endif /* CONFIG_STACKTRACE */
3073
3074 /* created for use with alloc_percpu */
3075 struct trace_buffer_struct {
3076         int nesting;
3077         char buffer[4][TRACE_BUF_SIZE];
3078 };
3079
3080 static struct trace_buffer_struct *trace_percpu_buffer;
3081
3082 /*
3083  * This allows for lockless recording.  If we're nested too deeply, then
3084  * this returns NULL.
3085  */
3086 static char *get_trace_buf(void)
3087 {
3088         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3089
3090         if (!buffer || buffer->nesting >= 4)
3091                 return NULL;
3092
3093         buffer->nesting++;
3094
3095         /* Interrupts must see nesting incremented before we use the buffer */
3096         barrier();
3097         return &buffer->buffer[buffer->nesting][0];
3098 }
3099
3100 static void put_trace_buf(void)
3101 {
3102         /* Don't let the decrement of nesting leak before this */
3103         barrier();
3104         this_cpu_dec(trace_percpu_buffer->nesting);
3105 }
3106
3107 static int alloc_percpu_trace_buffer(void)
3108 {
3109         struct trace_buffer_struct *buffers;
3110
3111         buffers = alloc_percpu(struct trace_buffer_struct);
3112         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3113                 return -ENOMEM;
3114
3115         trace_percpu_buffer = buffers;
3116         return 0;
3117 }
3118
3119 static int buffers_allocated;
3120
3121 void trace_printk_init_buffers(void)
3122 {
3123         if (buffers_allocated)
3124                 return;
3125
3126         if (alloc_percpu_trace_buffer())
3127                 return;
3128
3129         /* trace_printk() is for debug use only. Don't use it in production. */
3130
3131         pr_warn("\n");
3132         pr_warn("**********************************************************\n");
3133         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3134         pr_warn("**                                                      **\n");
3135         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3136         pr_warn("**                                                      **\n");
3137         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3138         pr_warn("** unsafe for production use.                           **\n");
3139         pr_warn("**                                                      **\n");
3140         pr_warn("** If you see this message and you are not debugging    **\n");
3141         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3142         pr_warn("**                                                      **\n");
3143         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3144         pr_warn("**********************************************************\n");
3145
3146         /* Expand the buffers to set size */
3147         tracing_update_buffers();
3148
3149         buffers_allocated = 1;
3150
3151         /*
3152          * trace_printk_init_buffers() can be called by modules.
3153          * If that happens, then we need to start cmdline recording
3154          * directly here. If the global_trace.buffer is already
3155          * allocated here, then this was called by module code.
3156          */
3157         if (global_trace.trace_buffer.buffer)
3158                 tracing_start_cmdline_record();
3159 }
3160 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3161
3162 void trace_printk_start_comm(void)
3163 {
3164         /* Start tracing comms if trace printk is set */
3165         if (!buffers_allocated)
3166                 return;
3167         tracing_start_cmdline_record();
3168 }
3169
3170 static void trace_printk_start_stop_comm(int enabled)
3171 {
3172         if (!buffers_allocated)
3173                 return;
3174
3175         if (enabled)
3176                 tracing_start_cmdline_record();
3177         else
3178                 tracing_stop_cmdline_record();
3179 }
3180
3181 /**
3182  * trace_vbprintk - write binary msg to tracing buffer
3183  * @ip:    The address of the caller
3184  * @fmt:   The string format to write to the buffer
3185  * @args:  Arguments for @fmt
3186  */
3187 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3188 {
3189         struct trace_event_call *call = &event_bprint;
3190         struct ring_buffer_event *event;
3191         struct ring_buffer *buffer;
3192         struct trace_array *tr = &global_trace;
3193         struct bprint_entry *entry;
3194         unsigned long flags;
3195         char *tbuffer;
3196         int len = 0, size, pc;
3197
3198         if (unlikely(tracing_selftest_running || tracing_disabled))
3199                 return 0;
3200
3201         /* Don't pollute graph traces with trace_vprintk internals */
3202         pause_graph_tracing();
3203
3204         pc = preempt_count();
3205         preempt_disable_notrace();
3206
3207         tbuffer = get_trace_buf();
3208         if (!tbuffer) {
3209                 len = 0;
3210                 goto out_nobuffer;
3211         }
3212
3213         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3214
3215         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3216                 goto out;
3217
3218         local_save_flags(flags);
3219         size = sizeof(*entry) + sizeof(u32) * len;
3220         buffer = tr->trace_buffer.buffer;
3221         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3222                                             flags, pc);
3223         if (!event)
3224                 goto out;
3225         entry = ring_buffer_event_data(event);
3226         entry->ip                       = ip;
3227         entry->fmt                      = fmt;
3228
3229         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3230         if (!call_filter_check_discard(call, entry, buffer, event)) {
3231                 __buffer_unlock_commit(buffer, event);
3232                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3233         }
3234
3235 out:
3236         put_trace_buf();
3237
3238 out_nobuffer:
3239         preempt_enable_notrace();
3240         unpause_graph_tracing();
3241
3242         return len;
3243 }
3244 EXPORT_SYMBOL_GPL(trace_vbprintk);
3245
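/*
 * Common helper for trace_array_vprintk() and trace_array_printk_buf():
 * format the message into a per-CPU scratch buffer and store it as a
 * TRACE_PRINT event in the given ring buffer.
 */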
3246 __printf(3, 0)
3247 static int
3248 __trace_array_vprintk(struct ring_buffer *buffer,
3249                       unsigned long ip, const char *fmt, va_list args)
3250 {
3251         struct trace_event_call *call = &event_print;
3252         struct ring_buffer_event *event;
3253         int len = 0, size, pc;
3254         struct print_entry *entry;
3255         unsigned long flags;
3256         char *tbuffer;
3257
3258         if (tracing_disabled || tracing_selftest_running)
3259                 return 0;
3260
3261         /* Don't pollute graph traces with trace_vprintk internals */
3262         pause_graph_tracing();
3263
3264         pc = preempt_count();
3265         preempt_disable_notrace();
3266
3267
3268         tbuffer = get_trace_buf();
3269         if (!tbuffer) {
3270                 len = 0;
3271                 goto out_nobuffer;
3272         }
3273
3274         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3275
3276         local_save_flags(flags);
3277         size = sizeof(*entry) + len + 1;
3278         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3279                                             flags, pc);
3280         if (!event)
3281                 goto out;
3282         entry = ring_buffer_event_data(event);
3283         entry->ip = ip;
3284
3285         memcpy(&entry->buf, tbuffer, len + 1);
3286         if (!call_filter_check_discard(call, entry, buffer, event)) {
3287                 __buffer_unlock_commit(buffer, event);
3288                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3289         }
3290
3291 out:
3292         put_trace_buf();
3293
3294 out_nobuffer:
3295         preempt_enable_notrace();
3296         unpause_graph_tracing();
3297
3298         return len;
3299 }
3300
3301 __printf(3, 0)
3302 int trace_array_vprintk(struct trace_array *tr,
3303                         unsigned long ip, const char *fmt, va_list args)
3304 {
3305         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3306 }
3307
3308 __printf(3, 0)
3309 int trace_array_printk(struct trace_array *tr,
3310                        unsigned long ip, const char *fmt, ...)
3311 {
3312         int ret;
3313         va_list ap;
3314
3315         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3316                 return 0;
3317
3318         if (!tr)
3319                 return -ENOENT;
3320
3321         va_start(ap, fmt);
3322         ret = trace_array_vprintk(tr, ip, fmt, ap);
3323         va_end(ap);
3324         return ret;
3325 }
3326 EXPORT_SYMBOL_GPL(trace_array_printk);
3327
3328 __printf(3, 4)
3329 int trace_array_printk_buf(struct ring_buffer *buffer,
3330                            unsigned long ip, const char *fmt, ...)
3331 {
3332         int ret;
3333         va_list ap;
3334
3335         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3336                 return 0;
3337
3338         va_start(ap, fmt);
3339         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3340         va_end(ap);
3341         return ret;
3342 }
3343
3344 __printf(2, 0)
3345 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3346 {
3347         return trace_array_vprintk(&global_trace, ip, fmt, args);
3348 }
3349 EXPORT_SYMBOL_GPL(trace_vprintk);
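/*
 * A small sketch of writing into a specific instance's buffer with
 * trace_array_printk(); example_instance_log() is hypothetical and @tr
 * is assumed to have been obtained by the caller (a NULL @tr is reported
 * as -ENOENT above).  Nothing is recorded unless the TRACE_ITER_PRINTK
 * trace option is set, per the check above.
 */
static __maybe_unused void example_instance_log(struct trace_array *tr, int err)
{
	if (tr)
		trace_array_printk(tr, _THIS_IP_, "probe failed: %d\n", err);
}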
3350
3351 static void trace_iterator_increment(struct trace_iterator *iter)
3352 {
3353         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3354
3355         iter->idx++;
3356         if (buf_iter)
3357                 ring_buffer_read(buf_iter, NULL);
3358 }
3359
3360 static struct trace_entry *
3361 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3362                 unsigned long *lost_events)
3363 {
3364         struct ring_buffer_event *event;
3365         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3366
3367         if (buf_iter)
3368                 event = ring_buffer_iter_peek(buf_iter, ts);
3369         else
3370                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3371                                          lost_events);
3372
3373         if (event) {
3374                 iter->ent_size = ring_buffer_event_length(event);
3375                 return ring_buffer_event_data(event);
3376         }
3377         iter->ent_size = 0;
3378         return NULL;
3379 }
3380
3381 static struct trace_entry *
3382 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3383                   unsigned long *missing_events, u64 *ent_ts)
3384 {
3385         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3386         struct trace_entry *ent, *next = NULL;
3387         unsigned long lost_events = 0, next_lost = 0;
3388         int cpu_file = iter->cpu_file;
3389         u64 next_ts = 0, ts;
3390         int next_cpu = -1;
3391         int next_size = 0;
3392         int cpu;
3393
3394         /*
3395          * If we are in a per_cpu trace file, don't bother iterating over
3396          * all CPUs; peek directly at that one.
3397          */
3398         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3399                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3400                         return NULL;
3401                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3402                 if (ent_cpu)
3403                         *ent_cpu = cpu_file;
3404
3405                 return ent;
3406         }
3407
3408         for_each_tracing_cpu(cpu) {
3409
3410                 if (ring_buffer_empty_cpu(buffer, cpu))
3411                         continue;
3412
3413                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3414
3415                 /*
3416                  * Pick the entry with the smallest timestamp:
3417                  */
3418                 if (ent && (!next || ts < next_ts)) {
3419                         next = ent;
3420                         next_cpu = cpu;
3421                         next_ts = ts;
3422                         next_lost = lost_events;
3423                         next_size = iter->ent_size;
3424                 }
3425         }
3426
3427         iter->ent_size = next_size;
3428
3429         if (ent_cpu)
3430                 *ent_cpu = next_cpu;
3431
3432         if (ent_ts)
3433                 *ent_ts = next_ts;
3434
3435         if (missing_events)
3436                 *missing_events = next_lost;
3437
3438         return next;
3439 }
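/*
 * Worked example with hypothetical timestamps: if the oldest entries in
 * the CPU 0, 1 and 2 buffers carry ts = 120, 90 and 105, the loop above
 * returns the CPU 1 entry and records its lost_events and ent_size.
 * Repeated calls therefore interleave the per-CPU buffers into a single
 * stream ordered by timestamp, which is what the "trace" file presents.
 */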
3440
3441 /* Find the next real entry, without updating the iterator itself */
3442 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3443                                           int *ent_cpu, u64 *ent_ts)
3444 {
3445         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3446 }
3447
3448 /* Find the next real entry, and increment the iterator to the next entry */
3449 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3450 {
3451         iter->ent = __find_next_entry(iter, &iter->cpu,
3452                                       &iter->lost_events, &iter->ts);
3453
3454         if (iter->ent)
3455                 trace_iterator_increment(iter);
3456
3457         return iter->ent ? iter : NULL;
3458 }
3459
3460 static void trace_consume(struct trace_iterator *iter)
3461 {
3462         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3463                             &iter->lost_events);
3464 }
3465
3466 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3467 {
3468         struct trace_iterator *iter = m->private;
3469         int i = (int)*pos;
3470         void *ent;
3471
3472         WARN_ON_ONCE(iter->leftover);
3473
3474         (*pos)++;
3475
3476         /* can't go backwards */
3477         if (iter->idx > i)
3478                 return NULL;
3479
3480         if (iter->idx < 0)
3481                 ent = trace_find_next_entry_inc(iter);
3482         else
3483                 ent = iter;
3484
3485         while (ent && iter->idx < i)
3486                 ent = trace_find_next_entry_inc(iter);
3487
3488         iter->pos = *pos;
3489
3490         return ent;
3491 }
3492
3493 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3494 {
3495         struct ring_buffer_event *event;
3496         struct ring_buffer_iter *buf_iter;
3497         unsigned long entries = 0;
3498         u64 ts;
3499
3500         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3501
3502         buf_iter = trace_buffer_iter(iter, cpu);
3503         if (!buf_iter)
3504                 return;
3505
3506         ring_buffer_iter_reset(buf_iter);
3507
3508         /*
3509          * With the max latency tracers, a reset may never have taken
3510          * place on a CPU. This is evident from the entry timestamps
3511          * being before the start of the buffer.
3512          */
3513         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3514                 if (ts >= iter->trace_buffer->time_start)
3515                         break;
3516                 entries++;
3517                 ring_buffer_read(buf_iter, NULL);
3518         }
3519
3520         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3521 }
3522
3523 /*
3524  * The current tracer is copied to avoid taking a global lock
3525  * all around.
3526  */
3527 static void *s_start(struct seq_file *m, loff_t *pos)
3528 {
3529         struct trace_iterator *iter = m->private;
3530         struct trace_array *tr = iter->tr;
3531         int cpu_file = iter->cpu_file;
3532         void *p = NULL;
3533         loff_t l = 0;
3534         int cpu;
3535
3536         /*
3537          * Copy the tracer to avoid using a global lock all around.
3538          * iter->trace is a copy of current_trace; the name pointer can
3539          * be compared instead of calling strcmp(), as iter->trace->name
3540          * points to the same string as current_trace->name.
3541          */
3542         mutex_lock(&trace_types_lock);
3543         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3544                 *iter->trace = *tr->current_trace;
3545         mutex_unlock(&trace_types_lock);
3546
3547 #ifdef CONFIG_TRACER_MAX_TRACE
3548         if (iter->snapshot && iter->trace->use_max_tr)
3549                 return ERR_PTR(-EBUSY);
3550 #endif
3551
3552         if (!iter->snapshot)
3553                 atomic_inc(&trace_record_taskinfo_disabled);
3554
3555         if (*pos != iter->pos) {
3556                 iter->ent = NULL;
3557                 iter->cpu = 0;
3558                 iter->idx = -1;
3559
3560                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3561                         for_each_tracing_cpu(cpu)
3562                                 tracing_iter_reset(iter, cpu);
3563                 } else
3564                         tracing_iter_reset(iter, cpu_file);
3565
3566                 iter->leftover = 0;
3567                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3568                         ;
3569
3570         } else {
3571                 /*
3572                  * If we overflowed the seq_file before, then we want
3573                  * to just reuse the trace_seq buffer again.
3574                  */
3575                 if (iter->leftover)
3576                         p = iter;
3577                 else {
3578                         l = *pos - 1;
3579                         p = s_next(m, p, &l);
3580                 }
3581         }
3582
3583         trace_event_read_lock();
3584         trace_access_lock(cpu_file);
3585         return p;
3586 }
3587
3588 static void s_stop(struct seq_file *m, void *p)
3589 {
3590         struct trace_iterator *iter = m->private;
3591
3592 #ifdef CONFIG_TRACER_MAX_TRACE
3593         if (iter->snapshot && iter->trace->use_max_tr)
3594                 return;
3595 #endif
3596
3597         if (!iter->snapshot)
3598                 atomic_dec(&trace_record_taskinfo_disabled);
3599
3600         trace_access_unlock(iter->cpu_file);
3601         trace_event_read_unlock();
3602 }
3603
3604 static void
3605 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3606                       unsigned long *entries, int cpu)
3607 {
3608         unsigned long count;
3609
3610         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3611         /*
3612          * If this buffer has skipped entries, then it still holds every
3613          * entry of the trace and we only need to ignore the ones that
3614          * come before the time stamp.
3615          */
3616         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3617                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3618                 /* total is the same as the entries */
3619                 *total = count;
3620         } else
3621                 *total = count +
3622                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3623         *entries = count;
3624 }
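/*
 * Worked example with hypothetical numbers: if a CPU buffer reports 1000
 * entries, 200 of which were marked skipped by tracing_iter_reset()
 * because they predate time_start, then *entries = *total = 800.  With
 * no skipped entries and 50 overwritten (overrun) events, *entries = 1000
 * and *total = 1050.
 */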
3625
3626 static void
3627 get_total_entries(struct trace_buffer *buf,
3628                   unsigned long *total, unsigned long *entries)
3629 {
3630         unsigned long t, e;
3631         int cpu;
3632
3633         *total = 0;
3634         *entries = 0;
3635
3636         for_each_tracing_cpu(cpu) {
3637                 get_total_entries_cpu(buf, &t, &e, cpu);
3638                 *total += t;
3639                 *entries += e;
3640         }
3641 }
3642
3643 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3644 {
3645         unsigned long total, entries;
3646
3647         if (!tr)
3648                 tr = &global_trace;
3649
3650         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3651
3652         return entries;
3653 }
3654
3655 unsigned long trace_total_entries(struct trace_array *tr)
3656 {
3657         unsigned long total, entries;
3658
3659         if (!tr)
3660                 tr = &global_trace;
3661
3662         get_total_entries(&tr->trace_buffer, &total, &entries);
3663
3664         return entries;
3665 }
3666
3667 static void print_lat_help_header(struct seq_file *m)
3668 {
3669         seq_puts(m, "#                  _------=> CPU#            \n"
3670                     "#                 / _-----=> irqs-off        \n"
3671                     "#                | / _----=> need-resched    \n"
3672                     "#                || / _---=> hardirq/softirq \n"
3673                     "#                ||| / _--=> preempt-depth   \n"
3674                     "#                |||| /     delay            \n"
3675                     "#  cmd     pid   ||||| time  |   caller      \n"
3676                     "#     \\   /      |||||  \\    |   /         \n");
3677 }
3678
3679 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3680 {
3681         unsigned long total;
3682         unsigned long entries;
3683
3684         get_total_entries(buf, &total, &entries);
3685         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3686                    entries, total, num_online_cpus());
3687         seq_puts(m, "#\n");
3688 }
3689
3690 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3691                                    unsigned int flags)
3692 {
3693         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3694
3695         print_event_info(buf, m);
3696
3697         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3698         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3699 }
3700
3701 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3702                                        unsigned int flags)
3703 {
3704         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3705         const char *space = "          ";
3706         int prec = tgid ? 10 : 2;
3707
3708         print_event_info(buf, m);
3709
3710         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3711         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3712         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3713         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3714         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3715         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3716         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3717 }
3718
3719 void
3720 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3721 {
3722         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3723         struct trace_buffer *buf = iter->trace_buffer;
3724         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3725         struct tracer *type = iter->trace;
3726         unsigned long entries;
3727         unsigned long total;
3728         const char *name = "preemption";
3729
3730         name = type->name;
3731
3732         get_total_entries(buf, &total, &entries);
3733
3734         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3735                    name, UTS_RELEASE);
3736         seq_puts(m, "# -----------------------------------"
3737                  "---------------------------------\n");
3738         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3739                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3740                    nsecs_to_usecs(data->saved_latency),
3741                    entries,
3742                    total,
3743                    buf->cpu,
3744 #if defined(CONFIG_PREEMPT_NONE)
3745                    "server",
3746 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3747                    "desktop",
3748 #elif defined(CONFIG_PREEMPT)
3749                    "preempt",
3750 #elif defined(CONFIG_PREEMPT_RT)
3751                    "preempt_rt",
3752 #else
3753                    "unknown",
3754 #endif
3755                    /* These are reserved for later use */
3756                    0, 0, 0, 0);
3757 #ifdef CONFIG_SMP
3758         seq_printf(m, " #P:%d)\n", num_online_cpus());
3759 #else
3760         seq_puts(m, ")\n");
3761 #endif
3762         seq_puts(m, "#    -----------------\n");
3763         seq_printf(m, "#    | task: %.16s-%d "
3764                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3765                    data->comm, data->pid,
3766                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3767                    data->policy, data->rt_priority);
3768         seq_puts(m, "#    -----------------\n");
3769
3770         if (data->critical_start) {
3771                 seq_puts(m, "#  => started at: ");
3772                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3773                 trace_print_seq(m, &iter->seq);
3774                 seq_puts(m, "\n#  => ended at:   ");
3775                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3776                 trace_print_seq(m, &iter->seq);
3777                 seq_puts(m, "\n#\n");
3778         }
3779
3780         seq_puts(m, "#\n");
3781 }
3782
3783 static void test_cpu_buff_start(struct trace_iterator *iter)
3784 {
3785         struct trace_seq *s = &iter->seq;
3786         struct trace_array *tr = iter->tr;
3787
3788         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3789                 return;
3790
3791         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3792                 return;
3793
3794         if (cpumask_available(iter->started) &&
3795             cpumask_test_cpu(iter->cpu, iter->started))
3796                 return;
3797
3798         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3799                 return;
3800
3801         if (cpumask_available(iter->started))
3802                 cpumask_set_cpu(iter->cpu, iter->started);
3803
3804         /* Don't print started cpu buffer for the first entry of the trace */
3805         if (iter->idx > 1)
3806                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3807                                 iter->cpu);
3808 }
3809
3810 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3811 {
3812         struct trace_array *tr = iter->tr;
3813         struct trace_seq *s = &iter->seq;
3814         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3815         struct trace_entry *entry;
3816         struct trace_event *event;
3817
3818         entry = iter->ent;
3819
3820         test_cpu_buff_start(iter);
3821
3822         event = ftrace_find_event(entry->type);
3823
3824         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3825                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3826                         trace_print_lat_context(iter);
3827                 else
3828                         trace_print_context(iter);
3829         }
3830
3831         if (trace_seq_has_overflowed(s))
3832                 return TRACE_TYPE_PARTIAL_LINE;
3833
3834         if (event)
3835                 return event->funcs->trace(iter, sym_flags, event);
3836
3837         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3838
3839         return trace_handle_return(s);
3840 }
3841
3842 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3843 {
3844         struct trace_array *tr = iter->tr;
3845         struct trace_seq *s = &iter->seq;
3846         struct trace_entry *entry;
3847         struct trace_event *event;
3848
3849         entry = iter->ent;
3850
3851         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3852                 trace_seq_printf(s, "%d %d %llu ",
3853                                  entry->pid, iter->cpu, iter->ts);
3854
3855         if (trace_seq_has_overflowed(s))
3856                 return TRACE_TYPE_PARTIAL_LINE;
3857
3858         event = ftrace_find_event(entry->type);
3859         if (event)
3860                 return event->funcs->raw(iter, 0, event);
3861
3862         trace_seq_printf(s, "%d ?\n", entry->type);
3863
3864         return trace_handle_return(s);
3865 }
3866
3867 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3868 {
3869         struct trace_array *tr = iter->tr;
3870         struct trace_seq *s = &iter->seq;
3871         unsigned char newline = '\n';
3872         struct trace_entry *entry;
3873         struct trace_event *event;
3874
3875         entry = iter->ent;
3876
3877         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3878                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3879                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3880                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3881                 if (trace_seq_has_overflowed(s))
3882                         return TRACE_TYPE_PARTIAL_LINE;
3883         }
3884
3885         event = ftrace_find_event(entry->type);
3886         if (event) {
3887                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3888                 if (ret != TRACE_TYPE_HANDLED)
3889                         return ret;
3890         }
3891
3892         SEQ_PUT_FIELD(s, newline);
3893
3894         return trace_handle_return(s);
3895 }
3896
3897 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3898 {
3899         struct trace_array *tr = iter->tr;
3900         struct trace_seq *s = &iter->seq;
3901         struct trace_entry *entry;
3902         struct trace_event *event;
3903
3904         entry = iter->ent;
3905
3906         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3907                 SEQ_PUT_FIELD(s, entry->pid);
3908                 SEQ_PUT_FIELD(s, iter->cpu);
3909                 SEQ_PUT_FIELD(s, iter->ts);
3910                 if (trace_seq_has_overflowed(s))
3911                         return TRACE_TYPE_PARTIAL_LINE;
3912         }
3913
3914         event = ftrace_find_event(entry->type);
3915         return event ? event->funcs->binary(iter, 0, event) :
3916                 TRACE_TYPE_HANDLED;
3917 }
3918
3919 int trace_empty(struct trace_iterator *iter)
3920 {
3921         struct ring_buffer_iter *buf_iter;
3922         int cpu;
3923
3924         /* If we are looking at one CPU buffer, only check that one */
3925         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3926                 cpu = iter->cpu_file;
3927                 buf_iter = trace_buffer_iter(iter, cpu);
3928                 if (buf_iter) {
3929                         if (!ring_buffer_iter_empty(buf_iter))
3930                                 return 0;
3931                 } else {
3932                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3933                                 return 0;
3934                 }
3935                 return 1;
3936         }
3937
3938         for_each_tracing_cpu(cpu) {
3939                 buf_iter = trace_buffer_iter(iter, cpu);
3940                 if (buf_iter) {
3941                         if (!ring_buffer_iter_empty(buf_iter))
3942                                 return 0;
3943                 } else {
3944                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3945                                 return 0;
3946                 }
3947         }
3948
3949         return 1;
3950 }
3951
3952 /*  Called with trace_event_read_lock() held. */
3953 enum print_line_t print_trace_line(struct trace_iterator *iter)
3954 {
3955         struct trace_array *tr = iter->tr;
3956         unsigned long trace_flags = tr->trace_flags;
3957         enum print_line_t ret;
3958
3959         if (iter->lost_events) {
3960                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3961                                  iter->cpu, iter->lost_events);
3962                 if (trace_seq_has_overflowed(&iter->seq))
3963                         return TRACE_TYPE_PARTIAL_LINE;
3964         }
3965
3966         if (iter->trace && iter->trace->print_line) {
3967                 ret = iter->trace->print_line(iter);
3968                 if (ret != TRACE_TYPE_UNHANDLED)
3969                         return ret;
3970         }
3971
3972         if (iter->ent->type == TRACE_BPUTS &&
3973                         trace_flags & TRACE_ITER_PRINTK &&
3974                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3975                 return trace_print_bputs_msg_only(iter);
3976
3977         if (iter->ent->type == TRACE_BPRINT &&
3978                         trace_flags & TRACE_ITER_PRINTK &&
3979                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3980                 return trace_print_bprintk_msg_only(iter);
3981
3982         if (iter->ent->type == TRACE_PRINT &&
3983                         trace_flags & TRACE_ITER_PRINTK &&
3984                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3985                 return trace_print_printk_msg_only(iter);
3986
3987         if (trace_flags & TRACE_ITER_BIN)
3988                 return print_bin_fmt(iter);
3989
3990         if (trace_flags & TRACE_ITER_HEX)
3991                 return print_hex_fmt(iter);
3992
3993         if (trace_flags & TRACE_ITER_RAW)
3994                 return print_raw_fmt(iter);
3995
3996         return print_trace_fmt(iter);
3997 }
3998
3999 void trace_latency_header(struct seq_file *m)
4000 {
4001         struct trace_iterator *iter = m->private;
4002         struct trace_array *tr = iter->tr;
4003
4004         /* print nothing if the buffers are empty */
4005         if (trace_empty(iter))
4006                 return;
4007
4008         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4009                 print_trace_header(m, iter);
4010
4011         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4012                 print_lat_help_header(m);
4013 }
4014
4015 void trace_default_header(struct seq_file *m)
4016 {
4017         struct trace_iterator *iter = m->private;
4018         struct trace_array *tr = iter->tr;
4019         unsigned long trace_flags = tr->trace_flags;
4020
4021         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4022                 return;
4023
4024         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4025                 /* print nothing if the buffers are empty */
4026                 if (trace_empty(iter))
4027                         return;
4028                 print_trace_header(m, iter);
4029                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4030                         print_lat_help_header(m);
4031         } else {
4032                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4033                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4034                                 print_func_help_header_irq(iter->trace_buffer,
4035                                                            m, trace_flags);
4036                         else
4037                                 print_func_help_header(iter->trace_buffer, m,
4038                                                        trace_flags);
4039                 }
4040         }
4041 }
4042
4043 static void test_ftrace_alive(struct seq_file *m)
4044 {
4045         if (!ftrace_is_dead())
4046                 return;
4047         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4048                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4049 }
4050
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052 static void show_snapshot_main_help(struct seq_file *m)
4053 {
4054         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4055                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4056                     "#                      Takes a snapshot of the main buffer.\n"
4057                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4058                     "#                      (Doesn't have to be '2' works with any number that\n"
4059                     "#                       is not a '0' or '1')\n");
4060 }
4061
4062 static void show_snapshot_percpu_help(struct seq_file *m)
4063 {
4064         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4065 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4066         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4067                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4068 #else
4069         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4070                     "#                     Must use main snapshot file to allocate.\n");
4071 #endif
4072         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4073                     "#                      (Doesn't have to be '2' works with any number that\n"
4074                     "#                       is not a '0' or '1')\n");
4075 }
4076
4077 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4078 {
4079         if (iter->tr->allocated_snapshot)
4080                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4081         else
4082                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4083
4084         seq_puts(m, "# Snapshot commands:\n");
4085         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4086                 show_snapshot_main_help(m);
4087         else
4088                 show_snapshot_percpu_help(m);
4089 }
4090 #else
4091 /* Should never be called */
4092 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4093 #endif
4094
4095 static int s_show(struct seq_file *m, void *v)
4096 {
4097         struct trace_iterator *iter = v;
4098         int ret;
4099
4100         if (iter->ent == NULL) {
4101                 if (iter->tr) {
4102                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4103                         seq_puts(m, "#\n");
4104                         test_ftrace_alive(m);
4105                 }
4106                 if (iter->snapshot && trace_empty(iter))
4107                         print_snapshot_help(m, iter);
4108                 else if (iter->trace && iter->trace->print_header)
4109                         iter->trace->print_header(m);
4110                 else
4111                         trace_default_header(m);
4112
4113         } else if (iter->leftover) {
4114                 /*
4115                  * If we filled the seq_file buffer earlier, we
4116                  * want to just show it now.
4117                  */
4118                 ret = trace_print_seq(m, &iter->seq);
4119
4120                 /* ret should be zero this time, but you never know */
4121                 iter->leftover = ret;
4122
4123         } else {
4124                 print_trace_line(iter);
4125                 ret = trace_print_seq(m, &iter->seq);
4126                 /*
4127                  * If we overflow the seq_file buffer, then it will
4128                  * ask us for this data again at start up.
4129                  * Use that instead.
4130                  *  ret is 0 if seq_file write succeeded.
4131                  *        -1 otherwise.
4132                  */
4133                 iter->leftover = ret;
4134         }
4135
4136         return 0;
4137 }
4138
4139 /*
4140  * Should be used after trace_array_get(), trace_types_lock
4141  * ensures that i_cdev was already initialized.
4142  */
4143 static inline int tracing_get_cpu(struct inode *inode)
4144 {
4145         if (inode->i_cdev) /* See trace_create_cpu_file() */
4146                 return (long)inode->i_cdev - 1;
4147         return RING_BUFFER_ALL_CPUS;
4148 }
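/*
 * This is the counterpart of trace_create_cpu_file(), which stores
 * cpu + 1 in i_cdev.  For example, a NULL i_cdev means the file is not a
 * per-cpu one and yields RING_BUFFER_ALL_CPUS, while an i_cdev of
 * (void *)3, written for the per_cpu/cpu2 files, yields CPU 2.
 */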
4149
4150 static const struct seq_operations tracer_seq_ops = {
4151         .start          = s_start,
4152         .next           = s_next,
4153         .stop           = s_stop,
4154         .show           = s_show,
4155 };
4156
4157 static struct trace_iterator *
4158 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4159 {
4160         struct trace_array *tr = inode->i_private;
4161         struct trace_iterator *iter;
4162         int cpu;
4163
4164         if (tracing_disabled)
4165                 return ERR_PTR(-ENODEV);
4166
4167         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4168         if (!iter)
4169                 return ERR_PTR(-ENOMEM);
4170
4171         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4172                                     GFP_KERNEL);
4173         if (!iter->buffer_iter)
4174                 goto release;
4175
4176         /*
4177          * We make a copy of the current tracer to avoid concurrent
4178          * changes on it while we are reading.
4179          */
4180         mutex_lock(&trace_types_lock);
4181         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4182         if (!iter->trace)
4183                 goto fail;
4184
4185         *iter->trace = *tr->current_trace;
4186
4187         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4188                 goto fail;
4189
4190         iter->tr = tr;
4191
4192 #ifdef CONFIG_TRACER_MAX_TRACE
4193         /* Currently only the top directory has a snapshot */
4194         if (tr->current_trace->print_max || snapshot)
4195                 iter->trace_buffer = &tr->max_buffer;
4196         else
4197 #endif
4198                 iter->trace_buffer = &tr->trace_buffer;
4199         iter->snapshot = snapshot;
4200         iter->pos = -1;
4201         iter->cpu_file = tracing_get_cpu(inode);
4202         mutex_init(&iter->mutex);
4203
4204         /* Notify the tracer early; before we stop tracing. */
4205         if (iter->trace && iter->trace->open)
4206                 iter->trace->open(iter);
4207
4208         /* Annotate start of buffers if we had overruns */
4209         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4210                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4211
4212         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4213         if (trace_clocks[tr->clock_id].in_ns)
4214                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4215
4216         /* stop the trace while dumping if we are not opening "snapshot" */
4217         if (!iter->snapshot)
4218                 tracing_stop_tr(tr);
4219
4220         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4221                 for_each_tracing_cpu(cpu) {
4222                         iter->buffer_iter[cpu] =
4223                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4224                                                          cpu, GFP_KERNEL);
4225                 }
4226                 ring_buffer_read_prepare_sync();
4227                 for_each_tracing_cpu(cpu) {
4228                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4229                         tracing_iter_reset(iter, cpu);
4230                 }
4231         } else {
4232                 cpu = iter->cpu_file;
4233                 iter->buffer_iter[cpu] =
4234                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4235                                                  cpu, GFP_KERNEL);
4236                 ring_buffer_read_prepare_sync();
4237                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4238                 tracing_iter_reset(iter, cpu);
4239         }
4240
4241         mutex_unlock(&trace_types_lock);
4242
4243         return iter;
4244
4245  fail:
4246         mutex_unlock(&trace_types_lock);
4247         kfree(iter->trace);
4248         kfree(iter->buffer_iter);
4249 release:
4250         seq_release_private(inode, file);
4251         return ERR_PTR(-ENOMEM);
4252 }
4253
4254 int tracing_open_generic(struct inode *inode, struct file *filp)
4255 {
4256         int ret;
4257
4258         ret = tracing_check_open_get_tr(NULL);
4259         if (ret)
4260                 return ret;
4261
4262         filp->private_data = inode->i_private;
4263         return 0;
4264 }
4265
4266 bool tracing_is_disabled(void)
4267 {
4268         return tracing_disabled ? true : false;
4269 }
4270
4271 /*
4272  * Open and update trace_array ref count.
4273  * Must have the current trace_array passed to it.
4274  */
4275 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4276 {
4277         struct trace_array *tr = inode->i_private;
4278         int ret;
4279
4280         ret = tracing_check_open_get_tr(tr);
4281         if (ret)
4282                 return ret;
4283
4284         filp->private_data = inode->i_private;
4285
4286         return 0;
4287 }
4288
4289 static int tracing_release(struct inode *inode, struct file *file)
4290 {
4291         struct trace_array *tr = inode->i_private;
4292         struct seq_file *m = file->private_data;
4293         struct trace_iterator *iter;
4294         int cpu;
4295
4296         if (!(file->f_mode & FMODE_READ)) {
4297                 trace_array_put(tr);
4298                 return 0;
4299         }
4300
4301         /* Writes do not use seq_file */
4302         iter = m->private;
4303         mutex_lock(&trace_types_lock);
4304
4305         for_each_tracing_cpu(cpu) {
4306                 if (iter->buffer_iter[cpu])
4307                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4308         }
4309
4310         if (iter->trace && iter->trace->close)
4311                 iter->trace->close(iter);
4312
4313         if (!iter->snapshot)
4314                 /* reenable tracing if it was previously enabled */
4315                 tracing_start_tr(tr);
4316
4317         __trace_array_put(tr);
4318
4319         mutex_unlock(&trace_types_lock);
4320
4321         mutex_destroy(&iter->mutex);
4322         free_cpumask_var(iter->started);
4323         kfree(iter->trace);
4324         kfree(iter->buffer_iter);
4325         seq_release_private(inode, file);
4326
4327         return 0;
4328 }
4329
4330 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4331 {
4332         struct trace_array *tr = inode->i_private;
4333
4334         trace_array_put(tr);
4335         return 0;
4336 }
4337
4338 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4339 {
4340         struct trace_array *tr = inode->i_private;
4341
4342         trace_array_put(tr);
4343
4344         return single_release(inode, file);
4345 }
4346
4347 static int tracing_open(struct inode *inode, struct file *file)
4348 {
4349         struct trace_array *tr = inode->i_private;
4350         struct trace_iterator *iter;
4351         int ret;
4352
4353         ret = tracing_check_open_get_tr(tr);
4354         if (ret)
4355                 return ret;
4356
4357         /* If this file was open for write, then erase contents */
4358         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4359                 int cpu = tracing_get_cpu(inode);
4360                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4361
4362 #ifdef CONFIG_TRACER_MAX_TRACE
4363                 if (tr->current_trace->print_max)
4364                         trace_buf = &tr->max_buffer;
4365 #endif
4366
4367                 if (cpu == RING_BUFFER_ALL_CPUS)
4368                         tracing_reset_online_cpus(trace_buf);
4369                 else
4370                         tracing_reset_cpu(trace_buf, cpu);
4371         }
4372
4373         if (file->f_mode & FMODE_READ) {
4374                 iter = __tracing_open(inode, file, false);
4375                 if (IS_ERR(iter))
4376                         ret = PTR_ERR(iter);
4377                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4378                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4379         }
4380
4381         if (ret < 0)
4382                 trace_array_put(tr);
4383
4384         return ret;
4385 }
4386
4387 /*
4388  * Some tracers are not suitable for instance buffers.
4389  * A tracer is always available for the global array (toplevel)
4390  * or if it explicitly states that it is.
4391  */
4392 static bool
4393 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4394 {
4395         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4396 }
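/*
 * A minimal sketch of a tracer that opts in to instance buffers; the
 * "example" tracer is hypothetical and only illustrates the
 * allow_instances flag checked above.  Once passed to register_tracer(),
 * such a tracer becomes selectable from every instance's current_tracer
 * file, not just the top-level one.
 */
static struct tracer example_tracer __maybe_unused = {
	.name			= "example",
	.allow_instances	= true,
};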
4397
4398 /* Find the next tracer that this trace array may use */
4399 static struct tracer *
4400 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4401 {
4402         while (t && !trace_ok_for_array(t, tr))
4403                 t = t->next;
4404
4405         return t;
4406 }
4407
4408 static void *
4409 t_next(struct seq_file *m, void *v, loff_t *pos)
4410 {
4411         struct trace_array *tr = m->private;
4412         struct tracer *t = v;
4413
4414         (*pos)++;
4415
4416         if (t)
4417                 t = get_tracer_for_array(tr, t->next);
4418
4419         return t;
4420 }
4421
4422 static void *t_start(struct seq_file *m, loff_t *pos)
4423 {
4424         struct trace_array *tr = m->private;
4425         struct tracer *t;
4426         loff_t l = 0;
4427
4428         mutex_lock(&trace_types_lock);
4429
4430         t = get_tracer_for_array(tr, trace_types);
4431         for (; t && l < *pos; t = t_next(m, t, &l))
4432                         ;
4433
4434         return t;
4435 }
4436
4437 static void t_stop(struct seq_file *m, void *p)
4438 {
4439         mutex_unlock(&trace_types_lock);
4440 }
4441
4442 static int t_show(struct seq_file *m, void *v)
4443 {
4444         struct tracer *t = v;
4445
4446         if (!t)
4447                 return 0;
4448
4449         seq_puts(m, t->name);
4450         if (t->next)
4451                 seq_putc(m, ' ');
4452         else
4453                 seq_putc(m, '\n');
4454
4455         return 0;
4456 }
4457
4458 static const struct seq_operations show_traces_seq_ops = {
4459         .start          = t_start,
4460         .next           = t_next,
4461         .stop           = t_stop,
4462         .show           = t_show,
4463 };
4464
4465 static int show_traces_open(struct inode *inode, struct file *file)
4466 {
4467         struct trace_array *tr = inode->i_private;
4468         struct seq_file *m;
4469         int ret;
4470
4471         ret = tracing_check_open_get_tr(tr);
4472         if (ret)
4473                 return ret;
4474
4475         ret = seq_open(file, &show_traces_seq_ops);
4476         if (ret) {
4477                 trace_array_put(tr);
4478                 return ret;
4479         }
4480
4481         m = file->private_data;
4482         m->private = tr;
4483
4484         return 0;
4485 }
4486
4487 static int show_traces_release(struct inode *inode, struct file *file)
4488 {
4489         struct trace_array *tr = inode->i_private;
4490
4491         trace_array_put(tr);
4492         return seq_release(inode, file);
4493 }
4494
4495 static ssize_t
4496 tracing_write_stub(struct file *filp, const char __user *ubuf,
4497                    size_t count, loff_t *ppos)
4498 {
4499         return count;
4500 }
4501
4502 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4503 {
4504         int ret;
4505
4506         if (file->f_mode & FMODE_READ)
4507                 ret = seq_lseek(file, offset, whence);
4508         else
4509                 file->f_pos = ret = 0;
4510
4511         return ret;
4512 }
4513
4514 static const struct file_operations tracing_fops = {
4515         .open           = tracing_open,
4516         .read           = seq_read,
4517         .write          = tracing_write_stub,
4518         .llseek         = tracing_lseek,
4519         .release        = tracing_release,
4520 };
4521
4522 static const struct file_operations show_traces_fops = {
4523         .open           = show_traces_open,
4524         .read           = seq_read,
4525         .llseek         = seq_lseek,
4526         .release        = show_traces_release,
4527 };
4528
4529 static ssize_t
4530 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4531                      size_t count, loff_t *ppos)
4532 {
4533         struct trace_array *tr = file_inode(filp)->i_private;
4534         char *mask_str;
4535         int len;
4536
4537         len = snprintf(NULL, 0, "%*pb\n",
4538                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4539         mask_str = kmalloc(len, GFP_KERNEL);
4540         if (!mask_str)
4541                 return -ENOMEM;
4542
4543         len = snprintf(mask_str, len, "%*pb\n",
4544                        cpumask_pr_args(tr->tracing_cpumask));
4545         if (len >= count) {
4546                 count = -EINVAL;
4547                 goto out_err;
4548         }
4549         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4550
4551 out_err:
4552         kfree(mask_str);
4553
4554         return count;
4555 }
4556
4557 static ssize_t
4558 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4559                       size_t count, loff_t *ppos)
4560 {
4561         struct trace_array *tr = file_inode(filp)->i_private;
4562         cpumask_var_t tracing_cpumask_new;
4563         int err, cpu;
4564
4565         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4566                 return -ENOMEM;
4567
4568         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4569         if (err)
4570                 goto err_unlock;
4571
4572         local_irq_disable();
4573         arch_spin_lock(&tr->max_lock);
4574         for_each_tracing_cpu(cpu) {
4575                 /*
4576                  * Increase/decrease the disabled counter if we are
4577                  * about to flip a bit in the cpumask:
4578                  */
4579                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4580                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4581                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4582                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4583                 }
4584                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4585                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4586                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4587                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4588                 }
4589         }
4590         arch_spin_unlock(&tr->max_lock);
4591         local_irq_enable();
4592
4593         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4594         free_cpumask_var(tracing_cpumask_new);
4595
4596         return count;
4597
4598 err_unlock:
4599         free_cpumask_var(tracing_cpumask_new);
4600
4601         return err;
4602 }
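/*
 * Worked example: writing "5" (binary 101) to tracing_cpumask keeps
 * recording enabled on CPUs 0 and 2 and, for each previously enabled CPU
 * whose bit is now clear, bumps its ->disabled counter and disables its
 * ring buffer, so those CPUs stop contributing events until their bits
 * are set again.
 */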
4603
4604 static const struct file_operations tracing_cpumask_fops = {
4605         .open           = tracing_open_generic_tr,
4606         .read           = tracing_cpumask_read,
4607         .write          = tracing_cpumask_write,
4608         .release        = tracing_release_generic_tr,
4609         .llseek         = generic_file_llseek,
4610 };
4611
4612 static int tracing_trace_options_show(struct seq_file *m, void *v)
4613 {
4614         struct tracer_opt *trace_opts;
4615         struct trace_array *tr = m->private;
4616         u32 tracer_flags;
4617         int i;
4618
4619         mutex_lock(&trace_types_lock);
4620         tracer_flags = tr->current_trace->flags->val;
4621         trace_opts = tr->current_trace->flags->opts;
4622
4623         for (i = 0; trace_options[i]; i++) {
4624                 if (tr->trace_flags & (1 << i))
4625                         seq_printf(m, "%s\n", trace_options[i]);
4626                 else
4627                         seq_printf(m, "no%s\n", trace_options[i]);
4628         }
4629
4630         for (i = 0; trace_opts[i].name; i++) {
4631                 if (tracer_flags & trace_opts[i].bit)
4632                         seq_printf(m, "%s\n", trace_opts[i].name);
4633                 else
4634                         seq_printf(m, "no%s\n", trace_opts[i].name);
4635         }
4636         mutex_unlock(&trace_types_lock);
4637
4638         return 0;
4639 }
4640
4641 static int __set_tracer_option(struct trace_array *tr,
4642                                struct tracer_flags *tracer_flags,
4643                                struct tracer_opt *opts, int neg)
4644 {
4645         struct tracer *trace = tracer_flags->trace;
4646         int ret;
4647
4648         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4649         if (ret)
4650                 return ret;
4651
4652         if (neg)
4653                 tracer_flags->val &= ~opts->bit;
4654         else
4655                 tracer_flags->val |= opts->bit;
4656         return 0;
4657 }
4658
4659 /* Try to assign a tracer specific option */
4660 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4661 {
4662         struct tracer *trace = tr->current_trace;
4663         struct tracer_flags *tracer_flags = trace->flags;
4664         struct tracer_opt *opts = NULL;
4665         int i;
4666
4667         for (i = 0; tracer_flags->opts[i].name; i++) {
4668                 opts = &tracer_flags->opts[i];
4669
4670                 if (strcmp(cmp, opts->name) == 0)
4671                         return __set_tracer_option(tr, trace->flags, opts, neg);
4672         }
4673
4674         return -EINVAL;
4675 }
4676
4677 /* Some tracers require overwrite to stay enabled */
4678 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4679 {
4680         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4681                 return -1;
4682
4683         return 0;
4684 }
4685
4686 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4687 {
4688         /* do nothing if flag is already set */
4689         if (!!(tr->trace_flags & mask) == !!enabled)
4690                 return 0;
4691
4692         /* Give the tracer a chance to approve the change */
4693         if (tr->current_trace->flag_changed)
4694                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4695                         return -EINVAL;
4696
4697         if (enabled)
4698                 tr->trace_flags |= mask;
4699         else
4700                 tr->trace_flags &= ~mask;
4701
4702         if (mask == TRACE_ITER_RECORD_CMD)
4703                 trace_event_enable_cmd_record(enabled);
4704
4705         if (mask == TRACE_ITER_RECORD_TGID) {
4706                 if (!tgid_map)
4707                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4708                                            sizeof(*tgid_map),
4709                                            GFP_KERNEL);
4710                 if (!tgid_map) {
4711                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4712                         return -ENOMEM;
4713                 }
4714
4715                 trace_event_enable_tgid_record(enabled);
4716         }
4717
4718         if (mask == TRACE_ITER_EVENT_FORK)
4719                 trace_event_follow_fork(tr, enabled);
4720
4721         if (mask == TRACE_ITER_FUNC_FORK)
4722                 ftrace_pid_follow_fork(tr, enabled);
4723
4724         if (mask == TRACE_ITER_OVERWRITE) {
4725                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4726 #ifdef CONFIG_TRACER_MAX_TRACE
4727                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4728 #endif
4729         }
4730
4731         if (mask == TRACE_ITER_PRINTK) {
4732                 trace_printk_start_stop_comm(enabled);
4733                 trace_printk_control(enabled);
4734         }
4735
4736         return 0;
4737 }
4738
4739 static int trace_set_options(struct trace_array *tr, char *option)
4740 {
4741         char *cmp;
4742         int neg = 0;
4743         int ret;
4744         size_t orig_len = strlen(option);
4745         int len;
4746
4747         cmp = strstrip(option);
4748
4749         len = str_has_prefix(cmp, "no");
4750         if (len)
4751                 neg = 1;
4752
4753         cmp += len;
4754
4755         mutex_lock(&trace_types_lock);
4756
4757         ret = match_string(trace_options, -1, cmp);
4758         /* If no option could be set, test the specific tracer options */
4759         if (ret < 0)
4760                 ret = set_tracer_option(tr, cmp, neg);
4761         else
4762                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4763
4764         mutex_unlock(&trace_types_lock);
4765
4766         /*
4767          * If the first trailing whitespace is replaced with '\0' by strstrip,
4768          * turn it back into a space.
4769          */
4770         if (orig_len > strlen(option))
4771                 option[strlen(option)] = ' ';
4772
4773         return ret;
4774 }
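/*
 * Worked example: writing "nooverwrite" to trace_options is stripped to
 * "overwrite" with neg = 1, matched in trace_options[], and ends up in
 * set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0), which switches the ring
 * buffer(s) from overwrite mode to producer/consumer mode.
 */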
4775
4776 static void __init apply_trace_boot_options(void)
4777 {
4778         char *buf = trace_boot_options_buf;
4779         char *option;
4780
4781         while (true) {
4782                 option = strsep(&buf, ",");
4783
4784                 if (!option)
4785                         break;
4786
4787                 if (*option)
4788                         trace_set_options(&global_trace, option);
4789
4790                 /* Put back the comma to allow this to be called again */
4791                 if (buf)
4792                         *(buf - 1) = ',';
4793         }
4794 }
4795
4796 static ssize_t
4797 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4798                         size_t cnt, loff_t *ppos)
4799 {
4800         struct seq_file *m = filp->private_data;
4801         struct trace_array *tr = m->private;
4802         char buf[64];
4803         int ret;
4804
4805         if (cnt >= sizeof(buf))
4806                 return -EINVAL;
4807
4808         if (copy_from_user(buf, ubuf, cnt))
4809                 return -EFAULT;
4810
4811         buf[cnt] = 0;
4812
4813         ret = trace_set_options(tr, buf);
4814         if (ret < 0)
4815                 return ret;
4816
4817         *ppos += cnt;
4818
4819         return cnt;
4820 }
4821
4822 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4823 {
4824         struct trace_array *tr = inode->i_private;
4825         int ret;
4826
4827         ret = tracing_check_open_get_tr(tr);
4828         if (ret)
4829                 return ret;
4830
4831         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4832         if (ret < 0)
4833                 trace_array_put(tr);
4834
4835         return ret;
4836 }
4837
4838 static const struct file_operations tracing_iter_fops = {
4839         .open           = tracing_trace_options_open,
4840         .read           = seq_read,
4841         .llseek         = seq_lseek,
4842         .release        = tracing_single_release_tr,
4843         .write          = tracing_trace_options_write,
4844 };
4845
4846 static const char readme_msg[] =
4847         "tracing mini-HOWTO:\n\n"
4848         "# echo 0 > tracing_on : quick way to disable tracing\n"
4849         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4850         " Important files:\n"
4851         "  trace\t\t\t- The static contents of the buffer\n"
4852         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4853         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4854         "  current_tracer\t- function and latency tracers\n"
4855         "  available_tracers\t- list of configured tracers for current_tracer\n"
4856         "  error_log\t- error log for failed commands (that support it)\n"
4857         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4858         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4859         "  trace_clock\t\t- change the clock used to order events\n"
4860         "       local:   Per cpu clock but may not be synced across CPUs\n"
4861         "      global:   Synced across CPUs but slows tracing down.\n"
4862         "     counter:   Not a clock, but just an increment\n"
4863         "      uptime:   Jiffy counter from time of boot\n"
4864         "        perf:   Same clock that perf events use\n"
4865 #ifdef CONFIG_X86_64
4866         "     x86-tsc:   TSC cycle counter\n"
4867 #endif
4868         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4869         "       delta:   Delta difference against a buffer-wide timestamp\n"
4870         "    absolute:   Absolute (standalone) timestamp\n"
4871         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4872         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
4873         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4874         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4875         "\t\t\t  Remove sub-buffer with rmdir\n"
4876         "  trace_options\t\t- Set format or modify how tracing happens\n"
4877         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4878         "\t\t\t  option name\n"
4879         "  saved_cmdlines_size\t- echo the number of entries to keep in the comm-pid list\n"
4880 #ifdef CONFIG_DYNAMIC_FTRACE
4881         "\n  available_filter_functions - list of functions that can be filtered on\n"
4882         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4883         "\t\t\t  functions\n"
4884         "\t     accepts: func_full_name or glob-matching-pattern\n"
4885         "\t     modules: Can select a group via module\n"
4886         "\t      Format: :mod:<module-name>\n"
4887         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4888         "\t    triggers: a command to perform when function is hit\n"
4889         "\t      Format: <function>:<trigger>[:count]\n"
4890         "\t     trigger: traceon, traceoff\n"
4891         "\t\t      enable_event:<system>:<event>\n"
4892         "\t\t      disable_event:<system>:<event>\n"
4893 #ifdef CONFIG_STACKTRACE
4894         "\t\t      stacktrace\n"
4895 #endif
4896 #ifdef CONFIG_TRACER_SNAPSHOT
4897         "\t\t      snapshot\n"
4898 #endif
4899         "\t\t      dump\n"
4900         "\t\t      cpudump\n"
4901         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4902         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4903         "\t     The first one will disable tracing every time do_fault is hit\n"
4904         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4905         "\t       The first time do_trap is hit and it disables tracing, the\n"
4906         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4907         "\t       the counter will not decrement. It only decrements when the\n"
4908         "\t       trigger did work\n"
4909         "\t     To remove trigger without count:\n"
4910         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4911         "\t     To remove trigger with a count:\n"
4912         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4913         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4914         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4915         "\t    modules: Can select a group via module command :mod:\n"
4916         "\t    Does not accept triggers\n"
4917 #endif /* CONFIG_DYNAMIC_FTRACE */
4918 #ifdef CONFIG_FUNCTION_TRACER
4919         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4920         "\t\t    (function)\n"
4921 #endif
4922 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4923         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4924         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4925         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4926 #endif
4927 #ifdef CONFIG_TRACER_SNAPSHOT
4928         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4929         "\t\t\t  snapshot buffer. Read the contents for more\n"
4930         "\t\t\t  information\n"
4931 #endif
4932 #ifdef CONFIG_STACK_TRACER
4933         "  stack_trace\t\t- Shows the max stack trace when active\n"
4934         "  stack_max_size\t- Shows current max stack size that was traced\n"
4935         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4936         "\t\t\t  new trace)\n"
4937 #ifdef CONFIG_DYNAMIC_FTRACE
4938         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4939         "\t\t\t  traces\n"
4940 #endif
4941 #endif /* CONFIG_STACK_TRACER */
4942 #ifdef CONFIG_DYNAMIC_EVENTS
4943         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4944         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4945 #endif
4946 #ifdef CONFIG_KPROBE_EVENTS
4947         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4948         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4949 #endif
4950 #ifdef CONFIG_UPROBE_EVENTS
4951         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4952         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4953 #endif
4954 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4955         "\t  accepts: event-definitions (one definition per line)\n"
4956         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4957         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4958 #ifdef CONFIG_HIST_TRIGGERS
4959         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4960 #endif
4961         "\t           -:[<group>/]<event>\n"
4962 #ifdef CONFIG_KPROBE_EVENTS
4963         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4964   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4965 #endif
4966 #ifdef CONFIG_UPROBE_EVENTS
4967   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4968 #endif
4969         "\t     args: <name>=fetcharg[:type]\n"
4970         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4971 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4972         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4973 #else
4974         "\t           $stack<index>, $stack, $retval, $comm,\n"
4975 #endif
4976         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4977         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4978         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4979         "\t           <type>\\[<array-size>\\]\n"
4980 #ifdef CONFIG_HIST_TRIGGERS
4981         "\t    field: <stype> <name>;\n"
4982         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4983         "\t           [unsigned] char/int/long\n"
4984 #endif
4985 #endif
4986         "  events/\t\t- Directory containing all trace event subsystems:\n"
4987         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4988         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4989         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4990         "\t\t\t  events\n"
4991         "      filter\t\t- If set, only events passing filter are traced\n"
4992         "  events/<system>/<event>/\t- Directory containing control files for\n"
4993         "\t\t\t  <event>:\n"
4994         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4995         "      filter\t\t- If set, only events passing filter are traced\n"
4996         "      trigger\t\t- If set, a command to perform when event is hit\n"
4997         "\t    Format: <trigger>[:count][if <filter>]\n"
4998         "\t   trigger: traceon, traceoff\n"
4999         "\t            enable_event:<system>:<event>\n"
5000         "\t            disable_event:<system>:<event>\n"
5001 #ifdef CONFIG_HIST_TRIGGERS
5002         "\t            enable_hist:<system>:<event>\n"
5003         "\t            disable_hist:<system>:<event>\n"
5004 #endif
5005 #ifdef CONFIG_STACKTRACE
5006         "\t\t    stacktrace\n"
5007 #endif
5008 #ifdef CONFIG_TRACER_SNAPSHOT
5009         "\t\t    snapshot\n"
5010 #endif
5011 #ifdef CONFIG_HIST_TRIGGERS
5012         "\t\t    hist (see below)\n"
5013 #endif
5014         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5015         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5016         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5017         "\t                  events/block/block_unplug/trigger\n"
5018         "\t   The first disables tracing every time block_unplug is hit.\n"
5019         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5020         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5021         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5022         "\t   Like function triggers, the counter is only decremented if it\n"
5023         "\t    enabled or disabled tracing.\n"
5024         "\t   To remove a trigger without a count:\n"
5025         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5026         "\t   To remove a trigger with a count:\n"
5027         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5028         "\t   Filters can be ignored when removing a trigger.\n"
5029 #ifdef CONFIG_HIST_TRIGGERS
5030         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5031         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5032         "\t            [:values=<field1[,field2,...]>]\n"
5033         "\t            [:sort=<field1[,field2,...]>]\n"
5034         "\t            [:size=#entries]\n"
5035         "\t            [:pause][:continue][:clear]\n"
5036         "\t            [:name=histname1]\n"
5037         "\t            [:<handler>.<action>]\n"
5038         "\t            [if <filter>]\n\n"
5039         "\t    When a matching event is hit, an entry is added to a hash\n"
5040         "\t    table using the key(s) and value(s) named, and the value of a\n"
5041         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5042         "\t    correspond to fields in the event's format description.  Keys\n"
5043         "\t    can be any field, or the special string 'stacktrace'.\n"
5044         "\t    Compound keys consisting of up to two fields can be specified\n"
5045         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5046         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5047         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5048         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5049         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5050         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5051         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5052         "\t    its histogram data will be shared with other triggers of the\n"
5053         "\t    same name, and trigger hits will update this common data.\n\n"
5054         "\t    Reading the 'hist' file for the event will dump the hash\n"
5055         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5056         "\t    triggers attached to an event, there will be a table for each\n"
5057         "\t    trigger in the output.  The table displayed for a named\n"
5058         "\t    trigger will be the same as any other instance having the\n"
5059         "\t    same name.  The default format used to display a given field\n"
5060         "\t    can be modified by appending any of the following modifiers\n"
5061         "\t    to the field name, as applicable:\n\n"
5062         "\t            .hex        display a number as a hex value\n"
5063         "\t            .sym        display an address as a symbol\n"
5064         "\t            .sym-offset display an address as a symbol and offset\n"
5065         "\t            .execname   display a common_pid as a program name\n"
5066         "\t            .syscall    display a syscall id as a syscall name\n"
5067         "\t            .log2       display log2 value rather than raw number\n"
5068         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5069         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5070         "\t    trigger or to start a hist trigger but not log any events\n"
5071         "\t    until told to do so.  'continue' can be used to start or\n"
5072         "\t    restart a paused hist trigger.\n\n"
5073         "\t    The 'clear' parameter will clear the contents of a running\n"
5074         "\t    hist trigger and leave its current paused/active state\n"
5075         "\t    unchanged.\n\n"
5076         "\t    The enable_hist and disable_hist triggers can be used to\n"
5077         "\t    have one event conditionally start and stop another event's\n"
5078         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5079         "\t    the enable_event and disable_event triggers.\n\n"
5080         "\t    Hist trigger handlers and actions are executed whenever a\n"
5081         "\t    Hist trigger handlers and actions are executed whenever a\n"
5082         "\t    histogram entry is added or updated.  They take the form:\n\n"
5083         "\t    The available handlers are:\n\n"
5084         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5085         "\t        onmax(var)               - invoke if var exceeds current max\n"
5086         "\t        onchange(var)            - invoke action if var changes\n\n"
5087         "\t    The available actions are:\n\n"
5088         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5089         "\t        save(field,...)                      - save current event fields\n"
5090 #ifdef CONFIG_TRACER_SNAPSHOT
5091         "\t        snapshot()                           - snapshot the trace buffer\n"
5092 #endif
5093 #endif
5094 ;
5095
5096 static ssize_t
5097 tracing_readme_read(struct file *filp, char __user *ubuf,
5098                        size_t cnt, loff_t *ppos)
5099 {
5100         return simple_read_from_buffer(ubuf, cnt, ppos,
5101                                         readme_msg, strlen(readme_msg));
5102 }
5103
5104 static const struct file_operations tracing_readme_fops = {
5105         .open           = tracing_open_generic,
5106         .read           = tracing_readme_read,
5107         .llseek         = generic_file_llseek,
5108 };
5109
5110 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5111 {
5112         int *ptr = v;
5113
5114         if (*pos || m->count)
5115                 ptr++;
5116
5117         (*pos)++;
5118
5119         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5120                 if (trace_find_tgid(*ptr))
5121                         return ptr;
5122         }
5123
5124         return NULL;
5125 }
5126
5127 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5128 {
5129         void *v;
5130         loff_t l = 0;
5131
5132         if (!tgid_map)
5133                 return NULL;
5134
5135         v = &tgid_map[0];
5136         while (l <= *pos) {
5137                 v = saved_tgids_next(m, v, &l);
5138                 if (!v)
5139                         return NULL;
5140         }
5141
5142         return v;
5143 }
5144
5145 static void saved_tgids_stop(struct seq_file *m, void *v)
5146 {
5147 }
5148
5149 static int saved_tgids_show(struct seq_file *m, void *v)
5150 {
5151         int pid = (int *)v - tgid_map;
5152
5153         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5154         return 0;
5155 }
5156
5157 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5158         .start          = saved_tgids_start,
5159         .stop           = saved_tgids_stop,
5160         .next           = saved_tgids_next,
5161         .show           = saved_tgids_show,
5162 };
5163
5164 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5165 {
5166         int ret;
5167
5168         ret = tracing_check_open_get_tr(NULL);
5169         if (ret)
5170                 return ret;
5171
5172         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5173 }
5174
5175
5176 static const struct file_operations tracing_saved_tgids_fops = {
5177         .open           = tracing_saved_tgids_open,
5178         .read           = seq_read,
5179         .llseek         = seq_lseek,
5180         .release        = seq_release,
5181 };
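
/*
 * Illustrative only: the seq_file operations above implement the
 * saved_tgids file. Each output line is "<pid> <tgid>" as produced by
 * saved_tgids_show(), e.g. (paths and values are made-up examples):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1023 1022
 *   1024 1024
 */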
5182
5183 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5184 {
5185         unsigned int *ptr = v;
5186
5187         if (*pos || m->count)
5188                 ptr++;
5189
5190         (*pos)++;
5191
5192         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5193              ptr++) {
5194                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5195                         continue;
5196
5197                 return ptr;
5198         }
5199
5200         return NULL;
5201 }
5202
5203 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5204 {
5205         void *v;
5206         loff_t l = 0;
5207
5208         preempt_disable();
5209         arch_spin_lock(&trace_cmdline_lock);
5210
5211         v = &savedcmd->map_cmdline_to_pid[0];
5212         while (l <= *pos) {
5213                 v = saved_cmdlines_next(m, v, &l);
5214                 if (!v)
5215                         return NULL;
5216         }
5217
5218         return v;
5219 }
5220
5221 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5222 {
5223         arch_spin_unlock(&trace_cmdline_lock);
5224         preempt_enable();
5225 }
5226
5227 static int saved_cmdlines_show(struct seq_file *m, void *v)
5228 {
5229         char buf[TASK_COMM_LEN];
5230         unsigned int *pid = v;
5231
5232         __trace_find_cmdline(*pid, buf);
5233         seq_printf(m, "%d %s\n", *pid, buf);
5234         return 0;
5235 }
5236
5237 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5238         .start          = saved_cmdlines_start,
5239         .next           = saved_cmdlines_next,
5240         .stop           = saved_cmdlines_stop,
5241         .show           = saved_cmdlines_show,
5242 };
5243
5244 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5245 {
5246         int ret;
5247
5248         ret = tracing_check_open_get_tr(NULL);
5249         if (ret)
5250                 return ret;
5251
5252         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5253 }
5254
5255 static const struct file_operations tracing_saved_cmdlines_fops = {
5256         .open           = tracing_saved_cmdlines_open,
5257         .read           = seq_read,
5258         .llseek         = seq_lseek,
5259         .release        = seq_release,
5260 };
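
/*
 * Illustrative only: saved_cmdlines maps recorded PIDs to the task comm
 * cached at trace time, one "<pid> <comm>" pair per line from
 * saved_cmdlines_show(), e.g. (values are made-up examples):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   217 kworker/2:1
 *   1034 bash
 */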
5261
5262 static ssize_t
5263 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5264                                  size_t cnt, loff_t *ppos)
5265 {
5266         char buf[64];
5267         int r;
5268
5269         arch_spin_lock(&trace_cmdline_lock);
5270         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5271         arch_spin_unlock(&trace_cmdline_lock);
5272
5273         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5274 }
5275
5276 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5277 {
5278         kfree(s->saved_cmdlines);
5279         kfree(s->map_cmdline_to_pid);
5280         kfree(s);
5281 }
5282
5283 static int tracing_resize_saved_cmdlines(unsigned int val)
5284 {
5285         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5286
5287         s = kmalloc(sizeof(*s), GFP_KERNEL);
5288         if (!s)
5289                 return -ENOMEM;
5290
5291         if (allocate_cmdlines_buffer(val, s) < 0) {
5292                 kfree(s);
5293                 return -ENOMEM;
5294         }
5295
5296         arch_spin_lock(&trace_cmdline_lock);
5297         savedcmd_temp = savedcmd;
5298         savedcmd = s;
5299         arch_spin_unlock(&trace_cmdline_lock);
5300         free_saved_cmdlines_buffer(savedcmd_temp);
5301
5302         return 0;
5303 }
5304
5305 static ssize_t
5306 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5307                                   size_t cnt, loff_t *ppos)
5308 {
5309         unsigned long val;
5310         int ret;
5311
5312         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5313         if (ret)
5314                 return ret;
5315
5316         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5317         if (!val || val > PID_MAX_DEFAULT)
5318                 return -EINVAL;
5319
5320         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5321         if (ret < 0)
5322                 return ret;
5323
5324         *ppos += cnt;
5325
5326         return cnt;
5327 }
5328
5329 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5330         .open           = tracing_open_generic,
5331         .read           = tracing_saved_cmdlines_size_read,
5332         .write          = tracing_saved_cmdlines_size_write,
5333 };
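
/*
 * Illustrative only: saved_cmdlines_size reads back the current number of
 * cached entries and accepts a new size (1 .. PID_MAX_DEFAULT), e.g.
 * (the value shown by cat is just an example):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */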
5334
5335 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5336 static union trace_eval_map_item *
5337 update_eval_map(union trace_eval_map_item *ptr)
5338 {
5339         if (!ptr->map.eval_string) {
5340                 if (ptr->tail.next) {
5341                         ptr = ptr->tail.next;
5342                         /* Set ptr to the next real item (skip head) */
5343                         ptr++;
5344                 } else
5345                         return NULL;
5346         }
5347         return ptr;
5348 }
5349
5350 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5351 {
5352         union trace_eval_map_item *ptr = v;
5353
5354         /*
5355          * Paranoid! If ptr points to end, we don't want to increment past it.
5356          * This really should never happen.
5357          */
5358         ptr = update_eval_map(ptr);
5359         if (WARN_ON_ONCE(!ptr))
5360                 return NULL;
5361
5362         ptr++;
5363
5364         (*pos)++;
5365
5366         ptr = update_eval_map(ptr);
5367
5368         return ptr;
5369 }
5370
5371 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5372 {
5373         union trace_eval_map_item *v;
5374         loff_t l = 0;
5375
5376         mutex_lock(&trace_eval_mutex);
5377
5378         v = trace_eval_maps;
5379         if (v)
5380                 v++;
5381
5382         while (v && l < *pos) {
5383                 v = eval_map_next(m, v, &l);
5384         }
5385
5386         return v;
5387 }
5388
5389 static void eval_map_stop(struct seq_file *m, void *v)
5390 {
5391         mutex_unlock(&trace_eval_mutex);
5392 }
5393
5394 static int eval_map_show(struct seq_file *m, void *v)
5395 {
5396         union trace_eval_map_item *ptr = v;
5397
5398         seq_printf(m, "%s %ld (%s)\n",
5399                    ptr->map.eval_string, ptr->map.eval_value,
5400                    ptr->map.system);
5401
5402         return 0;
5403 }
5404
5405 static const struct seq_operations tracing_eval_map_seq_ops = {
5406         .start          = eval_map_start,
5407         .next           = eval_map_next,
5408         .stop           = eval_map_stop,
5409         .show           = eval_map_show,
5410 };
5411
5412 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5413 {
5414         int ret;
5415
5416         ret = tracing_check_open_get_tr(NULL);
5417         if (ret)
5418                 return ret;
5419
5420         return seq_open(filp, &tracing_eval_map_seq_ops);
5421 }
5422
5423 static const struct file_operations tracing_eval_map_fops = {
5424         .open           = tracing_eval_map_open,
5425         .read           = seq_read,
5426         .llseek         = seq_lseek,
5427         .release        = seq_release,
5428 };
5429
5430 static inline union trace_eval_map_item *
5431 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5432 {
5433         /* Return tail of array given the head */
5434         return ptr + ptr->head.length + 1;
5435 }
5436
5437 static void
5438 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5439                            int len)
5440 {
5441         struct trace_eval_map **stop;
5442         struct trace_eval_map **map;
5443         union trace_eval_map_item *map_array;
5444         union trace_eval_map_item *ptr;
5445
5446         stop = start + len;
5447
5448         /*
5449          * The trace_eval_maps contains the map plus a head and tail item,
5450          * where the head holds the module and length of array, and the
5451          * tail holds a pointer to the next list.
5452          */
5453         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5454         if (!map_array) {
5455                 pr_warn("Unable to allocate trace eval mapping\n");
5456                 return;
5457         }
5458
5459         mutex_lock(&trace_eval_mutex);
5460
5461         if (!trace_eval_maps)
5462                 trace_eval_maps = map_array;
5463         else {
5464                 ptr = trace_eval_maps;
5465                 for (;;) {
5466                         ptr = trace_eval_jmp_to_tail(ptr);
5467                         if (!ptr->tail.next)
5468                                 break;
5469                         ptr = ptr->tail.next;
5470
5471                 }
5472                 ptr->tail.next = map_array;
5473         }
5474         map_array->head.mod = mod;
5475         map_array->head.length = len;
5476         map_array++;
5477
5478         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5479                 map_array->map = **map;
5480                 map_array++;
5481         }
5482         memset(map_array, 0, sizeof(*map_array));
5483
5484         mutex_unlock(&trace_eval_mutex);
5485 }
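
/*
 * Layout sketch (illustrative, derived from the code above): for a module
 * providing <len> eval maps, the allocated map_array looks like
 *
 *   [ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next list ]
 *
 * which is why kmalloc_array() allocates len + 2 items, why the final item
 * is zeroed as a terminator, and why trace_eval_jmp_to_tail() returns
 * ptr + ptr->head.length + 1.
 */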
5486
5487 static void trace_create_eval_file(struct dentry *d_tracer)
5488 {
5489         trace_create_file("eval_map", 0444, d_tracer,
5490                           NULL, &tracing_eval_map_fops);
5491 }
5492
5493 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5494 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5495 static inline void trace_insert_eval_map_file(struct module *mod,
5496                               struct trace_eval_map **start, int len) { }
5497 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5498
5499 static void trace_insert_eval_map(struct module *mod,
5500                                   struct trace_eval_map **start, int len)
5501 {
5502         struct trace_eval_map **map;
5503
5504         if (len <= 0)
5505                 return;
5506
5507         map = start;
5508
5509         trace_event_eval_update(map, len);
5510
5511         trace_insert_eval_map_file(mod, start, len);
5512 }
5513
5514 static ssize_t
5515 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5516                        size_t cnt, loff_t *ppos)
5517 {
5518         struct trace_array *tr = filp->private_data;
5519         char buf[MAX_TRACER_SIZE+2];
5520         int r;
5521
5522         mutex_lock(&trace_types_lock);
5523         r = sprintf(buf, "%s\n", tr->current_trace->name);
5524         mutex_unlock(&trace_types_lock);
5525
5526         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5527 }
5528
5529 int tracer_init(struct tracer *t, struct trace_array *tr)
5530 {
5531         tracing_reset_online_cpus(&tr->trace_buffer);
5532         return t->init(tr);
5533 }
5534
5535 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5536 {
5537         int cpu;
5538
5539         for_each_tracing_cpu(cpu)
5540                 per_cpu_ptr(buf->data, cpu)->entries = val;
5541 }
5542
5543 #ifdef CONFIG_TRACER_MAX_TRACE
5544 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5545 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5546                                         struct trace_buffer *size_buf, int cpu_id)
5547 {
5548         int cpu, ret = 0;
5549
5550         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5551                 for_each_tracing_cpu(cpu) {
5552                         ret = ring_buffer_resize(trace_buf->buffer,
5553                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5554                         if (ret < 0)
5555                                 break;
5556                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5557                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5558                 }
5559         } else {
5560                 ret = ring_buffer_resize(trace_buf->buffer,
5561                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5562                 if (ret == 0)
5563                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5564                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5565         }
5566
5567         return ret;
5568 }
5569 #endif /* CONFIG_TRACER_MAX_TRACE */
5570
5571 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5572                                         unsigned long size, int cpu)
5573 {
5574         int ret;
5575
5576         /*
5577          * If kernel or user changes the size of the ring buffer
5578          * we use the size that was given, and we can forget about
5579          * expanding it later.
5580          */
5581         ring_buffer_expanded = true;
5582
5583         /* May be called before buffers are initialized */
5584         if (!tr->trace_buffer.buffer)
5585                 return 0;
5586
5587         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5588         if (ret < 0)
5589                 return ret;
5590
5591 #ifdef CONFIG_TRACER_MAX_TRACE
5592         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5593             !tr->current_trace->use_max_tr)
5594                 goto out;
5595
5596         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5597         if (ret < 0) {
5598                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5599                                                      &tr->trace_buffer, cpu);
5600                 if (r < 0) {
5601                         /*
5602                          * AARGH! We are left with different
5603                          * size max buffer!!!!
5604                          * The max buffer is our "snapshot" buffer.
5605                          * When a tracer needs a snapshot (one of the
5606                          * latency tracers), it swaps the max buffer
5607                          * with the saved snapshot. We succeeded in updating
5608                          * the size of the main buffer, but failed to update
5609                          * the size of the max buffer. But when we tried
5610                          * to reset the main buffer to the original size, we
5611                          * failed there too. This is very unlikely to
5612                          * happen, but if it does, warn and kill all
5613                          * tracing.
5614                          */
5615                         WARN_ON(1);
5616                         tracing_disabled = 1;
5617                 }
5618                 return ret;
5619         }
5620
5621         if (cpu == RING_BUFFER_ALL_CPUS)
5622                 set_buffer_entries(&tr->max_buffer, size);
5623         else
5624                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5625
5626  out:
5627 #endif /* CONFIG_TRACER_MAX_TRACE */
5628
5629         if (cpu == RING_BUFFER_ALL_CPUS)
5630                 set_buffer_entries(&tr->trace_buffer, size);
5631         else
5632                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5633
5634         return ret;
5635 }
5636
5637 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5638                                           unsigned long size, int cpu_id)
5639 {
5640         int ret = size;
5641
5642         mutex_lock(&trace_types_lock);
5643
5644         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5645                 /* make sure this cpu is enabled in the mask */
5646                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5647                         ret = -EINVAL;
5648                         goto out;
5649                 }
5650         }
5651
5652         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5653         if (ret < 0)
5654                 ret = -ENOMEM;
5655
5656 out:
5657         mutex_unlock(&trace_types_lock);
5658
5659         return ret;
5660 }
5661
5662
5663 /**
5664  * tracing_update_buffers - used by tracing facility to expand ring buffers
5665  *
5666  * To save memory when tracing is never used on a system that has it
5667  * configured in, the ring buffers are set to a minimum size. Once a
5668  * user starts to use the tracing facility, they need to grow to
5669  * their default size.
5670  *
5671  * This function is to be called when a tracer is about to be used.
5672  */
5673 int tracing_update_buffers(void)
5674 {
5675         int ret = 0;
5676
5677         mutex_lock(&trace_types_lock);
5678         if (!ring_buffer_expanded)
5679                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5680                                                 RING_BUFFER_ALL_CPUS);
5681         mutex_unlock(&trace_types_lock);
5682
5683         return ret;
5684 }
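
/*
 * Illustrative usage sketch (not part of the original source): callers that
 * are about to enable tracing typically do something like
 *
 *   ret = tracing_update_buffers();
 *   if (ret < 0)
 *           return ret;
 *
 * so the ring buffers are grown from their boot-time minimum before any
 * events are recorded.
 */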
5685
5686 struct trace_option_dentry;
5687
5688 static void
5689 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5690
5691 /*
5692  * Used to clear out the tracer before deletion of an instance.
5693  * Must have trace_types_lock held.
5694  */
5695 static void tracing_set_nop(struct trace_array *tr)
5696 {
5697         if (tr->current_trace == &nop_trace)
5698                 return;
5699
5700         tr->current_trace->enabled--;
5701
5702         if (tr->current_trace->reset)
5703                 tr->current_trace->reset(tr);
5704
5705         tr->current_trace = &nop_trace;
5706 }
5707
5708 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5709 {
5710         /* Only enable if the directory has been created already. */
5711         if (!tr->dir)
5712                 return;
5713
5714         create_trace_option_files(tr, t);
5715 }
5716
5717 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5718 {
5719         struct tracer *t;
5720 #ifdef CONFIG_TRACER_MAX_TRACE
5721         bool had_max_tr;
5722 #endif
5723         int ret = 0;
5724
5725         mutex_lock(&trace_types_lock);
5726
5727         if (!ring_buffer_expanded) {
5728                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5729                                                 RING_BUFFER_ALL_CPUS);
5730                 if (ret < 0)
5731                         goto out;
5732                 ret = 0;
5733         }
5734
5735         for (t = trace_types; t; t = t->next) {
5736                 if (strcmp(t->name, buf) == 0)
5737                         break;
5738         }
5739         if (!t) {
5740                 ret = -EINVAL;
5741                 goto out;
5742         }
5743         if (t == tr->current_trace)
5744                 goto out;
5745
5746 #ifdef CONFIG_TRACER_SNAPSHOT
5747         if (t->use_max_tr) {
5748                 arch_spin_lock(&tr->max_lock);
5749                 if (tr->cond_snapshot)
5750                         ret = -EBUSY;
5751                 arch_spin_unlock(&tr->max_lock);
5752                 if (ret)
5753                         goto out;
5754         }
5755 #endif
5756         /* Some tracers won't work on the kernel command line */
5757         if (system_state < SYSTEM_RUNNING && t->noboot) {
5758                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5759                         t->name);
5760                 goto out;
5761         }
5762
5763         /* Some tracers are only allowed for the top level buffer */
5764         if (!trace_ok_for_array(t, tr)) {
5765                 ret = -EINVAL;
5766                 goto out;
5767         }
5768
5769         /* If trace pipe files are being read, we can't change the tracer */
5770         if (tr->current_trace->ref) {
5771                 ret = -EBUSY;
5772                 goto out;
5773         }
5774
5775         trace_branch_disable();
5776
5777         tr->current_trace->enabled--;
5778
5779         if (tr->current_trace->reset)
5780                 tr->current_trace->reset(tr);
5781
5782         /* Current trace needs to be nop_trace before synchronize_rcu */
5783         tr->current_trace = &nop_trace;
5784
5785 #ifdef CONFIG_TRACER_MAX_TRACE
5786         had_max_tr = tr->allocated_snapshot;
5787
5788         if (had_max_tr && !t->use_max_tr) {
5789                 /*
5790                  * We need to make sure that the update_max_tr sees that
5791                  * current_trace changed to nop_trace to keep it from
5792                  * swapping the buffers after we resize it.
5793                  * The update_max_tr is called with interrupts disabled,
5794                  * so a synchronize_rcu() is sufficient.
5795                  */
5796                 synchronize_rcu();
5797                 free_snapshot(tr);
5798         }
5799 #endif
5800
5801 #ifdef CONFIG_TRACER_MAX_TRACE
5802         if (t->use_max_tr && !had_max_tr) {
5803                 ret = tracing_alloc_snapshot_instance(tr);
5804                 if (ret < 0)
5805                         goto out;
5806         }
5807 #endif
5808
5809         if (t->init) {
5810                 ret = tracer_init(t, tr);
5811                 if (ret)
5812                         goto out;
5813         }
5814
5815         tr->current_trace = t;
5816         tr->current_trace->enabled++;
5817         trace_branch_enable(tr);
5818  out:
5819         mutex_unlock(&trace_types_lock);
5820
5821         return ret;
5822 }
5823
5824 static ssize_t
5825 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5826                         size_t cnt, loff_t *ppos)
5827 {
5828         struct trace_array *tr = filp->private_data;
5829         char buf[MAX_TRACER_SIZE+1];
5830         int i;
5831         size_t ret;
5832         int err;
5833
5834         ret = cnt;
5835
5836         if (cnt > MAX_TRACER_SIZE)
5837                 cnt = MAX_TRACER_SIZE;
5838
5839         if (copy_from_user(buf, ubuf, cnt))
5840                 return -EFAULT;
5841
5842         buf[cnt] = 0;
5843
5844         /* strip ending whitespace. */
5845         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5846                 buf[i] = 0;
5847
5848         err = tracing_set_tracer(tr, buf);
5849         if (err)
5850                 return err;
5851
5852         *ppos += ret;
5853
5854         return ret;
5855 }
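
/*
 * Illustrative only: tracing_set_trace_write() backs the current_tracer
 * file described in the readme above. Switching tracers from a shell might
 * look like (tracer names depend on the kernel configuration):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing a name that is not in available_tracers fails with -EINVAL from
 * tracing_set_tracer().
 */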
5856
5857 static ssize_t
5858 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5859                    size_t cnt, loff_t *ppos)
5860 {
5861         char buf[64];
5862         int r;
5863
5864         r = snprintf(buf, sizeof(buf), "%ld\n",
5865                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5866         if (r > sizeof(buf))
5867                 r = sizeof(buf);
5868         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5869 }
5870
5871 static ssize_t
5872 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5873                     size_t cnt, loff_t *ppos)
5874 {
5875         unsigned long val;
5876         int ret;
5877
5878         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5879         if (ret)
5880                 return ret;
5881
5882         *ptr = val * 1000;
5883
5884         return cnt;
5885 }
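
/*
 * Note on units (illustrative): these helpers expose values in microseconds
 * while storing nanoseconds internally; tracing_nsecs_write() multiplies by
 * 1000 on the way in and tracing_nsecs_read() uses nsecs_to_usecs() on the
 * way out. Writing "50" to a file backed by these helpers therefore stores
 * 50000 ns.
 */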
5886
5887 static ssize_t
5888 tracing_thresh_read(struct file *filp, char __user *ubuf,
5889                     size_t cnt, loff_t *ppos)
5890 {
5891         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5892 }
5893
5894 static ssize_t
5895 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5896                      size_t cnt, loff_t *ppos)
5897 {
5898         struct trace_array *tr = filp->private_data;
5899         int ret;
5900
5901         mutex_lock(&trace_types_lock);
5902         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5903         if (ret < 0)
5904                 goto out;
5905
5906         if (tr->current_trace->update_thresh) {
5907                 ret = tr->current_trace->update_thresh(tr);
5908                 if (ret < 0)
5909                         goto out;
5910         }
5911
5912         ret = cnt;
5913 out:
5914         mutex_unlock(&trace_types_lock);
5915
5916         return ret;
5917 }
5918
5919 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5920
5921 static ssize_t
5922 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5923                      size_t cnt, loff_t *ppos)
5924 {
5925         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5926 }
5927
5928 static ssize_t
5929 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5930                       size_t cnt, loff_t *ppos)
5931 {
5932         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5933 }
5934
5935 #endif
5936
5937 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5938 {
5939         struct trace_array *tr = inode->i_private;
5940         struct trace_iterator *iter;
5941         int ret;
5942
5943         ret = tracing_check_open_get_tr(tr);
5944         if (ret)
5945                 return ret;
5946
5947         mutex_lock(&trace_types_lock);
5948
5949         /* create a buffer to store the information to pass to userspace */
5950         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5951         if (!iter) {
5952                 ret = -ENOMEM;
5953                 __trace_array_put(tr);
5954                 goto out;
5955         }
5956
5957         trace_seq_init(&iter->seq);
5958         iter->trace = tr->current_trace;
5959
5960         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5961                 ret = -ENOMEM;
5962                 goto fail;
5963         }
5964
5965         /* trace pipe does not show start of buffer */
5966         cpumask_setall(iter->started);
5967
5968         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5969                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5970
5971         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5972         if (trace_clocks[tr->clock_id].in_ns)
5973                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5974
5975         iter->tr = tr;
5976         iter->trace_buffer = &tr->trace_buffer;
5977         iter->cpu_file = tracing_get_cpu(inode);
5978         mutex_init(&iter->mutex);
5979         filp->private_data = iter;
5980
5981         if (iter->trace->pipe_open)
5982                 iter->trace->pipe_open(iter);
5983
5984         nonseekable_open(inode, filp);
5985
5986         tr->current_trace->ref++;
5987 out:
5988         mutex_unlock(&trace_types_lock);
5989         return ret;
5990
5991 fail:
5992         kfree(iter);
5993         __trace_array_put(tr);
5994         mutex_unlock(&trace_types_lock);
5995         return ret;
5996 }
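
/*
 * Illustrative only: trace_pipe is the consuming-read interface described
 * in the readme above. A typical use is:
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * which blocks in tracing_wait_pipe() until data is available and removes
 * entries from the ring buffer as they are read.
 */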
5997
5998 static int tracing_release_pipe(struct inode *inode, struct file *file)
5999 {
6000         struct trace_iterator *iter = file->private_data;
6001         struct trace_array *tr = inode->i_private;
6002
6003         mutex_lock(&trace_types_lock);
6004
6005         tr->current_trace->ref--;
6006
6007         if (iter->trace->pipe_close)
6008                 iter->trace->pipe_close(iter);
6009
6010         mutex_unlock(&trace_types_lock);
6011
6012         free_cpumask_var(iter->started);
6013         mutex_destroy(&iter->mutex);
6014         kfree(iter);
6015
6016         trace_array_put(tr);
6017
6018         return 0;
6019 }
6020
6021 static __poll_t
6022 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6023 {
6024         struct trace_array *tr = iter->tr;
6025
6026         /* Iterators are static, they should be filled or empty */
6027         if (trace_buffer_iter(iter, iter->cpu_file))
6028                 return EPOLLIN | EPOLLRDNORM;
6029
6030         if (tr->trace_flags & TRACE_ITER_BLOCK)
6031                 /*
6032                  * Always select as readable when in blocking mode
6033                  */
6034                 return EPOLLIN | EPOLLRDNORM;
6035         else
6036                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6037                                              filp, poll_table);
6038 }
6039
6040 static __poll_t
6041 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6042 {
6043         struct trace_iterator *iter = filp->private_data;
6044
6045         return trace_poll(iter, filp, poll_table);
6046 }
6047
6048 /* Must be called with iter->mutex held. */
6049 static int tracing_wait_pipe(struct file *filp)
6050 {
6051         struct trace_iterator *iter = filp->private_data;
6052         int ret;
6053
6054         while (trace_empty(iter)) {
6055
6056                 if ((filp->f_flags & O_NONBLOCK)) {
6057                         return -EAGAIN;
6058                 }
6059
6060                 /*
6061                  * We block until we read something and tracing is disabled.
6062                  * We still block if tracing is disabled, but we have never
6063                  * read anything. This allows a user to cat this file, and
6064                  * then enable tracing. But after we have read something,
6065                  * we give an EOF when tracing is again disabled.
6066                  *
6067                  * iter->pos will be 0 if we haven't read anything.
6068                  */
6069                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6070                         break;
6071
6072                 mutex_unlock(&iter->mutex);
6073
6074                 ret = wait_on_pipe(iter, 0);
6075
6076                 mutex_lock(&iter->mutex);
6077
6078                 if (ret)
6079                         return ret;
6080         }
6081
6082         return 1;
6083 }
6084
6085 /*
6086  * Consumer reader.
6087  */
6088 static ssize_t
6089 tracing_read_pipe(struct file *filp, char __user *ubuf,
6090                   size_t cnt, loff_t *ppos)
6091 {
6092         struct trace_iterator *iter = filp->private_data;
6093         ssize_t sret;
6094
6095         /*
6096          * Avoid more than one consumer on a single file descriptor.
6097          * This is just a matter of trace coherency; the ring buffer itself
6098          * is protected.
6099          */
6100         mutex_lock(&iter->mutex);
6101
6102         /* return any leftover data */
6103         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6104         if (sret != -EBUSY)
6105                 goto out;
6106
6107         trace_seq_init(&iter->seq);
6108
6109         if (iter->trace->read) {
6110                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6111                 if (sret)
6112                         goto out;
6113         }
6114
6115 waitagain:
6116         sret = tracing_wait_pipe(filp);
6117         if (sret <= 0)
6118                 goto out;
6119
6120         /* stop when tracing is finished */
6121         if (trace_empty(iter)) {
6122                 sret = 0;
6123                 goto out;
6124         }
6125
6126         if (cnt >= PAGE_SIZE)
6127                 cnt = PAGE_SIZE - 1;
6128
6129         /* reset all but tr, trace, and overruns */
6130         memset(&iter->seq, 0,
6131                sizeof(struct trace_iterator) -
6132                offsetof(struct trace_iterator, seq));
6133         cpumask_clear(iter->started);
6134         trace_seq_init(&iter->seq);
6135         iter->pos = -1;
6136
6137         trace_event_read_lock();
6138         trace_access_lock(iter->cpu_file);
6139         while (trace_find_next_entry_inc(iter) != NULL) {
6140                 enum print_line_t ret;
6141                 int save_len = iter->seq.seq.len;
6142
6143                 ret = print_trace_line(iter);
6144                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6145                         /* don't print partial lines */
6146                         iter->seq.seq.len = save_len;
6147                         break;
6148                 }
6149                 if (ret != TRACE_TYPE_NO_CONSUME)
6150                         trace_consume(iter);
6151
6152                 if (trace_seq_used(&iter->seq) >= cnt)
6153                         break;
6154
6155                 /*
6156                  * Setting the full flag means we reached the trace_seq buffer
6157                  * size and should have left via the partial-output condition above.
6158                  * One of the trace_seq_* functions is not being used properly.
6159                  */
6160                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6161                           iter->ent->type);
6162         }
6163         trace_access_unlock(iter->cpu_file);
6164         trace_event_read_unlock();
6165
6166         /* Now copy what we have to the user */
6167         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6168         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6169                 trace_seq_init(&iter->seq);
6170
6171         /*
6172          * If there was nothing to send to user, in spite of consuming trace
6173          * entries, go back to wait for more entries.
6174          */
6175         if (sret == -EBUSY)
6176                 goto waitagain;
6177
6178 out:
6179         mutex_unlock(&iter->mutex);
6180
6181         return sret;
6182 }
6183
6184 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6185                                      unsigned int idx)
6186 {
6187         __free_page(spd->pages[idx]);
6188 }
6189
6190 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6191         .confirm                = generic_pipe_buf_confirm,
6192         .release                = generic_pipe_buf_release,
6193         .steal                  = generic_pipe_buf_steal,
6194         .get                    = generic_pipe_buf_get,
6195 };
6196
6197 static size_t
6198 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6199 {
6200         size_t count;
6201         int save_len;
6202         int ret;
6203
6204         /* Seq buffer is page-sized, exactly what we need. */
6205         for (;;) {
6206                 save_len = iter->seq.seq.len;
6207                 ret = print_trace_line(iter);
6208
6209                 if (trace_seq_has_overflowed(&iter->seq)) {
6210                         iter->seq.seq.len = save_len;
6211                         break;
6212                 }
6213
6214                 /*
6215                  * This should not be hit, because it should only
6216                  * be set if the iter->seq overflowed. But check it
6217                  * anyway to be safe.
6218                  */
6219                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6220                         iter->seq.seq.len = save_len;
6221                         break;
6222                 }
6223
6224                 count = trace_seq_used(&iter->seq) - save_len;
6225                 if (rem < count) {
6226                         rem = 0;
6227                         iter->seq.seq.len = save_len;
6228                         break;
6229                 }
6230
6231                 if (ret != TRACE_TYPE_NO_CONSUME)
6232                         trace_consume(iter);
6233                 rem -= count;
6234                 if (!trace_find_next_entry_inc(iter))   {
6235                         rem = 0;
6236                         iter->ent = NULL;
6237                         break;
6238                 }
6239         }
6240
6241         return rem;
6242 }
6243
6244 static ssize_t tracing_splice_read_pipe(struct file *filp,
6245                                         loff_t *ppos,
6246                                         struct pipe_inode_info *pipe,
6247                                         size_t len,
6248                                         unsigned int flags)
6249 {
6250         struct page *pages_def[PIPE_DEF_BUFFERS];
6251         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6252         struct trace_iterator *iter = filp->private_data;
6253         struct splice_pipe_desc spd = {
6254                 .pages          = pages_def,
6255                 .partial        = partial_def,
6256                 .nr_pages       = 0, /* This gets updated below. */
6257                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6258                 .ops            = &tracing_pipe_buf_ops,
6259                 .spd_release    = tracing_spd_release_pipe,
6260         };
6261         ssize_t ret;
6262         size_t rem;
6263         unsigned int i;
6264
6265         if (splice_grow_spd(pipe, &spd))
6266                 return -ENOMEM;
6267
6268         mutex_lock(&iter->mutex);
6269
6270         if (iter->trace->splice_read) {
6271                 ret = iter->trace->splice_read(iter, filp,
6272                                                ppos, pipe, len, flags);
6273                 if (ret)
6274                         goto out_err;
6275         }
6276
6277         ret = tracing_wait_pipe(filp);
6278         if (ret <= 0)
6279                 goto out_err;
6280
6281         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6282                 ret = -EFAULT;
6283                 goto out_err;
6284         }
6285
6286         trace_event_read_lock();
6287         trace_access_lock(iter->cpu_file);
6288
6289         /* Fill as many pages as possible. */
6290         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6291                 spd.pages[i] = alloc_page(GFP_KERNEL);
6292                 if (!spd.pages[i])
6293                         break;
6294
6295                 rem = tracing_fill_pipe_page(rem, iter);
6296
6297                 /* Copy the data into the page, so we can start over. */
6298                 ret = trace_seq_to_buffer(&iter->seq,
6299                                           page_address(spd.pages[i]),
6300                                           trace_seq_used(&iter->seq));
6301                 if (ret < 0) {
6302                         __free_page(spd.pages[i]);
6303                         break;
6304                 }
6305                 spd.partial[i].offset = 0;
6306                 spd.partial[i].len = trace_seq_used(&iter->seq);
6307
6308                 trace_seq_init(&iter->seq);
6309         }
6310
6311         trace_access_unlock(iter->cpu_file);
6312         trace_event_read_unlock();
6313         mutex_unlock(&iter->mutex);
6314
6315         spd.nr_pages = i;
6316
6317         if (i)
6318                 ret = splice_to_pipe(pipe, &spd);
6319         else
6320                 ret = 0;
6321 out:
6322         splice_shrink_spd(&spd);
6323         return ret;
6324
6325 out_err:
6326         mutex_unlock(&iter->mutex);
6327         goto out;
6328 }
6329
6330 static ssize_t
6331 tracing_entries_read(struct file *filp, char __user *ubuf,
6332                      size_t cnt, loff_t *ppos)
6333 {
6334         struct inode *inode = file_inode(filp);
6335         struct trace_array *tr = inode->i_private;
6336         int cpu = tracing_get_cpu(inode);
6337         char buf[64];
6338         int r = 0;
6339         ssize_t ret;
6340
6341         mutex_lock(&trace_types_lock);
6342
6343         if (cpu == RING_BUFFER_ALL_CPUS) {
6344                 int cpu, buf_size_same;
6345                 unsigned long size;
6346
6347                 size = 0;
6348                 buf_size_same = 1;
6349         /* check if all cpu sizes are the same */
6350                 for_each_tracing_cpu(cpu) {
6351                         /* fill in the size from the first enabled cpu */
6352                         if (size == 0)
6353                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6354                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6355                                 buf_size_same = 0;
6356                                 break;
6357                         }
6358                 }
6359
6360                 if (buf_size_same) {
6361                         if (!ring_buffer_expanded)
6362                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6363                                             size >> 10,
6364                                             trace_buf_size >> 10);
6365                         else
6366                                 r = sprintf(buf, "%lu\n", size >> 10);
6367                 } else
6368                         r = sprintf(buf, "X\n");
6369         } else
6370                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6371
6372         mutex_unlock(&trace_types_lock);
6373
6374         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6375         return ret;
6376 }
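/*
 * Illustrative output of the buffer_size_kb read above (the numbers are
 * made up for the example): "1408" once the ring buffer has been expanded,
 * "7 (expanded: 1408)" while it is still at its boot-time minimum, and
 * "X" when the per-CPU buffer sizes differ.
 */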
6377
6378 static ssize_t
6379 tracing_entries_write(struct file *filp, const char __user *ubuf,
6380                       size_t cnt, loff_t *ppos)
6381 {
6382         struct inode *inode = file_inode(filp);
6383         struct trace_array *tr = inode->i_private;
6384         unsigned long val;
6385         int ret;
6386
6387         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6388         if (ret)
6389                 return ret;
6390
6391         /* must have at least 1 entry */
6392         if (!val)
6393                 return -EINVAL;
6394
6395         /* value is in KB */
6396         val <<= 10;
6397         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6398         if (ret < 0)
6399                 return ret;
6400
6401         *ppos += cnt;
6402
6403         return cnt;
6404 }
6405
6406 static ssize_t
6407 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6408                                 size_t cnt, loff_t *ppos)
6409 {
6410         struct trace_array *tr = filp->private_data;
6411         char buf[64];
6412         int r, cpu;
6413         unsigned long size = 0, expanded_size = 0;
6414
6415         mutex_lock(&trace_types_lock);
6416         for_each_tracing_cpu(cpu) {
6417                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6418                 if (!ring_buffer_expanded)
6419                         expanded_size += trace_buf_size >> 10;
6420         }
6421         if (ring_buffer_expanded)
6422                 r = sprintf(buf, "%lu\n", size);
6423         else
6424                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6425         mutex_unlock(&trace_types_lock);
6426
6427         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6428 }
6429
6430 static ssize_t
6431 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6432                           size_t cnt, loff_t *ppos)
6433 {
6434         /*
6435          * There is no need to read what the user has written; this function
6436          * exists only so that writing to the file with "echo" does not fail.
6437          */
6438
6439         *ppos += cnt;
6440
6441         return cnt;
6442 }
6443
6444 static int
6445 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6446 {
6447         struct trace_array *tr = inode->i_private;
6448
6449         /* disable tracing? */
6450         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6451                 tracer_tracing_off(tr);
6452         /* resize the ring buffer to 0 */
6453         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6454
6455         trace_array_put(tr);
6456
6457         return 0;
6458 }
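/*
 * This release handler is what does the real work for the "free_buffer"
 * file (the name is given to it by the tracefs setup code elsewhere and is
 * shown here only for context): when the file is closed, tracing is first
 * turned off if TRACE_ITER_STOP_ON_FREE is set, and the ring buffer is
 * then resized to zero.
 */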
6459
6460 static ssize_t
6461 tracing_mark_write(struct file *filp, const char __user *ubuf,
6462                                         size_t cnt, loff_t *fpos)
6463 {
6464         struct trace_array *tr = filp->private_data;
6465         struct ring_buffer_event *event;
6466         enum event_trigger_type tt = ETT_NONE;
6467         struct ring_buffer *buffer;
6468         struct print_entry *entry;
6469         unsigned long irq_flags;
6470         ssize_t written;
6471         int size;
6472         int len;
6473
6474 /* Used in tracing_mark_raw_write() as well */
6475 #define FAULTED_STR "<faulted>"
6476 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6477
6478         if (tracing_disabled)
6479                 return -EINVAL;
6480
6481         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6482                 return -EINVAL;
6483
6484         if (cnt > TRACE_BUF_SIZE)
6485                 cnt = TRACE_BUF_SIZE;
6486
6487         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6488
6489         local_save_flags(irq_flags);
6490         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6491
6492         /* If less than "<faulted>", then make sure we can still add that */
6493         if (cnt < FAULTED_SIZE)
6494                 size += FAULTED_SIZE - cnt;
6495
6496         buffer = tr->trace_buffer.buffer;
6497         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6498                                             irq_flags, preempt_count());
6499         if (unlikely(!event))
6500                 /* Ring buffer disabled, return as if not open for write */
6501                 return -EBADF;
6502
6503         entry = ring_buffer_event_data(event);
6504         entry->ip = _THIS_IP_;
6505
6506         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6507         if (len) {
6508                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6509                 cnt = FAULTED_SIZE;
6510                 written = -EFAULT;
6511         } else
6512                 written = cnt;
6513         len = cnt;
6514
6515         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6516                 /* do not add \n before testing triggers, but add \0 */
6517                 entry->buf[cnt] = '\0';
6518                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6519         }
6520
6521         if (entry->buf[cnt - 1] != '\n') {
6522                 entry->buf[cnt] = '\n';
6523                 entry->buf[cnt + 1] = '\0';
6524         } else
6525                 entry->buf[cnt] = '\0';
6526
6527         __buffer_unlock_commit(buffer, event);
6528
6529         if (tt)
6530                 event_triggers_post_call(tr->trace_marker_file, tt);
6531
6532         if (written > 0)
6533                 *fpos += written;
6534
6535         return written;
6536 }
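/*
 * A minimal user-space sketch of the interface implemented above, assuming
 * the usual tracefs mount point (the path may differ on a given system):
 *
 *	# echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * The string is stored in a TRACE_PRINT event; if copying it from user
 * space faults, "<faulted>" is recorded instead and the write returns
 * -EFAULT.
 */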
6537
6538 /* Limit it for now to 3K (including tag) */
6539 #define RAW_DATA_MAX_SIZE (1024*3)
6540
6541 static ssize_t
6542 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6543                                         size_t cnt, loff_t *fpos)
6544 {
6545         struct trace_array *tr = filp->private_data;
6546         struct ring_buffer_event *event;
6547         struct ring_buffer *buffer;
6548         struct raw_data_entry *entry;
6549         unsigned long irq_flags;
6550         ssize_t written;
6551         int size;
6552         int len;
6553
6554 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6555
6556         if (tracing_disabled)
6557                 return -EINVAL;
6558
6559         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6560                 return -EINVAL;
6561
6562         /* The marker must at least have a tag id */
6563         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6564                 return -EINVAL;
6565
6566         if (cnt > TRACE_BUF_SIZE)
6567                 cnt = TRACE_BUF_SIZE;
6568
6569         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6570
6571         local_save_flags(irq_flags);
6572         size = sizeof(*entry) + cnt;
6573         if (cnt < FAULT_SIZE_ID)
6574                 size += FAULT_SIZE_ID - cnt;
6575
6576         buffer = tr->trace_buffer.buffer;
6577         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6578                                             irq_flags, preempt_count());
6579         if (!event)
6580                 /* Ring buffer disabled, return as if not open for write */
6581                 return -EBADF;
6582
6583         entry = ring_buffer_event_data(event);
6584
6585         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6586         if (len) {
6587                 entry->id = -1;
6588                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6589                 written = -EFAULT;
6590         } else
6591                 written = cnt;
6592
6593         __buffer_unlock_commit(buffer, event);
6594
6595         if (written > 0)
6596                 *fpos += written;
6597
6598         return written;
6599 }
6600
6601 static int tracing_clock_show(struct seq_file *m, void *v)
6602 {
6603         struct trace_array *tr = m->private;
6604         int i;
6605
6606         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6607                 seq_printf(m,
6608                         "%s%s%s%s", i ? " " : "",
6609                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6610                         i == tr->clock_id ? "]" : "");
6611         seq_putc(m, '\n');
6612
6613         return 0;
6614 }
6615
6616 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6617 {
6618         int i;
6619
6620         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6621                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6622                         break;
6623         }
6624         if (i == ARRAY_SIZE(trace_clocks))
6625                 return -EINVAL;
6626
6627         mutex_lock(&trace_types_lock);
6628
6629         tr->clock_id = i;
6630
6631         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6632
6633         /*
6634          * New clock may not be consistent with the previous clock.
6635          * Reset the buffer so that it doesn't have incomparable timestamps.
6636          */
6637         tracing_reset_online_cpus(&tr->trace_buffer);
6638
6639 #ifdef CONFIG_TRACER_MAX_TRACE
6640         if (tr->max_buffer.buffer)
6641                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6642         tracing_reset_online_cpus(&tr->max_buffer);
6643 #endif
6644
6645         mutex_unlock(&trace_types_lock);
6646
6647         return 0;
6648 }
6649
6650 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6651                                    size_t cnt, loff_t *fpos)
6652 {
6653         struct seq_file *m = filp->private_data;
6654         struct trace_array *tr = m->private;
6655         char buf[64];
6656         const char *clockstr;
6657         int ret;
6658
6659         if (cnt >= sizeof(buf))
6660                 return -EINVAL;
6661
6662         if (copy_from_user(buf, ubuf, cnt))
6663                 return -EFAULT;
6664
6665         buf[cnt] = 0;
6666
6667         clockstr = strstrip(buf);
6668
6669         ret = tracing_set_clock(tr, clockstr);
6670         if (ret)
6671                 return ret;
6672
6673         *fpos += cnt;
6674
6675         return cnt;
6676 }
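/*
 * Sketch of how this is typically exercised from user space (assuming the
 * standard tracefs mount point; the exact clock list depends on the
 * kernel configuration):
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * The write lands in tracing_clock_write() above, which strips the string
 * and hands it to tracing_set_clock(); as noted there, the ring buffer is
 * reset so that old and new timestamps are never mixed.
 */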
6677
6678 static int tracing_clock_open(struct inode *inode, struct file *file)
6679 {
6680         struct trace_array *tr = inode->i_private;
6681         int ret;
6682
6683         ret = tracing_check_open_get_tr(tr);
6684         if (ret)
6685                 return ret;
6686
6687         ret = single_open(file, tracing_clock_show, inode->i_private);
6688         if (ret < 0)
6689                 trace_array_put(tr);
6690
6691         return ret;
6692 }
6693
6694 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6695 {
6696         struct trace_array *tr = m->private;
6697
6698         mutex_lock(&trace_types_lock);
6699
6700         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6701                 seq_puts(m, "delta [absolute]\n");
6702         else
6703                 seq_puts(m, "[delta] absolute\n");
6704
6705         mutex_unlock(&trace_types_lock);
6706
6707         return 0;
6708 }
6709
6710 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6711 {
6712         struct trace_array *tr = inode->i_private;
6713         int ret;
6714
6715         ret = tracing_check_open_get_tr(tr);
6716         if (ret)
6717                 return ret;
6718
6719         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6720         if (ret < 0)
6721                 trace_array_put(tr);
6722
6723         return ret;
6724 }
6725
6726 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6727 {
6728         int ret = 0;
6729
6730         mutex_lock(&trace_types_lock);
6731
6732         if (abs && tr->time_stamp_abs_ref++)
6733                 goto out;
6734
6735         if (!abs) {
6736                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6737                         ret = -EINVAL;
6738                         goto out;
6739                 }
6740
6741                 if (--tr->time_stamp_abs_ref)
6742                         goto out;
6743         }
6744
6745         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6746
6747 #ifdef CONFIG_TRACER_MAX_TRACE
6748         if (tr->max_buffer.buffer)
6749                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6750 #endif
6751  out:
6752         mutex_unlock(&trace_types_lock);
6753
6754         return ret;
6755 }
6756
6757 struct ftrace_buffer_info {
6758         struct trace_iterator   iter;
6759         void                    *spare;
6760         unsigned int            spare_cpu;
6761         unsigned int            read;
6762 };
6763
6764 #ifdef CONFIG_TRACER_SNAPSHOT
6765 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6766 {
6767         struct trace_array *tr = inode->i_private;
6768         struct trace_iterator *iter;
6769         struct seq_file *m;
6770         int ret;
6771
6772         ret = tracing_check_open_get_tr(tr);
6773         if (ret)
6774                 return ret;
6775
6776         if (file->f_mode & FMODE_READ) {
6777                 iter = __tracing_open(inode, file, true);
6778                 if (IS_ERR(iter))
6779                         ret = PTR_ERR(iter);
6780         } else {
6781                 /* Writes still need the seq_file to hold the private data */
6782                 ret = -ENOMEM;
6783                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6784                 if (!m)
6785                         goto out;
6786                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6787                 if (!iter) {
6788                         kfree(m);
6789                         goto out;
6790                 }
6791                 ret = 0;
6792
6793                 iter->tr = tr;
6794                 iter->trace_buffer = &tr->max_buffer;
6795                 iter->cpu_file = tracing_get_cpu(inode);
6796                 m->private = iter;
6797                 file->private_data = m;
6798         }
6799 out:
6800         if (ret < 0)
6801                 trace_array_put(tr);
6802
6803         return ret;
6804 }
6805
6806 static ssize_t
6807 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6808                        loff_t *ppos)
6809 {
6810         struct seq_file *m = filp->private_data;
6811         struct trace_iterator *iter = m->private;
6812         struct trace_array *tr = iter->tr;
6813         unsigned long val;
6814         int ret;
6815
6816         ret = tracing_update_buffers();
6817         if (ret < 0)
6818                 return ret;
6819
6820         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6821         if (ret)
6822                 return ret;
6823
6824         mutex_lock(&trace_types_lock);
6825
6826         if (tr->current_trace->use_max_tr) {
6827                 ret = -EBUSY;
6828                 goto out;
6829         }
6830
6831         arch_spin_lock(&tr->max_lock);
6832         if (tr->cond_snapshot)
6833                 ret = -EBUSY;
6834         arch_spin_unlock(&tr->max_lock);
6835         if (ret)
6836                 goto out;
6837
6838         switch (val) {
6839         case 0:
6840                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6841                         ret = -EINVAL;
6842                         break;
6843                 }
6844                 if (tr->allocated_snapshot)
6845                         free_snapshot(tr);
6846                 break;
6847         case 1:
6848 /* Only allow per-cpu swap if the ring buffer supports it */
6849 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6850                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6851                         ret = -EINVAL;
6852                         break;
6853                 }
6854 #endif
6855                 if (tr->allocated_snapshot)
6856                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6857                                         &tr->trace_buffer, iter->cpu_file);
6858                 else
6859                         ret = tracing_alloc_snapshot_instance(tr);
6860                 if (ret < 0)
6861                         break;
6862                 local_irq_disable();
6863                 /* Now, we're going to swap */
6864                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6865                         update_max_tr(tr, current, smp_processor_id(), NULL);
6866                 else
6867                         update_max_tr_single(tr, current, iter->cpu_file);
6868                 local_irq_enable();
6869                 break;
6870         default:
6871                 if (tr->allocated_snapshot) {
6872                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6873                                 tracing_reset_online_cpus(&tr->max_buffer);
6874                         else
6875                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6876                 }
6877                 break;
6878         }
6879
6880         if (ret >= 0) {
6881                 *ppos += cnt;
6882                 ret = cnt;
6883         }
6884 out:
6885         mutex_unlock(&trace_types_lock);
6886         return ret;
6887 }
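/*
 * User-space view of the write handler above (any value other than 0 or 1
 * simply clears the snapshot contents):
 *
 *	# echo 1 > snapshot	- allocate the snapshot buffer if needed and
 *				  swap it with the live buffer
 *	# echo 0 > snapshot	- free the snapshot buffer
 *	# echo 2 > snapshot	- erase the snapshot contents, keep the buffer
 */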
6888
6889 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6890 {
6891         struct seq_file *m = file->private_data;
6892         int ret;
6893
6894         ret = tracing_release(inode, file);
6895
6896         if (file->f_mode & FMODE_READ)
6897                 return ret;
6898
6899         /* If write only, the seq_file is just a stub */
6900         if (m)
6901                 kfree(m->private);
6902         kfree(m);
6903
6904         return 0;
6905 }
6906
6907 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6908 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6909                                     size_t count, loff_t *ppos);
6910 static int tracing_buffers_release(struct inode *inode, struct file *file);
6911 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6912                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6913
6914 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6915 {
6916         struct ftrace_buffer_info *info;
6917         int ret;
6918
6919         /* The following checks for tracefs lockdown */
6920         ret = tracing_buffers_open(inode, filp);
6921         if (ret < 0)
6922                 return ret;
6923
6924         info = filp->private_data;
6925
6926         if (info->iter.trace->use_max_tr) {
6927                 tracing_buffers_release(inode, filp);
6928                 return -EBUSY;
6929         }
6930
6931         info->iter.snapshot = true;
6932         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6933
6934         return ret;
6935 }
6936
6937 #endif /* CONFIG_TRACER_SNAPSHOT */
6938
6939
6940 static const struct file_operations tracing_thresh_fops = {
6941         .open           = tracing_open_generic,
6942         .read           = tracing_thresh_read,
6943         .write          = tracing_thresh_write,
6944         .llseek         = generic_file_llseek,
6945 };
6946
6947 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6948 static const struct file_operations tracing_max_lat_fops = {
6949         .open           = tracing_open_generic,
6950         .read           = tracing_max_lat_read,
6951         .write          = tracing_max_lat_write,
6952         .llseek         = generic_file_llseek,
6953 };
6954 #endif
6955
6956 static const struct file_operations set_tracer_fops = {
6957         .open           = tracing_open_generic,
6958         .read           = tracing_set_trace_read,
6959         .write          = tracing_set_trace_write,
6960         .llseek         = generic_file_llseek,
6961 };
6962
6963 static const struct file_operations tracing_pipe_fops = {
6964         .open           = tracing_open_pipe,
6965         .poll           = tracing_poll_pipe,
6966         .read           = tracing_read_pipe,
6967         .splice_read    = tracing_splice_read_pipe,
6968         .release        = tracing_release_pipe,
6969         .llseek         = no_llseek,
6970 };
6971
6972 static const struct file_operations tracing_entries_fops = {
6973         .open           = tracing_open_generic_tr,
6974         .read           = tracing_entries_read,
6975         .write          = tracing_entries_write,
6976         .llseek         = generic_file_llseek,
6977         .release        = tracing_release_generic_tr,
6978 };
6979
6980 static const struct file_operations tracing_total_entries_fops = {
6981         .open           = tracing_open_generic_tr,
6982         .read           = tracing_total_entries_read,
6983         .llseek         = generic_file_llseek,
6984         .release        = tracing_release_generic_tr,
6985 };
6986
6987 static const struct file_operations tracing_free_buffer_fops = {
6988         .open           = tracing_open_generic_tr,
6989         .write          = tracing_free_buffer_write,
6990         .release        = tracing_free_buffer_release,
6991 };
6992
6993 static const struct file_operations tracing_mark_fops = {
6994         .open           = tracing_open_generic_tr,
6995         .write          = tracing_mark_write,
6996         .llseek         = generic_file_llseek,
6997         .release        = tracing_release_generic_tr,
6998 };
6999
7000 static const struct file_operations tracing_mark_raw_fops = {
7001         .open           = tracing_open_generic_tr,
7002         .write          = tracing_mark_raw_write,
7003         .llseek         = generic_file_llseek,
7004         .release        = tracing_release_generic_tr,
7005 };
7006
7007 static const struct file_operations trace_clock_fops = {
7008         .open           = tracing_clock_open,
7009         .read           = seq_read,
7010         .llseek         = seq_lseek,
7011         .release        = tracing_single_release_tr,
7012         .write          = tracing_clock_write,
7013 };
7014
7015 static const struct file_operations trace_time_stamp_mode_fops = {
7016         .open           = tracing_time_stamp_mode_open,
7017         .read           = seq_read,
7018         .llseek         = seq_lseek,
7019         .release        = tracing_single_release_tr,
7020 };
7021
7022 #ifdef CONFIG_TRACER_SNAPSHOT
7023 static const struct file_operations snapshot_fops = {
7024         .open           = tracing_snapshot_open,
7025         .read           = seq_read,
7026         .write          = tracing_snapshot_write,
7027         .llseek         = tracing_lseek,
7028         .release        = tracing_snapshot_release,
7029 };
7030
7031 static const struct file_operations snapshot_raw_fops = {
7032         .open           = snapshot_raw_open,
7033         .read           = tracing_buffers_read,
7034         .release        = tracing_buffers_release,
7035         .splice_read    = tracing_buffers_splice_read,
7036         .llseek         = no_llseek,
7037 };
7038
7039 #endif /* CONFIG_TRACER_SNAPSHOT */
7040
7041 #define TRACING_LOG_ERRS_MAX    8
7042 #define TRACING_LOG_LOC_MAX     128
7043
7044 #define CMD_PREFIX "  Command: "
7045
7046 struct err_info {
7047         const char      **errs; /* ptr to loc-specific array of err strings */
7048         u8              type;   /* index into errs -> specific err string */
7049         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7050         u64             ts;
7051 };
7052
7053 struct tracing_log_err {
7054         struct list_head        list;
7055         struct err_info         info;
7056         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7057         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7058 };
7059
7060 static DEFINE_MUTEX(tracing_err_log_lock);
7061
7062 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7063 {
7064         struct tracing_log_err *err;
7065
7066         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7067                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7068                 if (!err)
7069                         err = ERR_PTR(-ENOMEM);
7070                 tr->n_err_log_entries++;
7071
7072                 return err;
7073         }
7074
7075         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7076         list_del(&err->list);
7077
7078         return err;
7079 }
7080
7081 /**
7082  * err_pos - find the position of a string within a command for error careting
7083  * @cmd: The tracing command that caused the error
7084  * @str: The string to position the caret at within @cmd
7085  *
7086  * Finds the position of the first occurrence of @str within @cmd.  The
7087  * return value can be passed to tracing_log_err() for caret placement
7088  * within @cmd.
7089  *
7090  * Returns the index within @cmd of the first occurrence of @str or 0
7091  * if @str was not found.
7092  */
7093 unsigned int err_pos(char *cmd, const char *str)
7094 {
7095         char *found;
7096
7097         if (WARN_ON(!strlen(cmd)))
7098                 return 0;
7099
7100         found = strstr(cmd, str);
7101         if (found)
7102                 return found - cmd;
7103
7104         return 0;
7105 }
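/*
 * For example, err_pos("hist:keys=bogus", "bogus") returns 10, which a
 * caller can then pass as the @pos argument of tracing_log_err() below so
 * that the caret lands under the offending token.
 */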
7106
7107 /**
7108  * tracing_log_err - write an error to the tracing error log
7109  * @tr: The associated trace array for the error (NULL for top level array)
7110  * @loc: A string describing where the error occurred
7111  * @cmd: The tracing command that caused the error
7112  * @errs: The array of loc-specific static error strings
7113  * @type: The index into errs[], which produces the specific static err string
7114  * @pos: The position the caret should be placed in the cmd
7115  *
7116  * Writes an error into tracing/error_log of the form:
7117  *
7118  * <loc>: error: <text>
7119  *   Command: <cmd>
7120  *              ^
7121  *
7122  * tracing/error_log is a small log file containing the last
7123  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7124  * unless there has been a tracing error; the error log can be
7125  * cleared and its memory freed by writing the empty string to it
7126  * in truncation mode, i.e. echo > tracing/error_log.
7127  *
7128  * NOTE: the @errs array along with the @type param are used to
7129  * produce a static error string - this string is not copied and saved
7130  * when the error is logged - only a pointer to it is saved.  See
7131  * existing callers for examples of how static strings are typically
7132  * defined for use with tracing_log_err().
7133  */
7134 void tracing_log_err(struct trace_array *tr,
7135                      const char *loc, const char *cmd,
7136                      const char **errs, u8 type, u8 pos)
7137 {
7138         struct tracing_log_err *err;
7139
7140         if (!tr)
7141                 tr = &global_trace;
7142
7143         mutex_lock(&tracing_err_log_lock);
7144         err = get_tracing_log_err(tr);
7145         if (PTR_ERR(err) == -ENOMEM) {
7146                 mutex_unlock(&tracing_err_log_lock);
7147                 return;
7148         }
7149
7150         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7151         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7152
7153         err->info.errs = errs;
7154         err->info.type = type;
7155         err->info.pos = pos;
7156         err->info.ts = local_clock();
7157
7158         list_add_tail(&err->list, &tr->err_log);
7159         mutex_unlock(&tracing_err_log_lock);
7160 }
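/*
 * A usage sketch (the names below are illustrative, not taken from a real
 * caller):
 *
 *	static const char *my_errs[] = { "Duplicate key", "Too many fields" };
 *
 *	tracing_log_err(tr, "hist:setup", cmd, my_errs, 0,
 *			err_pos(cmd, field_name));
 *
 * which shows up in tracing/error_log roughly as:
 *
 *	[  123.456789] hist:setup: error: Duplicate key
 *	  Command: <contents of cmd>
 *	             ^
 */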
7161
7162 static void clear_tracing_err_log(struct trace_array *tr)
7163 {
7164         struct tracing_log_err *err, *next;
7165
7166         mutex_lock(&tracing_err_log_lock);
7167         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7168                 list_del(&err->list);
7169                 kfree(err);
7170         }
7171
7172         tr->n_err_log_entries = 0;
7173         mutex_unlock(&tracing_err_log_lock);
7174 }
7175
7176 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7177 {
7178         struct trace_array *tr = m->private;
7179
7180         mutex_lock(&tracing_err_log_lock);
7181
7182         return seq_list_start(&tr->err_log, *pos);
7183 }
7184
7185 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7186 {
7187         struct trace_array *tr = m->private;
7188
7189         return seq_list_next(v, &tr->err_log, pos);
7190 }
7191
7192 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7193 {
7194         mutex_unlock(&tracing_err_log_lock);
7195 }
7196
7197 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7198 {
7199         u8 i;
7200
7201         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7202                 seq_putc(m, ' ');
7203         for (i = 0; i < pos; i++)
7204                 seq_putc(m, ' ');
7205         seq_puts(m, "^\n");
7206 }
7207
7208 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7209 {
7210         struct tracing_log_err *err = v;
7211
7212         if (err) {
7213                 const char *err_text = err->info.errs[err->info.type];
7214                 u64 sec = err->info.ts;
7215                 u32 nsec;
7216
7217                 nsec = do_div(sec, NSEC_PER_SEC);
7218                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7219                            err->loc, err_text);
7220                 seq_printf(m, "%s", err->cmd);
7221                 tracing_err_log_show_pos(m, err->info.pos);
7222         }
7223
7224         return 0;
7225 }
7226
7227 static const struct seq_operations tracing_err_log_seq_ops = {
7228         .start  = tracing_err_log_seq_start,
7229         .next   = tracing_err_log_seq_next,
7230         .stop   = tracing_err_log_seq_stop,
7231         .show   = tracing_err_log_seq_show
7232 };
7233
7234 static int tracing_err_log_open(struct inode *inode, struct file *file)
7235 {
7236         struct trace_array *tr = inode->i_private;
7237         int ret = 0;
7238
7239         ret = tracing_check_open_get_tr(tr);
7240         if (ret)
7241                 return ret;
7242
7243         /* If this file was opened for write, then erase contents */
7244         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7245                 clear_tracing_err_log(tr);
7246
7247         if (file->f_mode & FMODE_READ) {
7248                 ret = seq_open(file, &tracing_err_log_seq_ops);
7249                 if (!ret) {
7250                         struct seq_file *m = file->private_data;
7251                         m->private = tr;
7252                 } else {
7253                         trace_array_put(tr);
7254                 }
7255         }
7256         return ret;
7257 }
7258
7259 static ssize_t tracing_err_log_write(struct file *file,
7260                                      const char __user *buffer,
7261                                      size_t count, loff_t *ppos)
7262 {
7263         return count;
7264 }
7265
7266 static int tracing_err_log_release(struct inode *inode, struct file *file)
7267 {
7268         struct trace_array *tr = inode->i_private;
7269
7270         trace_array_put(tr);
7271
7272         if (file->f_mode & FMODE_READ)
7273                 seq_release(inode, file);
7274
7275         return 0;
7276 }
7277
7278 static const struct file_operations tracing_err_log_fops = {
7279         .open           = tracing_err_log_open,
7280         .write          = tracing_err_log_write,
7281         .read           = seq_read,
7282         .llseek         = seq_lseek,
7283         .release        = tracing_err_log_release,
7284 };
7285
7286 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7287 {
7288         struct trace_array *tr = inode->i_private;
7289         struct ftrace_buffer_info *info;
7290         int ret;
7291
7292         ret = tracing_check_open_get_tr(tr);
7293         if (ret)
7294                 return ret;
7295
7296         info = kzalloc(sizeof(*info), GFP_KERNEL);
7297         if (!info) {
7298                 trace_array_put(tr);
7299                 return -ENOMEM;
7300         }
7301
7302         mutex_lock(&trace_types_lock);
7303
7304         info->iter.tr           = tr;
7305         info->iter.cpu_file     = tracing_get_cpu(inode);
7306         info->iter.trace        = tr->current_trace;
7307         info->iter.trace_buffer = &tr->trace_buffer;
7308         info->spare             = NULL;
7309         /* Force reading ring buffer for first read */
7310         info->read              = (unsigned int)-1;
7311
7312         filp->private_data = info;
7313
7314         tr->current_trace->ref++;
7315
7316         mutex_unlock(&trace_types_lock);
7317
7318         ret = nonseekable_open(inode, filp);
7319         if (ret < 0)
7320                 trace_array_put(tr);
7321
7322         return ret;
7323 }
7324
7325 static __poll_t
7326 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7327 {
7328         struct ftrace_buffer_info *info = filp->private_data;
7329         struct trace_iterator *iter = &info->iter;
7330
7331         return trace_poll(iter, filp, poll_table);
7332 }
7333
7334 static ssize_t
7335 tracing_buffers_read(struct file *filp, char __user *ubuf,
7336                      size_t count, loff_t *ppos)
7337 {
7338         struct ftrace_buffer_info *info = filp->private_data;
7339         struct trace_iterator *iter = &info->iter;
7340         ssize_t ret = 0;
7341         ssize_t size;
7342
7343         if (!count)
7344                 return 0;
7345
7346 #ifdef CONFIG_TRACER_MAX_TRACE
7347         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7348                 return -EBUSY;
7349 #endif
7350
7351         if (!info->spare) {
7352                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7353                                                           iter->cpu_file);
7354                 if (IS_ERR(info->spare)) {
7355                         ret = PTR_ERR(info->spare);
7356                         info->spare = NULL;
7357                 } else {
7358                         info->spare_cpu = iter->cpu_file;
7359                 }
7360         }
7361         if (!info->spare)
7362                 return ret;
7363
7364         /* Do we have previous read data to read? */
7365         if (info->read < PAGE_SIZE)
7366                 goto read;
7367
7368  again:
7369         trace_access_lock(iter->cpu_file);
7370         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7371                                     &info->spare,
7372                                     count,
7373                                     iter->cpu_file, 0);
7374         trace_access_unlock(iter->cpu_file);
7375
7376         if (ret < 0) {
7377                 if (trace_empty(iter)) {
7378                         if ((filp->f_flags & O_NONBLOCK))
7379                                 return -EAGAIN;
7380
7381                         ret = wait_on_pipe(iter, 0);
7382                         if (ret)
7383                                 return ret;
7384
7385                         goto again;
7386                 }
7387                 return 0;
7388         }
7389
7390         info->read = 0;
7391  read:
7392         size = PAGE_SIZE - info->read;
7393         if (size > count)
7394                 size = count;
7395
7396         ret = copy_to_user(ubuf, info->spare + info->read, size);
7397         if (ret == size)
7398                 return -EFAULT;
7399
7400         size -= ret;
7401
7402         *ppos += size;
7403         info->read += size;
7404
7405         return size;
7406 }
7407
7408 static int tracing_buffers_release(struct inode *inode, struct file *file)
7409 {
7410         struct ftrace_buffer_info *info = file->private_data;
7411         struct trace_iterator *iter = &info->iter;
7412
7413         mutex_lock(&trace_types_lock);
7414
7415         iter->tr->current_trace->ref--;
7416
7417         __trace_array_put(iter->tr);
7418
7419         if (info->spare)
7420                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7421                                            info->spare_cpu, info->spare);
7422         kfree(info);
7423
7424         mutex_unlock(&trace_types_lock);
7425
7426         return 0;
7427 }
7428
7429 struct buffer_ref {
7430         struct ring_buffer      *buffer;
7431         void                    *page;
7432         int                     cpu;
7433         refcount_t              refcount;
7434 };
7435
7436 static void buffer_ref_release(struct buffer_ref *ref)
7437 {
7438         if (!refcount_dec_and_test(&ref->refcount))
7439                 return;
7440         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7441         kfree(ref);
7442 }
7443
7444 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7445                                     struct pipe_buffer *buf)
7446 {
7447         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7448
7449         buffer_ref_release(ref);
7450         buf->private = 0;
7451 }
7452
7453 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7454                                 struct pipe_buffer *buf)
7455 {
7456         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7457
7458         if (refcount_read(&ref->refcount) > INT_MAX/2)
7459                 return false;
7460
7461         refcount_inc(&ref->refcount);
7462         return true;
7463 }
7464
7465 /* Pipe buffer operations for a buffer. */
7466 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7467         .confirm                = generic_pipe_buf_confirm,
7468         .release                = buffer_pipe_buf_release,
7469         .steal                  = generic_pipe_buf_nosteal,
7470         .get                    = buffer_pipe_buf_get,
7471 };
7472
7473 /*
7474  * Callback from splice_to_pipe(); releases any remaining pages at the
7475  * end of the spd if we errored out while filling the pipe.
7476  */
7477 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7478 {
7479         struct buffer_ref *ref =
7480                 (struct buffer_ref *)spd->partial[i].private;
7481
7482         buffer_ref_release(ref);
7483         spd->partial[i].private = 0;
7484 }
7485
7486 static ssize_t
7487 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7488                             struct pipe_inode_info *pipe, size_t len,
7489                             unsigned int flags)
7490 {
7491         struct ftrace_buffer_info *info = file->private_data;
7492         struct trace_iterator *iter = &info->iter;
7493         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7494         struct page *pages_def[PIPE_DEF_BUFFERS];
7495         struct splice_pipe_desc spd = {
7496                 .pages          = pages_def,
7497                 .partial        = partial_def,
7498                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7499                 .ops            = &buffer_pipe_buf_ops,
7500                 .spd_release    = buffer_spd_release,
7501         };
7502         struct buffer_ref *ref;
7503         int entries, i;
7504         ssize_t ret = 0;
7505
7506 #ifdef CONFIG_TRACER_MAX_TRACE
7507         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7508                 return -EBUSY;
7509 #endif
7510
7511         if (*ppos & (PAGE_SIZE - 1))
7512                 return -EINVAL;
7513
7514         if (len & (PAGE_SIZE - 1)) {
7515                 if (len < PAGE_SIZE)
7516                         return -EINVAL;
7517                 len &= PAGE_MASK;
7518         }
7519
7520         if (splice_grow_spd(pipe, &spd))
7521                 return -ENOMEM;
7522
7523  again:
7524         trace_access_lock(iter->cpu_file);
7525         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7526
7527         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7528                 struct page *page;
7529                 int r;
7530
7531                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7532                 if (!ref) {
7533                         ret = -ENOMEM;
7534                         break;
7535                 }
7536
7537                 refcount_set(&ref->refcount, 1);
7538                 ref->buffer = iter->trace_buffer->buffer;
7539                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7540                 if (IS_ERR(ref->page)) {
7541                         ret = PTR_ERR(ref->page);
7542                         ref->page = NULL;
7543                         kfree(ref);
7544                         break;
7545                 }
7546                 ref->cpu = iter->cpu_file;
7547
7548                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7549                                           len, iter->cpu_file, 1);
7550                 if (r < 0) {
7551                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7552                                                    ref->page);
7553                         kfree(ref);
7554                         break;
7555                 }
7556
7557                 page = virt_to_page(ref->page);
7558
7559                 spd.pages[i] = page;
7560                 spd.partial[i].len = PAGE_SIZE;
7561                 spd.partial[i].offset = 0;
7562                 spd.partial[i].private = (unsigned long)ref;
7563                 spd.nr_pages++;
7564                 *ppos += PAGE_SIZE;
7565
7566                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7567         }
7568
7569         trace_access_unlock(iter->cpu_file);
7570         spd.nr_pages = i;
7571
7572         /* did we read anything? */
7573         if (!spd.nr_pages) {
7574                 if (ret)
7575                         goto out;
7576
7577                 ret = -EAGAIN;
7578                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7579                         goto out;
7580
7581                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7582                 if (ret)
7583                         goto out;
7584
7585                 goto again;
7586         }
7587
7588         ret = splice_to_pipe(pipe, &spd);
7589 out:
7590         splice_shrink_spd(&spd);
7591
7592         return ret;
7593 }
7594
7595 static const struct file_operations tracing_buffers_fops = {
7596         .open           = tracing_buffers_open,
7597         .read           = tracing_buffers_read,
7598         .poll           = tracing_buffers_poll,
7599         .release        = tracing_buffers_release,
7600         .splice_read    = tracing_buffers_splice_read,
7601         .llseek         = no_llseek,
7602 };
7603
7604 static ssize_t
7605 tracing_stats_read(struct file *filp, char __user *ubuf,
7606                    size_t count, loff_t *ppos)
7607 {
7608         struct inode *inode = file_inode(filp);
7609         struct trace_array *tr = inode->i_private;
7610         struct trace_buffer *trace_buf = &tr->trace_buffer;
7611         int cpu = tracing_get_cpu(inode);
7612         struct trace_seq *s;
7613         unsigned long cnt;
7614         unsigned long long t;
7615         unsigned long usec_rem;
7616
7617         s = kmalloc(sizeof(*s), GFP_KERNEL);
7618         if (!s)
7619                 return -ENOMEM;
7620
7621         trace_seq_init(s);
7622
7623         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7624         trace_seq_printf(s, "entries: %ld\n", cnt);
7625
7626         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7627         trace_seq_printf(s, "overrun: %ld\n", cnt);
7628
7629         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7630         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7631
7632         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7633         trace_seq_printf(s, "bytes: %ld\n", cnt);
7634
7635         if (trace_clocks[tr->clock_id].in_ns) {
7636                 /* local or global for trace_clock */
7637                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7638                 usec_rem = do_div(t, USEC_PER_SEC);
7639                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7640                                                                 t, usec_rem);
7641
7642                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7643                 usec_rem = do_div(t, USEC_PER_SEC);
7644                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7645         } else {
7646                 /* counter or tsc mode for trace_clock */
7647                 trace_seq_printf(s, "oldest event ts: %llu\n",
7648                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7649
7650                 trace_seq_printf(s, "now ts: %llu\n",
7651                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7652         }
7653
7654         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7655         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7656
7657         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7658         trace_seq_printf(s, "read events: %ld\n", cnt);
7659
7660         count = simple_read_from_buffer(ubuf, count, ppos,
7661                                         s->buffer, trace_seq_used(s));
7662
7663         kfree(s);
7664
7665         return count;
7666 }
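/*
 * Example of what a read of a per-CPU "stats" file produces (numbers are
 * illustrative and assume a nanosecond-based clock):
 *
 *	entries: 4096
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 163840
 *	oldest event ts:  5231.104608
 *	now ts:  5298.114521
 *	dropped events: 0
 *	read events: 210
 */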
7667
7668 static const struct file_operations tracing_stats_fops = {
7669         .open           = tracing_open_generic_tr,
7670         .read           = tracing_stats_read,
7671         .llseek         = generic_file_llseek,
7672         .release        = tracing_release_generic_tr,
7673 };
7674
7675 #ifdef CONFIG_DYNAMIC_FTRACE
7676
7677 static ssize_t
7678 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7679                   size_t cnt, loff_t *ppos)
7680 {
7681         ssize_t ret;
7682         char *buf;
7683         int r;
7684
7685         /* 256 should be plenty to hold the amount needed */
7686         buf = kmalloc(256, GFP_KERNEL);
7687         if (!buf)
7688                 return -ENOMEM;
7689
7690         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7691                       ftrace_update_tot_cnt,
7692                       ftrace_number_of_pages,
7693                       ftrace_number_of_groups);
7694
7695         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7696         kfree(buf);
7697         return ret;
7698 }
7699
7700 static const struct file_operations tracing_dyn_info_fops = {
7701         .open           = tracing_open_generic,
7702         .read           = tracing_read_dyn_info,
7703         .llseek         = generic_file_llseek,
7704 };
7705 #endif /* CONFIG_DYNAMIC_FTRACE */
7706
7707 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7708 static void
7709 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7710                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7711                 void *data)
7712 {
7713         tracing_snapshot_instance(tr);
7714 }
7715
7716 static void
7717 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7718                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7719                       void *data)
7720 {
7721         struct ftrace_func_mapper *mapper = data;
7722         long *count = NULL;
7723
7724         if (mapper)
7725                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7726
7727         if (count) {
7728
7729                 if (*count <= 0)
7730                         return;
7731
7732                 (*count)--;
7733         }
7734
7735         tracing_snapshot_instance(tr);
7736 }
7737
7738 static int
7739 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7740                       struct ftrace_probe_ops *ops, void *data)
7741 {
7742         struct ftrace_func_mapper *mapper = data;
7743         long *count = NULL;
7744
7745         seq_printf(m, "%ps:", (void *)ip);
7746
7747         seq_puts(m, "snapshot");
7748
7749         if (mapper)
7750                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7751
7752         if (count)
7753                 seq_printf(m, ":count=%ld\n", *count);
7754         else
7755                 seq_puts(m, ":unlimited\n");
7756
7757         return 0;
7758 }
7759
7760 static int
7761 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7762                      unsigned long ip, void *init_data, void **data)
7763 {
7764         struct ftrace_func_mapper *mapper = *data;
7765
7766         if (!mapper) {
7767                 mapper = allocate_ftrace_func_mapper();
7768                 if (!mapper)
7769                         return -ENOMEM;
7770                 *data = mapper;
7771         }
7772
7773         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7774 }
7775
7776 static void
7777 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7778                      unsigned long ip, void *data)
7779 {
7780         struct ftrace_func_mapper *mapper = data;
7781
7782         if (!ip) {
7783                 if (!mapper)
7784                         return;
7785                 free_ftrace_func_mapper(mapper, NULL);
7786                 return;
7787         }
7788
7789         ftrace_func_mapper_remove_ip(mapper, ip);
7790 }
7791
7792 static struct ftrace_probe_ops snapshot_probe_ops = {
7793         .func                   = ftrace_snapshot,
7794         .print                  = ftrace_snapshot_print,
7795 };
7796
7797 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7798         .func                   = ftrace_count_snapshot,
7799         .print                  = ftrace_snapshot_print,
7800         .init                   = ftrace_snapshot_init,
7801         .free                   = ftrace_snapshot_free,
7802 };
7803
7804 static int
7805 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7806                                char *glob, char *cmd, char *param, int enable)
7807 {
7808         struct ftrace_probe_ops *ops;
7809         void *count = (void *)-1;
7810         char *number;
7811         int ret;
7812
7813         if (!tr)
7814                 return -ENODEV;
7815
7816         /* hash funcs only work with set_ftrace_filter */
7817         if (!enable)
7818                 return -EINVAL;
7819
7820         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7821
7822         if (glob[0] == '!')
7823                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7824
7825         if (!param)
7826                 goto out_reg;
7827
7828         number = strsep(&param, ":");
7829
7830         if (!strlen(number))
7831                 goto out_reg;
7832
7833         /*
7834          * We use the callback data field (which is a pointer)
7835          * as our counter.
7836          */
7837         ret = kstrtoul(number, 0, (unsigned long *)&count);
7838         if (ret)
7839                 return ret;
7840
7841  out_reg:
7842         ret = tracing_alloc_snapshot_instance(tr);
7843         if (ret < 0)
7844                 goto out;
7845
7846         ret = register_ftrace_function_probe(glob, tr, ops, count);
7847
7848  out:
7849         return ret < 0 ? ret : 0;
7850 }
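/*
 * The callback above backs the "snapshot" command of set_ftrace_filter.
 * Typical usage from tracefs (shown as an illustration of the glob, count
 * and '!' handling in the parser above):
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter	  - snapshot on every hit
 *	# echo 'schedule:snapshot:3' > set_ftrace_filter  - only the first 3 hits
 *	# echo '!schedule:snapshot' > set_ftrace_filter	  - remove the probe
 */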
7851
7852 static struct ftrace_func_command ftrace_snapshot_cmd = {
7853         .name                   = "snapshot",
7854         .func                   = ftrace_trace_snapshot_callback,
7855 };
7856
7857 static __init int register_snapshot_cmd(void)
7858 {
7859         return register_ftrace_command(&ftrace_snapshot_cmd);
7860 }
7861 #else
7862 static inline __init int register_snapshot_cmd(void) { return 0; }
7863 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7864
7865 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7866 {
7867         if (WARN_ON(!tr->dir))
7868                 return ERR_PTR(-ENODEV);
7869
7870         /* Top directory uses NULL as the parent */
7871         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7872                 return NULL;
7873
7874         /* All sub buffers have a descriptor */
7875         return tr->dir;
7876 }
7877
7878 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7879 {
7880         struct dentry *d_tracer;
7881
7882         if (tr->percpu_dir)
7883                 return tr->percpu_dir;
7884
7885         d_tracer = tracing_get_dentry(tr);
7886         if (IS_ERR(d_tracer))
7887                 return NULL;
7888
7889         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7890
7891         WARN_ONCE(!tr->percpu_dir,
7892                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7893
7894         return tr->percpu_dir;
7895 }
7896
7897 static struct dentry *
7898 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7899                       void *data, long cpu, const struct file_operations *fops)
7900 {
7901         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7902
7903         if (ret) /* See tracing_get_cpu() */
7904                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7905         return ret;
7906 }
7907
7908 static void
7909 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7910 {
7911         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7912         struct dentry *d_cpu;
7913         char cpu_dir[30]; /* 30 characters should be more than enough */
7914
7915         if (!d_percpu)
7916                 return;
7917
7918         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7919         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7920         if (!d_cpu) {
7921                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7922                 return;
7923         }
7924
7925         /* per cpu trace_pipe */
7926         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7927                                 tr, cpu, &tracing_pipe_fops);
7928
7929         /* per cpu trace */
7930         trace_create_cpu_file("trace", 0644, d_cpu,
7931                                 tr, cpu, &tracing_fops);
7932
7933         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7934                                 tr, cpu, &tracing_buffers_fops);
7935
7936         trace_create_cpu_file("stats", 0444, d_cpu,
7937                                 tr, cpu, &tracing_stats_fops);
7938
7939         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7940                                 tr, cpu, &tracing_entries_fops);
7941
7942 #ifdef CONFIG_TRACER_SNAPSHOT
7943         trace_create_cpu_file("snapshot", 0644, d_cpu,
7944                                 tr, cpu, &snapshot_fops);
7945
7946         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7947                                 tr, cpu, &snapshot_raw_fops);
7948 #endif
7949 }
7950
7951 #ifdef CONFIG_FTRACE_SELFTEST
7952 /* Let selftest have access to static functions in this file */
7953 #include "trace_selftest.c"
7954 #endif
7955
7956 static ssize_t
7957 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7958                         loff_t *ppos)
7959 {
7960         struct trace_option_dentry *topt = filp->private_data;
7961         char *buf;
7962
7963         if (topt->flags->val & topt->opt->bit)
7964                 buf = "1\n";
7965         else
7966                 buf = "0\n";
7967
7968         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7969 }
7970
7971 static ssize_t
7972 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7973                          loff_t *ppos)
7974 {
7975         struct trace_option_dentry *topt = filp->private_data;
7976         unsigned long val;
7977         int ret;
7978
7979         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7980         if (ret)
7981                 return ret;
7982
7983         if (val != 0 && val != 1)
7984                 return -EINVAL;
7985
7986         if (!!(topt->flags->val & topt->opt->bit) != val) {
7987                 mutex_lock(&trace_types_lock);
7988                 ret = __set_tracer_option(topt->tr, topt->flags,
7989                                           topt->opt, !val);
7990                 mutex_unlock(&trace_types_lock);
7991                 if (ret)
7992                         return ret;
7993         }
7994
7995         *ppos += cnt;
7996
7997         return cnt;
7998 }
7999
8000
8001 static const struct file_operations trace_options_fops = {
8002         .open = tracing_open_generic,
8003         .read = trace_options_read,
8004         .write = trace_options_write,
8005         .llseek = generic_file_llseek,
8006 };
8007
8008 /*
8009  * In order to pass in both the trace_array descriptor as well as the index
8010  * to the flag that the trace option file represents, the trace_array
8011  * has a character array of trace_flags_index[], which holds the index
8012  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8013  * The address of the array element for a given flag is what gets
8014  * passed to that flag's option file read/write callbacks.
8015  *
8016  * In order to extract both the index and the trace_array descriptor,
8017  * get_tr_index() uses the following algorithm.
8018  *
8019  *   idx = *ptr;
8020  *
8021  * This works because the pointer points at the array entry whose
8022  * value is its own position (remember, index[1] == 1).
8023  *
8024  * To get the trace_array descriptor, subtract that index from the
8025  * pointer to get back to the start of the array:
8026  *
8027  *   ptr - idx == &index[0]
8028  *
8029  * Then a simple container_of() from that pointer gets us to the
8030  * trace_array descriptor.
8031  */
8032 static void get_tr_index(void *data, struct trace_array **ptr,
8033                          unsigned int *pindex)
8034 {
8035         *pindex = *(unsigned char *)data;
8036
8037         *ptr = container_of(data - *pindex, struct trace_array,
8038                             trace_flags_index);
8039 }
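/*
 * A minimal worked sketch of the round trip described above (illustrative
 * only; the bit value 3 and example_tr_index_round_trip() are arbitrary):
 * the option file is created with &tr->trace_flags_index[3] as its data,
 * and get_tr_index() recovers both the trace_array and the value 3 from
 * that single pointer.
 */
static __maybe_unused void example_tr_index_round_trip(struct trace_array *tr)
{
	void *data = &tr->trace_flags_index[3];	/* what the file carries */
	struct trace_array *found;
	unsigned int idx;

	get_tr_index(data, &found, &idx);
	WARN_ON(found != tr || idx != 3);	/* both pieces come back */
}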
8040
8041 static ssize_t
8042 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8043                         loff_t *ppos)
8044 {
8045         void *tr_index = filp->private_data;
8046         struct trace_array *tr;
8047         unsigned int index;
8048         char *buf;
8049
8050         get_tr_index(tr_index, &tr, &index);
8051
8052         if (tr->trace_flags & (1 << index))
8053                 buf = "1\n";
8054         else
8055                 buf = "0\n";
8056
8057         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8058 }
8059
8060 static ssize_t
8061 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8062                          loff_t *ppos)
8063 {
8064         void *tr_index = filp->private_data;
8065         struct trace_array *tr;
8066         unsigned int index;
8067         unsigned long val;
8068         int ret;
8069
8070         get_tr_index(tr_index, &tr, &index);
8071
8072         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8073         if (ret)
8074                 return ret;
8075
8076         if (val != 0 && val != 1)
8077                 return -EINVAL;
8078
8079         mutex_lock(&trace_types_lock);
8080         ret = set_tracer_flag(tr, 1 << index, val);
8081         mutex_unlock(&trace_types_lock);
8082
8083         if (ret < 0)
8084                 return ret;
8085
8086         *ppos += cnt;
8087
8088         return cnt;
8089 }
8090
8091 static const struct file_operations trace_options_core_fops = {
8092         .open = tracing_open_generic,
8093         .read = trace_options_core_read,
8094         .write = trace_options_core_write,
8095         .llseek = generic_file_llseek,
8096 };
8097
8098 struct dentry *trace_create_file(const char *name,
8099                                  umode_t mode,
8100                                  struct dentry *parent,
8101                                  void *data,
8102                                  const struct file_operations *fops)
8103 {
8104         struct dentry *ret;
8105
8106         ret = tracefs_create_file(name, mode, parent, data, fops);
8107         if (!ret)
8108                 pr_warn("Could not create tracefs '%s' entry\n", name);
8109
8110         return ret;
8111 }
8112
8113
8114 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8115 {
8116         struct dentry *d_tracer;
8117
8118         if (tr->options)
8119                 return tr->options;
8120
8121         d_tracer = tracing_get_dentry(tr);
8122         if (IS_ERR(d_tracer))
8123                 return NULL;
8124
8125         tr->options = tracefs_create_dir("options", d_tracer);
8126         if (!tr->options) {
8127                 pr_warn("Could not create tracefs directory 'options'\n");
8128                 return NULL;
8129         }
8130
8131         return tr->options;
8132 }
8133
8134 static void
8135 create_trace_option_file(struct trace_array *tr,
8136                          struct trace_option_dentry *topt,
8137                          struct tracer_flags *flags,
8138                          struct tracer_opt *opt)
8139 {
8140         struct dentry *t_options;
8141
8142         t_options = trace_options_init_dentry(tr);
8143         if (!t_options)
8144                 return;
8145
8146         topt->flags = flags;
8147         topt->opt = opt;
8148         topt->tr = tr;
8149
8150         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8151                                     &trace_options_fops);
8152
8153 }
8154
8155 static void
8156 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8157 {
8158         struct trace_option_dentry *topts;
8159         struct trace_options *tr_topts;
8160         struct tracer_flags *flags;
8161         struct tracer_opt *opts;
8162         int cnt;
8163         int i;
8164
8165         if (!tracer)
8166                 return;
8167
8168         flags = tracer->flags;
8169
8170         if (!flags || !flags->opts)
8171                 return;
8172
8173         /*
8174          * If this is an instance, only create flags for tracers
8175          * the instance may have.
8176          */
8177         if (!trace_ok_for_array(tracer, tr))
8178                 return;
8179
8180         for (i = 0; i < tr->nr_topts; i++) {
8181                 /* Make sure there are no duplicate flags. */
8182                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8183                         return;
8184         }
8185
8186         opts = flags->opts;
8187
8188         for (cnt = 0; opts[cnt].name; cnt++)
8189                 ;
8190
8191         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8192         if (!topts)
8193                 return;
8194
8195         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8196                             GFP_KERNEL);
8197         if (!tr_topts) {
8198                 kfree(topts);
8199                 return;
8200         }
8201
8202         tr->topts = tr_topts;
8203         tr->topts[tr->nr_topts].tracer = tracer;
8204         tr->topts[tr->nr_topts].topts = topts;
8205         tr->nr_topts++;
8206
8207         for (cnt = 0; opts[cnt].name; cnt++) {
8208                 create_trace_option_file(tr, &topts[cnt], flags,
8209                                          &opts[cnt]);
8210                 WARN_ONCE(topts[cnt].entry == NULL,
8211                           "Failed to create trace option: %s",
8212                           opts[cnt].name);
8213         }
8214 }
8215
8216 static struct dentry *
8217 create_trace_option_core_file(struct trace_array *tr,
8218                               const char *option, long index)
8219 {
8220         struct dentry *t_options;
8221
8222         t_options = trace_options_init_dentry(tr);
8223         if (!t_options)
8224                 return NULL;
8225
8226         return trace_create_file(option, 0644, t_options,
8227                                  (void *)&tr->trace_flags_index[index],
8228                                  &trace_options_core_fops);
8229 }
8230
8231 static void create_trace_options_dir(struct trace_array *tr)
8232 {
8233         struct dentry *t_options;
8234         bool top_level = tr == &global_trace;
8235         int i;
8236
8237         t_options = trace_options_init_dentry(tr);
8238         if (!t_options)
8239                 return;
8240
8241         for (i = 0; trace_options[i]; i++) {
8242                 if (top_level ||
8243                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8244                         create_trace_option_core_file(tr, trace_options[i], i);
8245         }
8246 }
8247
8248 static ssize_t
8249 rb_simple_read(struct file *filp, char __user *ubuf,
8250                size_t cnt, loff_t *ppos)
8251 {
8252         struct trace_array *tr = filp->private_data;
8253         char buf[64];
8254         int r;
8255
8256         r = tracer_tracing_is_on(tr);
8257         r = sprintf(buf, "%d\n", r);
8258
8259         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8260 }
8261
8262 static ssize_t
8263 rb_simple_write(struct file *filp, const char __user *ubuf,
8264                 size_t cnt, loff_t *ppos)
8265 {
8266         struct trace_array *tr = filp->private_data;
8267         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8268         unsigned long val;
8269         int ret;
8270
8271         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8272         if (ret)
8273                 return ret;
8274
8275         if (buffer) {
8276                 mutex_lock(&trace_types_lock);
8277                 if (!!val == tracer_tracing_is_on(tr)) {
8278                         val = 0; /* do nothing */
8279                 } else if (val) {
8280                         tracer_tracing_on(tr);
8281                         if (tr->current_trace->start)
8282                                 tr->current_trace->start(tr);
8283                 } else {
8284                         tracer_tracing_off(tr);
8285                         if (tr->current_trace->stop)
8286                                 tr->current_trace->stop(tr);
8287                 }
8288                 mutex_unlock(&trace_types_lock);
8289         }
8290
8291         (*ppos)++;
8292
8293         return cnt;
8294 }
8295
8296 static const struct file_operations rb_simple_fops = {
8297         .open           = tracing_open_generic_tr,
8298         .read           = rb_simple_read,
8299         .write          = rb_simple_write,
8300         .release        = tracing_release_generic_tr,
8301         .llseek         = default_llseek,
8302 };
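/*
 * A rough in-kernel counterpart to writing 0/1 into "tracing_on"
 * (illustrative only; example_freeze_top_buffer() is hypothetical, and the
 * file above additionally calls the current tracer's start/stop callbacks).
 */
static __maybe_unused void example_freeze_top_buffer(void)
{
	if (tracing_is_on()) {
		tracing_off();	/* stop recording, keep existing contents */
		/* ... inspect the "trace" file, dump it, etc. ... */
		tracing_on();	/* resume recording */
	}
}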
8303
8304 static ssize_t
8305 buffer_percent_read(struct file *filp, char __user *ubuf,
8306                     size_t cnt, loff_t *ppos)
8307 {
8308         struct trace_array *tr = filp->private_data;
8309         char buf[64];
8310         int r;
8311
8312         r = tr->buffer_percent;
8313         r = sprintf(buf, "%d\n", r);
8314
8315         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8316 }
8317
8318 static ssize_t
8319 buffer_percent_write(struct file *filp, const char __user *ubuf,
8320                      size_t cnt, loff_t *ppos)
8321 {
8322         struct trace_array *tr = filp->private_data;
8323         unsigned long val;
8324         int ret;
8325
8326         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8327         if (ret)
8328                 return ret;
8329
8330         if (val > 100)
8331                 return -EINVAL;
8332
8333         if (!val)
8334                 val = 1;
8335
8336         tr->buffer_percent = val;
8337
8338         (*ppos)++;
8339
8340         return cnt;
8341 }
8342
8343 static const struct file_operations buffer_percent_fops = {
8344         .open           = tracing_open_generic_tr,
8345         .read           = buffer_percent_read,
8346         .write          = buffer_percent_write,
8347         .release        = tracing_release_generic_tr,
8348         .llseek         = default_llseek,
8349 };
8350
8351 static struct dentry *trace_instance_dir;
8352
8353 static void
8354 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8355
8356 static int
8357 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8358 {
8359         enum ring_buffer_flags rb_flags;
8360
8361         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8362
8363         buf->tr = tr;
8364
8365         buf->buffer = ring_buffer_alloc(size, rb_flags);
8366         if (!buf->buffer)
8367                 return -ENOMEM;
8368
8369         buf->data = alloc_percpu(struct trace_array_cpu);
8370         if (!buf->data) {
8371                 ring_buffer_free(buf->buffer);
8372                 buf->buffer = NULL;
8373                 return -ENOMEM;
8374         }
8375
8376         /* Allocate the first page for all buffers */
8377         set_buffer_entries(&tr->trace_buffer,
8378                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8379
8380         return 0;
8381 }
8382
8383 static int allocate_trace_buffers(struct trace_array *tr, int size)
8384 {
8385         int ret;
8386
8387         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8388         if (ret)
8389                 return ret;
8390
8391 #ifdef CONFIG_TRACER_MAX_TRACE
8392         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8393                                     allocate_snapshot ? size : 1);
8394         if (WARN_ON(ret)) {
8395                 ring_buffer_free(tr->trace_buffer.buffer);
8396                 tr->trace_buffer.buffer = NULL;
8397                 free_percpu(tr->trace_buffer.data);
8398                 tr->trace_buffer.data = NULL;
8399                 return -ENOMEM;
8400         }
8401         tr->allocated_snapshot = allocate_snapshot;
8402
8403         /*
8404          * Only the top level trace array gets its snapshot allocated
8405          * from the kernel command line.
8406          */
8407         allocate_snapshot = false;
8408 #endif
8409         return 0;
8410 }
8411
8412 static void free_trace_buffer(struct trace_buffer *buf)
8413 {
8414         if (buf->buffer) {
8415                 ring_buffer_free(buf->buffer);
8416                 buf->buffer = NULL;
8417                 free_percpu(buf->data);
8418                 buf->data = NULL;
8419         }
8420 }
8421
8422 static void free_trace_buffers(struct trace_array *tr)
8423 {
8424         if (!tr)
8425                 return;
8426
8427         free_trace_buffer(&tr->trace_buffer);
8428
8429 #ifdef CONFIG_TRACER_MAX_TRACE
8430         free_trace_buffer(&tr->max_buffer);
8431 #endif
8432 }
8433
8434 static void init_trace_flags_index(struct trace_array *tr)
8435 {
8436         int i;
8437
8438         /* Used by the trace options files */
8439         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8440                 tr->trace_flags_index[i] = i;
8441 }
8442
8443 static void __update_tracer_options(struct trace_array *tr)
8444 {
8445         struct tracer *t;
8446
8447         for (t = trace_types; t; t = t->next)
8448                 add_tracer_options(tr, t);
8449 }
8450
8451 static void update_tracer_options(struct trace_array *tr)
8452 {
8453         mutex_lock(&trace_types_lock);
8454         __update_tracer_options(tr);
8455         mutex_unlock(&trace_types_lock);
8456 }
8457
8458 static struct trace_array *trace_array_create(const char *name)
8459 {
8460         struct trace_array *tr;
8461         int ret;
8462
8463         ret = -ENOMEM;
8464         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8465         if (!tr)
8466                 return ERR_PTR(ret);
8467
8468         tr->name = kstrdup(name, GFP_KERNEL);
8469         if (!tr->name)
8470                 goto out_free_tr;
8471
8472         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8473                 goto out_free_tr;
8474
8475         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8476
8477         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8478
8479         raw_spin_lock_init(&tr->start_lock);
8480
8481         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8482
8483         tr->current_trace = &nop_trace;
8484
8485         INIT_LIST_HEAD(&tr->systems);
8486         INIT_LIST_HEAD(&tr->events);
8487         INIT_LIST_HEAD(&tr->hist_vars);
8488         INIT_LIST_HEAD(&tr->err_log);
8489
8490         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8491                 goto out_free_tr;
8492
8493         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8494         if (!tr->dir)
8495                 goto out_free_tr;
8496
8497         ret = event_trace_add_tracer(tr->dir, tr);
8498         if (ret) {
8499                 tracefs_remove_recursive(tr->dir);
8500                 goto out_free_tr;
8501         }
8502
8503         ftrace_init_trace_array(tr);
8504
8505         init_tracer_tracefs(tr, tr->dir);
8506         init_trace_flags_index(tr);
8507         __update_tracer_options(tr);
8508
8509         list_add(&tr->list, &ftrace_trace_arrays);
8510
8511         tr->ref++;
8512
8513
8514         return tr;
8515
8516  out_free_tr:
8517         free_trace_buffers(tr);
8518         free_cpumask_var(tr->tracing_cpumask);
8519         kfree(tr->name);
8520         kfree(tr);
8521
8522         return ERR_PTR(ret);
8523 }
8524
8525 static int instance_mkdir(const char *name)
8526 {
8527         struct trace_array *tr;
8528         int ret;
8529
8530         mutex_lock(&event_mutex);
8531         mutex_lock(&trace_types_lock);
8532
8533         ret = -EEXIST;
8534         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8535                 if (tr->name && strcmp(tr->name, name) == 0)
8536                         goto out_unlock;
8537         }
8538
8539         tr = trace_array_create(name);
8540
8541         ret = PTR_ERR_OR_ZERO(tr);
8542
8543 out_unlock:
8544         mutex_unlock(&trace_types_lock);
8545         mutex_unlock(&event_mutex);
8546         return ret;
8547 }
8548
8549 /**
8550  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8551  * @name: The name of the trace array to be looked up/created.
8552  *
8553  * Returns pointer to trace array with given name.
8554  * NULL, if it cannot be created.
8555  *
8556  * NOTE: This function increments the reference counter associated with the
8557  * trace array returned. This makes sure it cannot be freed while in use.
8558  * Use trace_array_put() once the trace array is no longer needed.
8559  *
8560  */
8561 struct trace_array *trace_array_get_by_name(const char *name)
8562 {
8563         struct trace_array *tr;
8564
8565         mutex_lock(&event_mutex);
8566         mutex_lock(&trace_types_lock);
8567
8568         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8569                 if (tr->name && strcmp(tr->name, name) == 0)
8570                         goto out_unlock;
8571         }
8572
8573         tr = trace_array_create(name);
8574
8575         if (IS_ERR(tr))
8576                 tr = NULL;
8577 out_unlock:
8578         if (tr)
8579                 tr->ref++;
8580
8581         mutex_unlock(&trace_types_lock);
8582         mutex_unlock(&event_mutex);
8583         return tr;
8584 }
8585 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
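/*
 * Minimal usage sketch for the API above (illustrative only; the instance
 * name "example" and example_use_instance() are hypothetical).  The
 * reference taken by trace_array_get_by_name() is dropped with
 * trace_array_put() once the caller is done with the instance.
 */
static __maybe_unused int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");
	if (!tr)
		return -ENOMEM;

	/* write into this instance's buffer instead of the top level one */
	trace_array_printk(tr, _THIS_IP_, "hello from %s\n", __func__);

	trace_array_put(tr);
	return 0;
}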
8586
8587 static int __remove_instance(struct trace_array *tr)
8588 {
8589         int i;
8590
8591         /* Reference counter for a newly created trace array = 1. */
8592         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8593                 return -EBUSY;
8594
8595         list_del(&tr->list);
8596
8597         /* Disable all the flags that were enabled coming in */
8598         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8599                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8600                         set_tracer_flag(tr, 1 << i, 0);
8601         }
8602
8603         tracing_set_nop(tr);
8604         clear_ftrace_function_probes(tr);
8605         event_trace_del_tracer(tr);
8606         ftrace_clear_pids(tr);
8607         ftrace_destroy_function_files(tr);
8608         tracefs_remove_recursive(tr->dir);
8609         free_trace_buffers(tr);
8610
8611         for (i = 0; i < tr->nr_topts; i++) {
8612                 kfree(tr->topts[i].topts);
8613         }
8614         kfree(tr->topts);
8615
8616         free_cpumask_var(tr->tracing_cpumask);
8617         kfree(tr->name);
8618         kfree(tr);
8619         tr = NULL;
8620
8621         return 0;
8622 }
8623
8624 int trace_array_destroy(struct trace_array *this_tr)
8625 {
8626         struct trace_array *tr;
8627         int ret;
8628
8629         if (!this_tr)
8630                 return -EINVAL;
8631
8632         mutex_lock(&event_mutex);
8633         mutex_lock(&trace_types_lock);
8634
8635         ret = -ENODEV;
8636
8637         /* Make sure the trace array exists before destroying it. */
8638         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8639                 if (tr == this_tr) {
8640                         ret = __remove_instance(tr);
8641                         break;
8642                 }
8643         }
8644
8645         mutex_unlock(&trace_types_lock);
8646         mutex_unlock(&event_mutex);
8647
8648         return ret;
8649 }
8650 EXPORT_SYMBOL_GPL(trace_array_destroy);
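/*
 * Companion sketch for tearing an instance down again (illustrative only;
 * assumes tr came from trace_array_get_by_name() as in the sketch above).
 * The reference must be dropped first, otherwise __remove_instance() sees
 * tr->ref > 1 and returns -EBUSY.
 */
static __maybe_unused void example_remove_instance(struct trace_array *tr)
{
	trace_array_put(tr);
	if (trace_array_destroy(tr))
		pr_warn("example: could not destroy trace instance\n");
}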
8651
8652 static int instance_rmdir(const char *name)
8653 {
8654         struct trace_array *tr;
8655         int ret;
8656
8657         mutex_lock(&event_mutex);
8658         mutex_lock(&trace_types_lock);
8659
8660         ret = -ENODEV;
8661         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8662                 if (tr->name && strcmp(tr->name, name) == 0) {
8663                         ret = __remove_instance(tr);
8664                         break;
8665                 }
8666         }
8667
8668         mutex_unlock(&trace_types_lock);
8669         mutex_unlock(&event_mutex);
8670
8671         return ret;
8672 }
8673
8674 static __init void create_trace_instances(struct dentry *d_tracer)
8675 {
8676         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8677                                                          instance_mkdir,
8678                                                          instance_rmdir);
8679         if (WARN_ON(!trace_instance_dir))
8680                 return;
8681 }
8682
8683 static void
8684 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8685 {
8686         struct trace_event_file *file;
8687         int cpu;
8688
8689         trace_create_file("available_tracers", 0444, d_tracer,
8690                         tr, &show_traces_fops);
8691
8692         trace_create_file("current_tracer", 0644, d_tracer,
8693                         tr, &set_tracer_fops);
8694
8695         trace_create_file("tracing_cpumask", 0644, d_tracer,
8696                           tr, &tracing_cpumask_fops);
8697
8698         trace_create_file("trace_options", 0644, d_tracer,
8699                           tr, &tracing_iter_fops);
8700
8701         trace_create_file("trace", 0644, d_tracer,
8702                           tr, &tracing_fops);
8703
8704         trace_create_file("trace_pipe", 0444, d_tracer,
8705                           tr, &tracing_pipe_fops);
8706
8707         trace_create_file("buffer_size_kb", 0644, d_tracer,
8708                           tr, &tracing_entries_fops);
8709
8710         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8711                           tr, &tracing_total_entries_fops);
8712
8713         trace_create_file("free_buffer", 0200, d_tracer,
8714                           tr, &tracing_free_buffer_fops);
8715
8716         trace_create_file("trace_marker", 0220, d_tracer,
8717                           tr, &tracing_mark_fops);
8718
8719         file = __find_event_file(tr, "ftrace", "print");
8720         if (file && file->dir)
8721                 trace_create_file("trigger", 0644, file->dir, file,
8722                                   &event_trigger_fops);
8723         tr->trace_marker_file = file;
8724
8725         trace_create_file("trace_marker_raw", 0220, d_tracer,
8726                           tr, &tracing_mark_raw_fops);
8727
8728         trace_create_file("trace_clock", 0644, d_tracer, tr,
8729                           &trace_clock_fops);
8730
8731         trace_create_file("tracing_on", 0644, d_tracer,
8732                           tr, &rb_simple_fops);
8733
8734         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8735                           &trace_time_stamp_mode_fops);
8736
8737         tr->buffer_percent = 50;
8738
8739         trace_create_file("buffer_percent", 0444, d_tracer,
8740                         tr, &buffer_percent_fops);
8741
8742         create_trace_options_dir(tr);
8743
8744 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8745         trace_create_maxlat_file(tr, d_tracer);
8746 #endif
8747
8748         if (ftrace_create_function_files(tr, d_tracer))
8749                 WARN(1, "Could not allocate function filter files");
8750
8751 #ifdef CONFIG_TRACER_SNAPSHOT
8752         trace_create_file("snapshot", 0644, d_tracer,
8753                           tr, &snapshot_fops);
8754 #endif
8755
8756         trace_create_file("error_log", 0644, d_tracer,
8757                           tr, &tracing_err_log_fops);
8758
8759         for_each_tracing_cpu(cpu)
8760                 tracing_init_tracefs_percpu(tr, cpu);
8761
8762         ftrace_init_tracefs(tr, d_tracer);
8763 }
8764
8765 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8766 {
8767         struct vfsmount *mnt;
8768         struct file_system_type *type;
8769
8770         /*
8771          * To maintain backward compatibility for tools that mount
8772          * debugfs to get to the tracing facility, tracefs is automatically
8773          * mounted to the debugfs/tracing directory.
8774          */
8775         type = get_fs_type("tracefs");
8776         if (!type)
8777                 return NULL;
8778         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8779         put_filesystem(type);
8780         if (IS_ERR(mnt))
8781                 return NULL;
8782         mntget(mnt);
8783
8784         return mnt;
8785 }
8786
8787 /**
8788  * tracing_init_dentry - initialize top level trace array
8789  *
8790  * This is called when creating files or directories in the tracing
8791  * directory. It is called from the boot-up code (via fs_initcall())
8792  * and is expected to return the dentry of the top level tracing directory.
8793  */
8794 struct dentry *tracing_init_dentry(void)
8795 {
8796         struct trace_array *tr = &global_trace;
8797
8798         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8799                 pr_warn("Tracing disabled due to lockdown\n");
8800                 return ERR_PTR(-EPERM);
8801         }
8802
8803         /* The top level trace array uses NULL as the parent */
8804         if (tr->dir)
8805                 return NULL;
8806
8807         if (WARN_ON(!tracefs_initialized()) ||
8808                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8809                  WARN_ON(!debugfs_initialized())))
8810                 return ERR_PTR(-ENODEV);
8811
8812         /*
8813          * As there may still be users that expect the tracing
8814          * files to exist in debugfs/tracing, we must automount
8815          * the tracefs file system there, so older tools still
8816          * work with the newer kernel.
8817          */
8818         tr->dir = debugfs_create_automount("tracing", NULL,
8819                                            trace_automount, NULL);
8820
8821         return NULL;
8822 }
8823
8824 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8825 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8826
8827 static void __init trace_eval_init(void)
8828 {
8829         int len;
8830
8831         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8832         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8833 }
8834
8835 #ifdef CONFIG_MODULES
8836 static void trace_module_add_evals(struct module *mod)
8837 {
8838         if (!mod->num_trace_evals)
8839                 return;
8840
8841         /*
8842          * Modules with bad taint do not have events created; do
8843          * not bother with their enums (eval maps) either.
8844          */
8845         if (trace_module_has_bad_taint(mod))
8846                 return;
8847
8848         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8849 }
8850
8851 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8852 static void trace_module_remove_evals(struct module *mod)
8853 {
8854         union trace_eval_map_item *map;
8855         union trace_eval_map_item **last = &trace_eval_maps;
8856
8857         if (!mod->num_trace_evals)
8858                 return;
8859
8860         mutex_lock(&trace_eval_mutex);
8861
8862         map = trace_eval_maps;
8863
8864         while (map) {
8865                 if (map->head.mod == mod)
8866                         break;
8867                 map = trace_eval_jmp_to_tail(map);
8868                 last = &map->tail.next;
8869                 map = map->tail.next;
8870         }
8871         if (!map)
8872                 goto out;
8873
8874         *last = trace_eval_jmp_to_tail(map)->tail.next;
8875         kfree(map);
8876  out:
8877         mutex_unlock(&trace_eval_mutex);
8878 }
8879 #else
8880 static inline void trace_module_remove_evals(struct module *mod) { }
8881 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8882
8883 static int trace_module_notify(struct notifier_block *self,
8884                                unsigned long val, void *data)
8885 {
8886         struct module *mod = data;
8887
8888         switch (val) {
8889         case MODULE_STATE_COMING:
8890                 trace_module_add_evals(mod);
8891                 break;
8892         case MODULE_STATE_GOING:
8893                 trace_module_remove_evals(mod);
8894                 break;
8895         }
8896
8897         return 0;
8898 }
8899
8900 static struct notifier_block trace_module_nb = {
8901         .notifier_call = trace_module_notify,
8902         .priority = 0,
8903 };
8904 #endif /* CONFIG_MODULES */
8905
8906 static __init int tracer_init_tracefs(void)
8907 {
8908         struct dentry *d_tracer;
8909
8910         trace_access_lock_init();
8911
8912         d_tracer = tracing_init_dentry();
8913         if (IS_ERR(d_tracer))
8914                 return 0;
8915
8916         event_trace_init();
8917
8918         init_tracer_tracefs(&global_trace, d_tracer);
8919         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8920
8921         trace_create_file("tracing_thresh", 0644, d_tracer,
8922                         &global_trace, &tracing_thresh_fops);
8923
8924         trace_create_file("README", 0444, d_tracer,
8925                         NULL, &tracing_readme_fops);
8926
8927         trace_create_file("saved_cmdlines", 0444, d_tracer,
8928                         NULL, &tracing_saved_cmdlines_fops);
8929
8930         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8931                           NULL, &tracing_saved_cmdlines_size_fops);
8932
8933         trace_create_file("saved_tgids", 0444, d_tracer,
8934                         NULL, &tracing_saved_tgids_fops);
8935
8936         trace_eval_init();
8937
8938         trace_create_eval_file(d_tracer);
8939
8940 #ifdef CONFIG_MODULES
8941         register_module_notifier(&trace_module_nb);
8942 #endif
8943
8944 #ifdef CONFIG_DYNAMIC_FTRACE
8945         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8946                         NULL, &tracing_dyn_info_fops);
8947 #endif
8948
8949         create_trace_instances(d_tracer);
8950
8951         update_tracer_options(&global_trace);
8952
8953         return 0;
8954 }
8955
8956 static int trace_panic_handler(struct notifier_block *this,
8957                                unsigned long event, void *unused)
8958 {
8959         if (ftrace_dump_on_oops)
8960                 ftrace_dump(ftrace_dump_on_oops);
8961         return NOTIFY_OK;
8962 }
8963
8964 static struct notifier_block trace_panic_notifier = {
8965         .notifier_call  = trace_panic_handler,
8966         .next           = NULL,
8967         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8968 };
8969
8970 static int trace_die_handler(struct notifier_block *self,
8971                              unsigned long val,
8972                              void *data)
8973 {
8974         switch (val) {
8975         case DIE_OOPS:
8976                 if (ftrace_dump_on_oops)
8977                         ftrace_dump(ftrace_dump_on_oops);
8978                 break;
8979         default:
8980                 break;
8981         }
8982         return NOTIFY_OK;
8983 }
8984
8985 static struct notifier_block trace_die_notifier = {
8986         .notifier_call = trace_die_handler,
8987         .priority = 200
8988 };
8989
8990 /*
8991  * printk is set to a max of 1024; we really don't need it that big.
8992  * Nothing should be printing 1000 characters anyway.
8993  */
8994 #define TRACE_MAX_PRINT         1000
8995
8996 /*
8997  * Define here KERN_TRACE so that we have one place to modify
8998  * it if we decide to change what log level the ftrace dump
8999  * should be at.
9000  */
9001 #define KERN_TRACE              KERN_EMERG
9002
9003 void
9004 trace_printk_seq(struct trace_seq *s)
9005 {
9006         /* Probably should print a warning here. */
9007         if (s->seq.len >= TRACE_MAX_PRINT)
9008                 s->seq.len = TRACE_MAX_PRINT;
9009
9010         /*
9011          * More paranoid code. Although the buffer size is set to
9012          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9013          * an extra layer of protection.
9014          */
9015         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9016                 s->seq.len = s->seq.size - 1;
9017
9018         /* should be zero terminated, but we are paranoid. */
9019         s->buffer[s->seq.len] = 0;
9020
9021         printk(KERN_TRACE "%s", s->buffer);
9022
9023         trace_seq_init(s);
9024 }
9025
9026 void trace_init_global_iter(struct trace_iterator *iter)
9027 {
9028         iter->tr = &global_trace;
9029         iter->trace = iter->tr->current_trace;
9030         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9031         iter->trace_buffer = &global_trace.trace_buffer;
9032
9033         if (iter->trace && iter->trace->open)
9034                 iter->trace->open(iter);
9035
9036         /* Annotate start of buffers if we had overruns */
9037         if (ring_buffer_overruns(iter->trace_buffer->buffer))
9038                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9039
9040         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9041         if (trace_clocks[iter->tr->clock_id].in_ns)
9042                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9043 }
9044
9045 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9046 {
9047         /* use static because iter can be a bit big for the stack */
9048         static struct trace_iterator iter;
9049         static atomic_t dump_running;
9050         struct trace_array *tr = &global_trace;
9051         unsigned int old_userobj;
9052         unsigned long flags;
9053         int cnt = 0, cpu;
9054
9055         /* Only allow one dump user at a time. */
9056         if (atomic_inc_return(&dump_running) != 1) {
9057                 atomic_dec(&dump_running);
9058                 return;
9059         }
9060
9061         /*
9062          * Always turn off tracing when we dump.
9063          * We don't need to show trace output of what happens
9064          * between multiple crashes.
9065          *
9066          * If the user does a sysrq-z, then they can re-enable
9067          * tracing with echo 1 > tracing_on.
9068          */
9069         tracing_off();
9070
9071         local_irq_save(flags);
9072         printk_nmi_direct_enter();
9073
9074         /* Simulate the iterator */
9075         trace_init_global_iter(&iter);
9076
9077         for_each_tracing_cpu(cpu) {
9078                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9079         }
9080
9081         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9082
9083         /* don't look at user memory in panic mode */
9084         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9085
9086         switch (oops_dump_mode) {
9087         case DUMP_ALL:
9088                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9089                 break;
9090         case DUMP_ORIG:
9091                 iter.cpu_file = raw_smp_processor_id();
9092                 break;
9093         case DUMP_NONE:
9094                 goto out_enable;
9095         default:
9096                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9097                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9098         }
9099
9100         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9101
9102         /* Did function tracer already get disabled? */
9103         if (ftrace_is_dead()) {
9104                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9105                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9106         }
9107
9108         /*
9109          * We need to stop all tracing on all CPUs to read
9110          * the next buffer. This is a bit expensive, but is
9111          * not done often. We read in everything we can,
9112          * and then release the locks again.
9113          */
9114
9115         while (!trace_empty(&iter)) {
9116
9117                 if (!cnt)
9118                         printk(KERN_TRACE "---------------------------------\n");
9119
9120                 cnt++;
9121
9122                 trace_iterator_reset(&iter);
9123                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9124
9125                 if (trace_find_next_entry_inc(&iter) != NULL) {
9126                         int ret;
9127
9128                         ret = print_trace_line(&iter);
9129                         if (ret != TRACE_TYPE_NO_CONSUME)
9130                                 trace_consume(&iter);
9131                 }
9132                 touch_nmi_watchdog();
9133
9134                 trace_printk_seq(&iter.seq);
9135         }
9136
9137         if (!cnt)
9138                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9139         else
9140                 printk(KERN_TRACE "---------------------------------\n");
9141
9142  out_enable:
9143         tr->trace_flags |= old_userobj;
9144
9145         for_each_tracing_cpu(cpu) {
9146                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9147         }
9148         atomic_dec(&dump_running);
9149         printk_nmi_direct_exit();
9150         local_irq_restore(flags);
9151 }
9152 EXPORT_SYMBOL_GPL(ftrace_dump);
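/*
 * Usage sketch (illustrative only; example_dump_on_fatal_error() is a
 * hypothetical debug helper): dump every CPU's buffer to the console when
 * something has gone badly wrong.  As noted above, tracing stays off
 * afterwards until it is re-enabled through "tracing_on".
 */
static __maybe_unused void example_dump_on_fatal_error(void)
{
	ftrace_dump(DUMP_ALL);		/* or DUMP_ORIG for just this CPU */
}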
9153
9154 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9155 {
9156         char **argv;
9157         int argc, ret;
9158
9159         argc = 0;
9160         ret = 0;
9161         argv = argv_split(GFP_KERNEL, buf, &argc);
9162         if (!argv)
9163                 return -ENOMEM;
9164
9165         if (argc)
9166                 ret = createfn(argc, argv);
9167
9168         argv_free(argv);
9169
9170         return ret;
9171 }
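/*
 * Sketch of a createfn callback (hypothetical; example_createfn() has no
 * in-tree user): trace_run_command("p:myprobe do_sys_open", example_createfn)
 * splits the string on whitespace and invokes the callback with argc == 2
 * and argv == { "p:myprobe", "do_sys_open" }.
 */
static __maybe_unused int example_createfn(int argc, char **argv)
{
	int i;

	for (i = 0; i < argc; i++)
		pr_info("arg[%d] = %s\n", i, argv[i]);
	return 0;
}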
9172
9173 #define WRITE_BUFSIZE  4096
9174
9175 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9176                                 size_t count, loff_t *ppos,
9177                                 int (*createfn)(int, char **))
9178 {
9179         char *kbuf, *buf, *tmp;
9180         int ret = 0;
9181         size_t done = 0;
9182         size_t size;
9183
9184         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9185         if (!kbuf)
9186                 return -ENOMEM;
9187
9188         while (done < count) {
9189                 size = count - done;
9190
9191                 if (size >= WRITE_BUFSIZE)
9192                         size = WRITE_BUFSIZE - 1;
9193
9194                 if (copy_from_user(kbuf, buffer + done, size)) {
9195                         ret = -EFAULT;
9196                         goto out;
9197                 }
9198                 kbuf[size] = '\0';
9199                 buf = kbuf;
9200                 do {
9201                         tmp = strchr(buf, '\n');
9202                         if (tmp) {
9203                                 *tmp = '\0';
9204                                 size = tmp - buf + 1;
9205                         } else {
9206                                 size = strlen(buf);
9207                                 if (done + size < count) {
9208                                         if (buf != kbuf)
9209                                                 break;
9210                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9211                                         pr_warn("Line length is too long: Should be less than %d\n",
9212                                                 WRITE_BUFSIZE - 2);
9213                                         ret = -EINVAL;
9214                                         goto out;
9215                                 }
9216                         }
9217                         done += size;
9218
9219                         /* Remove comments */
9220                         tmp = strchr(buf, '#');
9221
9222                         if (tmp)
9223                                 *tmp = '\0';
9224
9225                         ret = trace_run_command(buf, createfn);
9226                         if (ret)
9227                                 goto out;
9228                         buf += size;
9229
9230                 } while (done < count);
9231         }
9232         ret = done;
9233
9234 out:
9235         kfree(kbuf);
9236
9237         return ret;
9238 }
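/*
 * Sketch of a control-file write handler built on the helper above
 * (hypothetical; loosely modelled on how the dynamic event files use it).
 * Each '\n'-separated line, with '#' comments stripped, results in one
 * createfn() invocation; example_createfn() is the sketch shown earlier.
 */
static __maybe_unused ssize_t example_ctl_write(struct file *filp,
						const char __user *ubuf,
						size_t cnt, loff_t *ppos)
{
	return trace_parse_run_command(filp, ubuf, cnt, ppos,
				       example_createfn);
}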
9239
9240 __init static int tracer_alloc_buffers(void)
9241 {
9242         int ring_buf_size;
9243         int ret = -ENOMEM;
9244
9245
9246         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9247                 pr_warn("Tracing disabled due to lockdown\n");
9248                 return -EPERM;
9249         }
9250
9251         /*
9252          * Make sure we don't accidentally add more trace options
9253          * than we have bits for.
9254          */
9255         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9256
9257         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9258                 goto out;
9259
9260         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9261                 goto out_free_buffer_mask;
9262
9263         /* Only allocate trace_printk buffers if a trace_printk exists */
9264         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9265                 /* Must be called before global_trace.buffer is allocated */
9266                 trace_printk_init_buffers();
9267
9268         /* To save memory, keep the ring buffer size to its minimum */
9269         if (ring_buffer_expanded)
9270                 ring_buf_size = trace_buf_size;
9271         else
9272                 ring_buf_size = 1;
9273
9274         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9275         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9276
9277         raw_spin_lock_init(&global_trace.start_lock);
9278
9279         /*
9280          * The prepare callback allocates some memory for the ring buffer. We
9281          * don't free the buffer if the CPU goes down. If we were to free
9282          * the buffer, then the user would lose any trace that was in the
9283          * buffer. The memory will be removed once the "instance" is removed.
9284          */
9285         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9286                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9287                                       NULL);
9288         if (ret < 0)
9289                 goto out_free_cpumask;
9290         /* Used for event triggers */
9291         ret = -ENOMEM;
9292         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9293         if (!temp_buffer)
9294                 goto out_rm_hp_state;
9295
9296         if (trace_create_savedcmd() < 0)
9297                 goto out_free_temp_buffer;
9298
9299         /* TODO: make the number of buffers hot pluggable with CPUS */
9300         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9301                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9302                 WARN_ON(1);
9303                 goto out_free_savedcmd;
9304         }
9305
9306         if (global_trace.buffer_disabled)
9307                 tracing_off();
9308
9309         if (trace_boot_clock) {
9310                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9311                 if (ret < 0)
9312                         pr_warn("Trace clock %s not defined, going back to default\n",
9313                                 trace_boot_clock);
9314         }
9315
9316         /*
9317          * register_tracer() might reference current_trace, so it
9318          * needs to be set before we register anything. This is
9319          * just a bootstrap of current_trace anyway.
9320          */
9321         global_trace.current_trace = &nop_trace;
9322
9323         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9324
9325         ftrace_init_global_array_ops(&global_trace);
9326
9327         init_trace_flags_index(&global_trace);
9328
9329         register_tracer(&nop_trace);
9330
9331         /* Function tracing may start here (via kernel command line) */
9332         init_function_trace();
9333
9334         /* All seems OK, enable tracing */
9335         tracing_disabled = 0;
9336
9337         atomic_notifier_chain_register(&panic_notifier_list,
9338                                        &trace_panic_notifier);
9339
9340         register_die_notifier(&trace_die_notifier);
9341
9342         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9343
9344         INIT_LIST_HEAD(&global_trace.systems);
9345         INIT_LIST_HEAD(&global_trace.events);
9346         INIT_LIST_HEAD(&global_trace.hist_vars);
9347         INIT_LIST_HEAD(&global_trace.err_log);
9348         list_add(&global_trace.list, &ftrace_trace_arrays);
9349
9350         apply_trace_boot_options();
9351
9352         register_snapshot_cmd();
9353
9354         return 0;
9355
9356 out_free_savedcmd:
9357         free_saved_cmdlines_buffer(savedcmd);
9358 out_free_temp_buffer:
9359         ring_buffer_free(temp_buffer);
9360 out_rm_hp_state:
9361         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9362 out_free_cpumask:
9363         free_cpumask_var(global_trace.tracing_cpumask);
9364 out_free_buffer_mask:
9365         free_cpumask_var(tracing_buffer_mask);
9366 out:
9367         return ret;
9368 }
9369
9370 void __init early_trace_init(void)
9371 {
9372         if (tracepoint_printk) {
9373                 tracepoint_print_iter =
9374                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9375                 if (WARN_ON(!tracepoint_print_iter))
9376                         tracepoint_printk = 0;
9377                 else
9378                         static_key_enable(&tracepoint_printk_key.key);
9379         }
9380         tracer_alloc_buffers();
9381 }
9382
9383 void __init trace_init(void)
9384 {
9385         trace_event_init();
9386 }
9387
9388 __init static int clear_boot_tracer(void)
9389 {
9390         /*
9391          * The default bootup tracer name points into an init section
9392          * that is about to be freed. This function runs at late_initcall
9393          * time; if the boot tracer was never registered, clear the
9394          * pointer so that a later registration does not access the
9395          * buffer after it has been freed.
9396          */
9397         if (!default_bootup_tracer)
9398                 return 0;
9399
9400         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9401                default_bootup_tracer);
9402         default_bootup_tracer = NULL;
9403
9404         return 0;
9405 }
9406
9407 fs_initcall(tracer_init_tracefs);
9408 late_initcall_sync(clear_boot_tracer);
9409
9410 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9411 __init static int tracing_set_default_clock(void)
9412 {
9413         /* sched_clock_stable() is determined in late_initcall */
9414         if (!trace_boot_clock && !sched_clock_stable()) {
9415                 printk(KERN_WARNING
9416                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9417                        "If you want to keep using the local clock, then add:\n"
9418                        "  \"trace_clock=local\"\n"
9419                        "on the kernel command line\n");
9420                 tracing_set_clock(&global_trace, "global");
9421         }
9422
9423         return 0;
9424 }
9425 late_initcall_sync(tracing_set_default_clock);
9426 #endif