1 /*
2  * bpf_jit_comp.c: BPF JIT compiler
3  *
4  * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
5  * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12 #include <linux/netdevice.h>
13 #include <linux/filter.h>
14 #include <linux/if_vlan.h>
15 #include <linux/bpf.h>
16
17 #include <asm/set_memory.h>
18 #include <asm/nospec-branch.h>
19
20 /*
21  * Assembly code in arch/x86/net/bpf_jit.S
22  */
23 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
24 extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
25 extern u8 sk_load_byte_positive_offset[];
26 extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
27 extern u8 sk_load_byte_negative_offset[];
28
29 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
30 {
31         if (len == 1)
32                 *ptr = bytes;
33         else if (len == 2)
34                 *(u16 *)ptr = bytes;
35         else {
36                 *(u32 *)ptr = bytes;
37                 barrier();
38         }
39         return ptr + len;
40 }
41
42 #define EMIT(bytes, len) \
43         do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
44
45 #define EMIT1(b1)               EMIT(b1, 1)
46 #define EMIT2(b1, b2)           EMIT((b1) + ((b2) << 8), 2)
47 #define EMIT3(b1, b2, b3)       EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
48 #define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
49
50 #define EMIT1_off32(b1, off) \
51         do { EMIT1(b1); EMIT(off, 4); } while (0)
52 #define EMIT2_off32(b1, b2, off) \
53         do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
54 #define EMIT3_off32(b1, b2, b3, off) \
55         do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
56 #define EMIT4_off32(b1, b2, b3, b4, off) \
57         do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
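/*
 * For illustration: the EMIT*() helpers pack up to four opcode bytes into
 * 'prog' and advance it, while 'cnt' (declared in each emitter) tracks how
 * many bytes were written. For instance, EMIT3(0x48, 0x89, 0xE5) lays down
 * the byte sequence 48 89 E5, which is 'mov rbp,rsp' as used in the
 * prologue below.
 */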
58
59 static bool is_imm8(int value)
60 {
61         return value <= 127 && value >= -128;
62 }
63
64 static bool is_simm32(s64 value)
65 {
66         return value == (s64)(s32)value;
67 }
68
69 static bool is_uimm32(u64 value)
70 {
71         return value == (u64)(u32)value;
72 }
73
74 /* mov dst, src */
75 #define EMIT_mov(DST, SRC)                                                               \
76         do {                                                                             \
77                 if (DST != SRC)                                                          \
78                         EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
79         } while (0)
80
81 static int bpf_size_to_x86_bytes(int bpf_size)
82 {
83         if (bpf_size == BPF_W)
84                 return 4;
85         else if (bpf_size == BPF_H)
86                 return 2;
87         else if (bpf_size == BPF_B)
88                 return 1;
89         else if (bpf_size == BPF_DW)
90                 return 4; /* imm32 */
91         else
92                 return 0;
93 }
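/*
 * Note, for illustration: BPF_DW deliberately maps to 4 here. The caller is
 * the BPF_ST | BPF_MEM store case below, where even the 64-bit store form
 * (REX.W + 0xC7) takes just an imm32 that the CPU sign-extends, so only
 * four immediate bytes are ever emitted.
 */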
94
95 /*
96  * List of x86 conditional jump opcodes (. + s8)
97  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
98  */
99 #define X86_JB  0x72
100 #define X86_JAE 0x73
101 #define X86_JE  0x74
102 #define X86_JNE 0x75
103 #define X86_JBE 0x76
104 #define X86_JA  0x77
105 #define X86_JL  0x7C
106 #define X86_JGE 0x7D
107 #define X86_JLE 0x7E
108 #define X86_JG  0x7F
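/*
 * Example: X86_JB (0x72) is 'jb rel8'. emit_cond_jmp below produces the
 * rel32 form by emitting 0x0F followed by 0x72 + 0x10, i.e. 0F 82, which
 * is 'jb rel32'.
 */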
109
110 #define CHOOSE_LOAD_FUNC(K, func) \
111         ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
112
113 /* Pick a register outside of BPF range for JIT internal work */
114 #define AUX_REG (MAX_BPF_JIT_REG + 1)
115
116 /*
117  * The following table maps BPF registers to x86-64 registers.
118  *
119  * x86-64 register R12 is unused, since if used as base address
120  * register in load/store instructions, it always needs an
121  * extra byte of encoding and is callee saved.
122  *
123  * R9  caches skb->len - skb->data_len
124  * R10 caches skb->data, and used for blinding (if enabled)
125  */
126 static const int reg2hex[] = {
127         [BPF_REG_0] = 0,  /* RAX */
128         [BPF_REG_1] = 7,  /* RDI */
129         [BPF_REG_2] = 6,  /* RSI */
130         [BPF_REG_3] = 2,  /* RDX */
131         [BPF_REG_4] = 1,  /* RCX */
132         [BPF_REG_5] = 0,  /* R8  */
133         [BPF_REG_6] = 3,  /* RBX callee saved */
134         [BPF_REG_7] = 5,  /* R13 callee saved */
135         [BPF_REG_8] = 6,  /* R14 callee saved */
136         [BPF_REG_9] = 7,  /* R15 callee saved */
137         [BPF_REG_FP] = 5, /* RBP readonly */
138         [BPF_REG_AX] = 2, /* R10 temp register */
139         [AUX_REG] = 3,    /* R11 temp register */
140 };
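/*
 * Note that BPF R1..R5 (the argument registers) land in rdi, rsi, rdx, rcx
 * and r8, i.e. the first five x86-64 SysV calling convention argument
 * registers, so calls into helpers need no register shuffling. The table
 * stores only the low 3 bits of the x86 register number; is_ereg() below
 * supplies the REX bit that distinguishes, for example, RAX (0) from R8 (0)
 * or RDX (2) from R10 (2).
 */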
141
142 /*
143  * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15,
144  * which need an extra byte of encoding.
145  * rax, rcx, ..., rbp have a simpler encoding.
146  */
147 static bool is_ereg(u32 reg)
148 {
149         return (1 << reg) & (BIT(BPF_REG_5) |
150                              BIT(AUX_REG) |
151                              BIT(BPF_REG_7) |
152                              BIT(BPF_REG_8) |
153                              BIT(BPF_REG_9) |
154                              BIT(BPF_REG_AX));
155 }
156
157 static bool is_axreg(u32 reg)
158 {
159         return reg == BPF_REG_0;
160 }
161
162 /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
163 static u8 add_1mod(u8 byte, u32 reg)
164 {
165         if (is_ereg(reg))
166                 byte |= 1;
167         return byte;
168 }
169
170 static u8 add_2mod(u8 byte, u32 r1, u32 r2)
171 {
172         if (is_ereg(r1))
173                 byte |= 1;
174         if (is_ereg(r2))
175                 byte |= 4;
176         return byte;
177 }
178
179 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
180 static u8 add_1reg(u8 byte, u32 dst_reg)
181 {
182         return byte + reg2hex[dst_reg];
183 }
184
185 /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
186 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
187 {
188         return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
189 }
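/*
 * Worked example of how the helpers above compose an instruction:
 * EMIT_mov(BPF_REG_7, BPF_REG_0) should produce 'mov r13,rax'.
 * add_2mod(0x48, BPF_REG_7, BPF_REG_0) yields 0x49 (REX.W plus REX.B,
 * because r13 is an extended register), the opcode is 0x89, and
 * add_2reg(0xC0, BPF_REG_7, BPF_REG_0) yields 0xC5 (ModRM: mod=11,
 * reg=rax, rm=r13), giving the byte sequence 49 89 C5.
 */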
190
191 static void jit_fill_hole(void *area, unsigned int size)
192 {
193         /* Fill whole space with INT3 instructions */
194         memset(area, 0xcc, size);
195 }
196
197 struct jit_context {
198         int cleanup_addr; /* Epilogue code offset */
199         bool seen_ld_abs;
200         bool seen_ax_reg;
201 };
202
203 /* Maximum number of bytes emitted while JITing one eBPF insn */
204 #define BPF_MAX_INSN_SIZE       128
205 #define BPF_INSN_SAFETY         64
206
207 #define AUX_STACK_SPACE \
208         (32 /* Space for RBX, R13, R14, R15 */ + \
209           8 /* Space for skb_copy_bits() buffer */)
210
211 #define PROLOGUE_SIZE 37
212
213 /*
214  * Emit x86-64 prologue code for BPF program and check its size.
215  * bpf_tail_call helper will skip it while jumping into another program
216  */
217 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
218 {
219         u8 *prog = *pprog;
220         int cnt = 0;
221
222         /* push rbp */
223         EMIT1(0x55);
224
225         /* mov rbp,rsp */
226         EMIT3(0x48, 0x89, 0xE5);
227
228         /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
229         EMIT3_off32(0x48, 0x81, 0xEC,
230                     round_up(stack_depth, 8) + AUX_STACK_SPACE);
231
232         /* sub rbp, AUX_STACK_SPACE */
233         EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
234
235         /* All classic BPF filters use R6(rbx), so save it */
236
237         /* mov qword ptr [rbp+0],rbx */
238         EMIT4(0x48, 0x89, 0x5D, 0);
239
240         /*
241          * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
242          * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
243          * R8(R14). R9(R15) spill could be made conditional, but there is only
244          * one 'bpf_error' return path out of helper functions inside bpf_jit.S
245          * The overhead of extra spill is negligible for any filter other
246          * than synthetic ones. Therefore not worth adding complexity.
247          */
248
249         /* mov qword ptr [rbp+8],r13 */
250         EMIT4(0x4C, 0x89, 0x6D, 8);
251         /* mov qword ptr [rbp+16],r14 */
252         EMIT4(0x4C, 0x89, 0x75, 16);
253         /* mov qword ptr [rbp+24],r15 */
254         EMIT4(0x4C, 0x89, 0x7D, 24);
255
256         if (!ebpf_from_cbpf) {
257                 /*
258                  * Clear the tail call counter (tail_call_cnt): for eBPF tail
259                  * calls we need to reset the counter to 0. It's done in two
260                  * instructions, resetting RAX register to 0, and moving it
261                  * to the counter location.
262                  */
263
264                 /* xor eax, eax */
265                 EMIT2(0x31, 0xc0);
266                 /* mov qword ptr [rbp+32], rax */
267                 EMIT4(0x48, 0x89, 0x45, 32);
268
269                 BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
270         }
271
272         *pprog = prog;
273 }
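/*
 * Rough byte tally of the eBPF prologue emitted above (push rbp: 1,
 * mov rbp,rsp: 3, sub rsp,imm32: 7, sub rbp,imm8: 4, the four register
 * spills: 4 each, xor eax,eax: 2, mov [rbp+32],rax: 4), which adds up to
 * the 37 bytes that PROLOGUE_SIZE asserts via BUILD_BUG_ON().
 */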
274
275 /*
276  * Generate the following code:
277  *
278  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
279  *   if (index >= array->map.max_entries)
280  *     goto out;
281  *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
282  *     goto out;
283  *   prog = array->ptrs[index];
284  *   if (prog == NULL)
285  *     goto out;
286  *   goto *(prog->bpf_func + prologue_size);
287  * out:
288  */
289 static void emit_bpf_tail_call(u8 **pprog)
290 {
291         u8 *prog = *pprog;
292         int label1, label2, label3;
293         int cnt = 0;
294
295         /*
296          * rdi - pointer to ctx
297          * rsi - pointer to bpf_array
298          * rdx - index in bpf_array
299          */
300
301         /*
302          * if (index >= array->map.max_entries)
303          *      goto out;
304          */
305         EMIT2(0x89, 0xD2);                        /* mov edx, edx */
306         EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
307               offsetof(struct bpf_array, map.max_entries));
308 #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
309         EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
310         label1 = cnt;
311
312         /*
313          * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
314          *      goto out;
315          */
316         EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
317         EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
318 #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
319         EMIT2(X86_JA, OFFSET2);                   /* ja out */
320         label2 = cnt;
321         EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
322         EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */
323
324         /* prog = array->ptrs[index]; */
325         EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
326                     offsetof(struct bpf_array, ptrs));
327
328         /*
329          * if (prog == NULL)
330          *      goto out;
331          */
332         EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
333 #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
334         EMIT2(X86_JE, OFFSET3);                   /* je out */
335         label3 = cnt;
336
337         /* goto *(prog->bpf_func + prologue_size); */
338         EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
339               offsetof(struct bpf_prog, bpf_func));
340         EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */
341
342         /*
343          * Now we're ready to jump into the next BPF program
344          * rdi == ctx (1st arg)
345          * rax == prog->bpf_func + prologue_size
346          */
347         RETPOLINE_RAX_BPF_JIT();
348
349         /* out: */
350         BUILD_BUG_ON(cnt - label1 != OFFSET1);
351         BUILD_BUG_ON(cnt - label2 != OFFSET2);
352         BUILD_BUG_ON(cnt - label3 != OFFSET3);
353         *pprog = prog;
354 }
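/*
 * Two details of the sequence above, for illustration: 'mov edx,edx'
 * zero-extends the 32-bit index so that stale upper bits of rdx cannot
 * leak into the scaled 'rdx * 8' array access, and the indirect jump
 * targets prog->bpf_func + PROLOGUE_SIZE, skipping the target's own
 * prologue so that the current stack frame is reused and tail_call_cnt
 * keeps counting across the whole tail-call chain.
 */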
355
356
357 static void emit_load_skb_data_hlen(u8 **pprog)
358 {
359         u8 *prog = *pprog;
360         int cnt = 0;
361
362         /*
363          * r9d = skb->len - skb->data_len (headlen)
364          * r10 = skb->data
365          */
366         /* mov %r9d, off32(%rdi) */
367         EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
368
369         /* sub %r9d, off32(%rdi) */
370         EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
371
372         /* mov %r10, off32(%rdi) */
373         EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
374         *pprog = prog;
375 }
376
377 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
378                            u32 dst_reg, const u32 imm32)
379 {
380         u8 *prog = *pprog;
381         u8 b1, b2, b3;
382         int cnt = 0;
383
384         /*
385          * Optimization: if imm32 is positive, use 'mov %eax, imm32'
386          * (which zero-extends imm32) to save 2 bytes.
387          */
388         if (sign_propagate && (s32)imm32 < 0) {
389                 /* 'mov %rax, imm32' sign extends imm32 */
390                 b1 = add_1mod(0x48, dst_reg);
391                 b2 = 0xC7;
392                 b3 = 0xC0;
393                 EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
394                 goto done;
395         }
396
397         /*
398          * Optimization: if imm32 is zero, use 'xor %eax, %eax'
399          * to save 3 bytes.
400          */
401         if (imm32 == 0) {
402                 if (is_ereg(dst_reg))
403                         EMIT1(add_2mod(0x40, dst_reg, dst_reg));
404                 b2 = 0x31; /* xor */
405                 b3 = 0xC0;
406                 EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
407                 goto done;
408         }
409
410         /* mov %eax, imm32 */
411         if (is_ereg(dst_reg))
412                 EMIT1(add_1mod(0x40, dst_reg));
413         EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
414 done:
415         *pprog = prog;
416 }
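/*
 * Example encodings produced by emit_mov_imm32() for dst_reg == BPF_REG_0:
 * imm32 == -4 with sign_propagate gives 48 C7 C0 FC FF FF FF ('mov rax,-4'),
 * imm32 == 0 gives 31 C0 ('xor eax,eax'), and imm32 == 5 gives
 * B8 05 00 00 00 ('mov eax,5'). Destinations in r8..r15 additionally get
 * a 0x40-based REX prefix.
 */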
417
418 static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
419                            const u32 imm32_hi, const u32 imm32_lo)
420 {
421         u8 *prog = *pprog;
422         int cnt = 0;
423
424         if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
425                 /*
426                  * For emitting a plain u32, where the sign bit must not be
427                  * propagated, LLVM tends to load imm64 over mov32
428                  * directly, so save a couple of bytes by just doing
429                  * 'mov %eax, imm32' instead.
430                  */
431                 emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
432         } else {
433                 /* movabsq %rax, imm64 */
434                 EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
435                 EMIT(imm32_lo, 4);
436                 EMIT(imm32_hi, 4);
437         }
438
439         *pprog = prog;
440 }
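/*
 * For example, loading 0x100000000 into BPF_REG_0 does not fit in a u32 and
 * therefore takes the 10-byte form 48 B8 00 00 00 00 01 00 00 00
 * ('movabs rax,0x100000000'), while a value that does fit falls back to
 * the shorter mov32 path above.
 */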
441
442 static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
443 {
444         u8 *prog = *pprog;
445         int cnt = 0;
446
447         if (is64) {
448                 /* mov dst, src */
449                 EMIT_mov(dst_reg, src_reg);
450         } else {
451                 /* mov32 dst, src */
452                 if (is_ereg(dst_reg) || is_ereg(src_reg))
453                         EMIT1(add_2mod(0x40, dst_reg, src_reg));
454                 EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
455         }
456
457         *pprog = prog;
458 }
459
460 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
461                   int oldproglen, struct jit_context *ctx)
462 {
463         struct bpf_insn *insn = bpf_prog->insnsi;
464         int insn_cnt = bpf_prog->len;
465         bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
466         bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
467         bool seen_exit = false;
468         u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
469         int i, cnt = 0;
470         int proglen = 0;
471         u8 *prog = temp;
472
473         emit_prologue(&prog, bpf_prog->aux->stack_depth,
474                       bpf_prog_was_classic(bpf_prog));
475
476         if (seen_ld_abs)
477                 emit_load_skb_data_hlen(&prog);
478
479         for (i = 0; i < insn_cnt; i++, insn++) {
480                 const s32 imm32 = insn->imm;
481                 u32 dst_reg = insn->dst_reg;
482                 u32 src_reg = insn->src_reg;
483                 u8 b2 = 0, b3 = 0;
484                 s64 jmp_offset;
485                 u8 jmp_cond;
486                 bool reload_skb_data;
487                 int ilen;
488                 u8 *func;
489
490                 if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
491                         ctx->seen_ax_reg = seen_ax_reg = true;
492
493                 switch (insn->code) {
494                         /* ALU */
495                 case BPF_ALU | BPF_ADD | BPF_X:
496                 case BPF_ALU | BPF_SUB | BPF_X:
497                 case BPF_ALU | BPF_AND | BPF_X:
498                 case BPF_ALU | BPF_OR | BPF_X:
499                 case BPF_ALU | BPF_XOR | BPF_X:
500                 case BPF_ALU64 | BPF_ADD | BPF_X:
501                 case BPF_ALU64 | BPF_SUB | BPF_X:
502                 case BPF_ALU64 | BPF_AND | BPF_X:
503                 case BPF_ALU64 | BPF_OR | BPF_X:
504                 case BPF_ALU64 | BPF_XOR | BPF_X:
505                         switch (BPF_OP(insn->code)) {
506                         case BPF_ADD: b2 = 0x01; break;
507                         case BPF_SUB: b2 = 0x29; break;
508                         case BPF_AND: b2 = 0x21; break;
509                         case BPF_OR: b2 = 0x09; break;
510                         case BPF_XOR: b2 = 0x31; break;
511                         }
512                         if (BPF_CLASS(insn->code) == BPF_ALU64)
513                                 EMIT1(add_2mod(0x48, dst_reg, src_reg));
514                         else if (is_ereg(dst_reg) || is_ereg(src_reg))
515                                 EMIT1(add_2mod(0x40, dst_reg, src_reg));
516                         EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
517                         break;
518
519                 case BPF_ALU64 | BPF_MOV | BPF_X:
520                 case BPF_ALU | BPF_MOV | BPF_X:
521                         emit_mov_reg(&prog,
522                                      BPF_CLASS(insn->code) == BPF_ALU64,
523                                      dst_reg, src_reg);
524                         break;
525
526                         /* neg dst */
527                 case BPF_ALU | BPF_NEG:
528                 case BPF_ALU64 | BPF_NEG:
529                         if (BPF_CLASS(insn->code) == BPF_ALU64)
530                                 EMIT1(add_1mod(0x48, dst_reg));
531                         else if (is_ereg(dst_reg))
532                                 EMIT1(add_1mod(0x40, dst_reg));
533                         EMIT2(0xF7, add_1reg(0xD8, dst_reg));
534                         break;
535
536                 case BPF_ALU | BPF_ADD | BPF_K:
537                 case BPF_ALU | BPF_SUB | BPF_K:
538                 case BPF_ALU | BPF_AND | BPF_K:
539                 case BPF_ALU | BPF_OR | BPF_K:
540                 case BPF_ALU | BPF_XOR | BPF_K:
541                 case BPF_ALU64 | BPF_ADD | BPF_K:
542                 case BPF_ALU64 | BPF_SUB | BPF_K:
543                 case BPF_ALU64 | BPF_AND | BPF_K:
544                 case BPF_ALU64 | BPF_OR | BPF_K:
545                 case BPF_ALU64 | BPF_XOR | BPF_K:
546                         if (BPF_CLASS(insn->code) == BPF_ALU64)
547                                 EMIT1(add_1mod(0x48, dst_reg));
548                         else if (is_ereg(dst_reg))
549                                 EMIT1(add_1mod(0x40, dst_reg));
550
551                         /*
552                          * b3 holds the 'normal' opcode; b2 is the short form,
553                          * which is only valid when dst is eax/rax.
554                          */
555                         switch (BPF_OP(insn->code)) {
556                         case BPF_ADD:
557                                 b3 = 0xC0;
558                                 b2 = 0x05;
559                                 break;
560                         case BPF_SUB:
561                                 b3 = 0xE8;
562                                 b2 = 0x2D;
563                                 break;
564                         case BPF_AND:
565                                 b3 = 0xE0;
566                                 b2 = 0x25;
567                                 break;
568                         case BPF_OR:
569                                 b3 = 0xC8;
570                                 b2 = 0x0D;
571                                 break;
572                         case BPF_XOR:
573                                 b3 = 0xF0;
574                                 b2 = 0x35;
575                                 break;
576                         }
577
578                         if (is_imm8(imm32))
579                                 EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
580                         else if (is_axreg(dst_reg))
581                                 EMIT1_off32(b2, imm32);
582                         else
583                                 EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
584                         break;
585
586                 case BPF_ALU64 | BPF_MOV | BPF_K:
587                 case BPF_ALU | BPF_MOV | BPF_K:
588                         emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
589                                        dst_reg, imm32);
590                         break;
591
592                 case BPF_LD | BPF_IMM | BPF_DW:
593                         emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
594                         insn++;
595                         i++;
596                         break;
597
598                         /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
599                 case BPF_ALU | BPF_MOD | BPF_X:
600                 case BPF_ALU | BPF_DIV | BPF_X:
601                 case BPF_ALU | BPF_MOD | BPF_K:
602                 case BPF_ALU | BPF_DIV | BPF_K:
603                 case BPF_ALU64 | BPF_MOD | BPF_X:
604                 case BPF_ALU64 | BPF_DIV | BPF_X:
605                 case BPF_ALU64 | BPF_MOD | BPF_K:
606                 case BPF_ALU64 | BPF_DIV | BPF_K:
607                         EMIT1(0x50); /* push rax */
608                         EMIT1(0x52); /* push rdx */
609
610                         if (BPF_SRC(insn->code) == BPF_X)
611                                 /* mov r11, src_reg */
612                                 EMIT_mov(AUX_REG, src_reg);
613                         else
614                                 /* mov r11, imm32 */
615                                 EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
616
617                         /* mov rax, dst_reg */
618                         EMIT_mov(BPF_REG_0, dst_reg);
619
620                         /*
621                          * xor edx, edx
622                          * equivalent to 'xor rdx, rdx', but one byte less
623                          */
624                         EMIT2(0x31, 0xd2);
625
626                         if (BPF_CLASS(insn->code) == BPF_ALU64)
627                                 /* div r11 */
628                                 EMIT3(0x49, 0xF7, 0xF3);
629                         else
630                                 /* div r11d */
631                                 EMIT3(0x41, 0xF7, 0xF3);
632
633                         if (BPF_OP(insn->code) == BPF_MOD)
634                                 /* mov r11, rdx */
635                                 EMIT3(0x49, 0x89, 0xD3);
636                         else
637                                 /* mov r11, rax */
638                                 EMIT3(0x49, 0x89, 0xC3);
639
640                         EMIT1(0x5A); /* pop rdx */
641                         EMIT1(0x58); /* pop rax */
642
643                         /* mov dst_reg, r11 */
644                         EMIT_mov(dst_reg, AUX_REG);
645                         break;
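                        /*
                         * Illustration: BPF_ALU64 | BPF_DIV | BPF_K with dst_reg == R1
                         * and imm32 == 10 comes out roughly as: push rax; push rdx;
                         * mov r11,10; mov rax,rdi; xor edx,edx; div r11; mov r11,rax;
                         * pop rdx; pop rax; mov rdi,r11. Note that 'div' (not 'idiv')
                         * is used because BPF division and modulo are unsigned.
                         */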
646
647                 case BPF_ALU | BPF_MUL | BPF_K:
648                 case BPF_ALU | BPF_MUL | BPF_X:
649                 case BPF_ALU64 | BPF_MUL | BPF_K:
650                 case BPF_ALU64 | BPF_MUL | BPF_X:
651                 {
652                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
653
654                         if (dst_reg != BPF_REG_0)
655                                 EMIT1(0x50); /* push rax */
656                         if (dst_reg != BPF_REG_3)
657                                 EMIT1(0x52); /* push rdx */
658
659                         /* mov r11, dst_reg */
660                         EMIT_mov(AUX_REG, dst_reg);
661
662                         if (BPF_SRC(insn->code) == BPF_X)
663                                 emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
664                         else
665                                 emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
666
667                         if (is64)
668                                 EMIT1(add_1mod(0x48, AUX_REG));
669                         else if (is_ereg(AUX_REG))
670                                 EMIT1(add_1mod(0x40, AUX_REG));
671                         /* mul(q) r11 */
672                         EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
673
674                         if (dst_reg != BPF_REG_3)
675                                 EMIT1(0x5A); /* pop rdx */
676                         if (dst_reg != BPF_REG_0) {
677                                 /* mov dst_reg, rax */
678                                 EMIT_mov(dst_reg, BPF_REG_0);
679                                 EMIT1(0x58); /* pop rax */
680                         }
681                         break;
682                 }
683                         /* Shifts */
684                 case BPF_ALU | BPF_LSH | BPF_K:
685                 case BPF_ALU | BPF_RSH | BPF_K:
686                 case BPF_ALU | BPF_ARSH | BPF_K:
687                 case BPF_ALU64 | BPF_LSH | BPF_K:
688                 case BPF_ALU64 | BPF_RSH | BPF_K:
689                 case BPF_ALU64 | BPF_ARSH | BPF_K:
690                         if (BPF_CLASS(insn->code) == BPF_ALU64)
691                                 EMIT1(add_1mod(0x48, dst_reg));
692                         else if (is_ereg(dst_reg))
693                                 EMIT1(add_1mod(0x40, dst_reg));
694
695                         switch (BPF_OP(insn->code)) {
696                         case BPF_LSH: b3 = 0xE0; break;
697                         case BPF_RSH: b3 = 0xE8; break;
698                         case BPF_ARSH: b3 = 0xF8; break;
699                         }
700
701                         if (imm32 == 1)
702                                 EMIT2(0xD1, add_1reg(b3, dst_reg));
703                         else
704                                 EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
705                         break;
706
707                 case BPF_ALU | BPF_LSH | BPF_X:
708                 case BPF_ALU | BPF_RSH | BPF_X:
709                 case BPF_ALU | BPF_ARSH | BPF_X:
710                 case BPF_ALU64 | BPF_LSH | BPF_X:
711                 case BPF_ALU64 | BPF_RSH | BPF_X:
712                 case BPF_ALU64 | BPF_ARSH | BPF_X:
713
714                         /* Check for bad case when dst_reg == rcx */
715                         if (dst_reg == BPF_REG_4) {
716                                 /* mov r11, dst_reg */
717                                 EMIT_mov(AUX_REG, dst_reg);
718                                 dst_reg = AUX_REG;
719                         }
720
721                         if (src_reg != BPF_REG_4) { /* common case */
722                                 EMIT1(0x51); /* push rcx */
723
724                                 /* mov rcx, src_reg */
725                                 EMIT_mov(BPF_REG_4, src_reg);
726                         }
727
728                         /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
729                         if (BPF_CLASS(insn->code) == BPF_ALU64)
730                                 EMIT1(add_1mod(0x48, dst_reg));
731                         else if (is_ereg(dst_reg))
732                                 EMIT1(add_1mod(0x40, dst_reg));
733
734                         switch (BPF_OP(insn->code)) {
735                         case BPF_LSH: b3 = 0xE0; break;
736                         case BPF_RSH: b3 = 0xE8; break;
737                         case BPF_ARSH: b3 = 0xF8; break;
738                         }
739                         EMIT2(0xD3, add_1reg(b3, dst_reg));
740
741                         if (src_reg != BPF_REG_4)
742                                 EMIT1(0x59); /* pop rcx */
743
744                         if (insn->dst_reg == BPF_REG_4)
745                                 /* mov dst_reg, r11 */
746                                 EMIT_mov(insn->dst_reg, AUX_REG);
747                         break;
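                        /*
                         * Illustration: BPF_ALU64 | BPF_LSH | BPF_X with dst_reg == R1
                         * and src_reg == R2 becomes: push rcx; mov rcx,rsi; shl rdi,cl;
                         * pop rcx. The rcx juggling above only kicks in when dst_reg or
                         * src_reg is already rcx: a dst in rcx is shifted via r11 and
                         * copied back, while a src already in rcx needs no extra move.
                         */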
748
749                 case BPF_ALU | BPF_END | BPF_FROM_BE:
750                         switch (imm32) {
751                         case 16:
752                                 /* Emit 'ror %ax, 8' to swap lower 2 bytes */
753                                 EMIT1(0x66);
754                                 if (is_ereg(dst_reg))
755                                         EMIT1(0x41);
756                                 EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
757
758                                 /* Emit 'movzwl eax, ax' */
759                                 if (is_ereg(dst_reg))
760                                         EMIT3(0x45, 0x0F, 0xB7);
761                                 else
762                                         EMIT2(0x0F, 0xB7);
763                                 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
764                                 break;
765                         case 32:
766                                 /* Emit 'bswap eax' to swap lower 4 bytes */
767                                 if (is_ereg(dst_reg))
768                                         EMIT2(0x41, 0x0F);
769                                 else
770                                         EMIT1(0x0F);
771                                 EMIT1(add_1reg(0xC8, dst_reg));
772                                 break;
773                         case 64:
774                                 /* Emit 'bswap rax' to swap 8 bytes */
775                                 EMIT3(add_1mod(0x48, dst_reg), 0x0F,
776                                       add_1reg(0xC8, dst_reg));
777                                 break;
778                         }
779                         break;
780
781                 case BPF_ALU | BPF_END | BPF_FROM_LE:
782                         switch (imm32) {
783                         case 16:
784                                 /*
785                                  * Emit 'movzwl eax, ax' to zero extend 16-bit
786                                  * into 64 bit
787                                  */
788                                 if (is_ereg(dst_reg))
789                                         EMIT3(0x45, 0x0F, 0xB7);
790                                 else
791                                         EMIT2(0x0F, 0xB7);
792                                 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
793                                 break;
794                         case 32:
795                                 /* Emit 'mov eax, eax' to clear upper 32-bits */
796                                 if (is_ereg(dst_reg))
797                                         EMIT1(0x45);
798                                 EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
799                                 break;
800                         case 64:
801                                 /* nop */
802                                 break;
803                         }
804                         break;
805
806                         /* ST: *(u8*)(dst_reg + off) = imm */
807                 case BPF_ST | BPF_MEM | BPF_B:
808                         if (is_ereg(dst_reg))
809                                 EMIT2(0x41, 0xC6);
810                         else
811                                 EMIT1(0xC6);
812                         goto st;
813                 case BPF_ST | BPF_MEM | BPF_H:
814                         if (is_ereg(dst_reg))
815                                 EMIT3(0x66, 0x41, 0xC7);
816                         else
817                                 EMIT2(0x66, 0xC7);
818                         goto st;
819                 case BPF_ST | BPF_MEM | BPF_W:
820                         if (is_ereg(dst_reg))
821                                 EMIT2(0x41, 0xC7);
822                         else
823                                 EMIT1(0xC7);
824                         goto st;
825                 case BPF_ST | BPF_MEM | BPF_DW:
826                         EMIT2(add_1mod(0x48, dst_reg), 0xC7);
827
828 st:                     if (is_imm8(insn->off))
829                                 EMIT2(add_1reg(0x40, dst_reg), insn->off);
830                         else
831                                 EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
832
833                         EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
834                         break;
835
836                         /* STX: *(u8*)(dst_reg + off) = src_reg */
837                 case BPF_STX | BPF_MEM | BPF_B:
838                         /* Emit 'mov byte ptr [rax + off], al' */
839                         if (is_ereg(dst_reg) || is_ereg(src_reg) ||
840                             /* We have to add extra byte for x86 SIL, DIL regs */
841                             src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
842                                 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
843                         else
844                                 EMIT1(0x88);
845                         goto stx;
846                 case BPF_STX | BPF_MEM | BPF_H:
847                         if (is_ereg(dst_reg) || is_ereg(src_reg))
848                                 EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
849                         else
850                                 EMIT2(0x66, 0x89);
851                         goto stx;
852                 case BPF_STX | BPF_MEM | BPF_W:
853                         if (is_ereg(dst_reg) || is_ereg(src_reg))
854                                 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
855                         else
856                                 EMIT1(0x89);
857                         goto stx;
858                 case BPF_STX | BPF_MEM | BPF_DW:
859                         EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
860 stx:                    if (is_imm8(insn->off))
861                                 EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
862                         else
863                                 EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
864                                             insn->off);
865                         break;
866
867                         /* LDX: dst_reg = *(u8*)(src_reg + off) */
868                 case BPF_LDX | BPF_MEM | BPF_B:
869                         /* Emit 'movzx rax, byte ptr [rax + off]' */
870                         EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
871                         goto ldx;
872                 case BPF_LDX | BPF_MEM | BPF_H:
873                         /* Emit 'movzx rax, word ptr [rax + off]' */
874                         EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
875                         goto ldx;
876                 case BPF_LDX | BPF_MEM | BPF_W:
877                         /* Emit 'mov eax, dword ptr [rax+0x14]' */
878                         if (is_ereg(dst_reg) || is_ereg(src_reg))
879                                 EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
880                         else
881                                 EMIT1(0x8B);
882                         goto ldx;
883                 case BPF_LDX | BPF_MEM | BPF_DW:
884                         /* Emit 'mov rax, qword ptr [rax+0x14]' */
885                         EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
886 ldx:                    /*
887                          * If insn->off == 0 we could save one extra byte, but
888                          * the special case of x86 R13, which always needs an
889                          * offset, makes it not worth the hassle
890                          */
891                         if (is_imm8(insn->off))
892                                 EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
893                         else
894                                 EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
895                                             insn->off);
896                         break;
897
898                         /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
899                 case BPF_STX | BPF_XADD | BPF_W:
900                         /* Emit 'lock add dword ptr [rax + off], eax' */
901                         if (is_ereg(dst_reg) || is_ereg(src_reg))
902                                 EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
903                         else
904                                 EMIT2(0xF0, 0x01);
905                         goto xadd;
906                 case BPF_STX | BPF_XADD | BPF_DW:
907                         EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
908 xadd:                   if (is_imm8(insn->off))
909                                 EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
910                         else
911                                 EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
912                                             insn->off);
913                         break;
914
915                         /* call */
916                 case BPF_JMP | BPF_CALL:
917                         func = (u8 *) __bpf_call_base + imm32;
918                         jmp_offset = func - (image + addrs[i]);
919                         if (seen_ld_abs) {
920                                 reload_skb_data = bpf_helper_changes_pkt_data(func);
921                                 if (reload_skb_data) {
922                                         EMIT1(0x57); /* push %rdi */
923                                         jmp_offset += 22; /* pop, mov, sub, mov */
924                                 } else {
925                                         EMIT2(0x41, 0x52); /* push %r10 */
926                                         EMIT2(0x41, 0x51); /* push %r9 */
927                                         /*
928                                          * We need to adjust jmp offset, since
929                                          * pop %r9, pop %r10 take 4 bytes after call insn
930                                          */
931                                         jmp_offset += 4;
932                                 }
933                         }
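                        /*
                         * The jmp_offset adjustments above account for the fact that
                         * addrs[i] (from the previous pass) marks the end of all code
                         * emitted for this instruction, while the call's rel32 is
                         * relative to the end of the call itself: the skb reload path
                         * emits 22 bytes after the call (a 1-byte pop plus three
                         * 7-byte movs), the r9/r10 path emits 4 bytes (two 2-byte pops).
                         */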
934                         if (!imm32 || !is_simm32(jmp_offset)) {
935                                 pr_err("unsupported BPF func %d addr %p image %p\n",
936                                        imm32, func, image);
937                                 return -EINVAL;
938                         }
939                         EMIT1_off32(0xE8, jmp_offset);
940                         if (seen_ld_abs) {
941                                 if (reload_skb_data) {
942                                         EMIT1(0x5F); /* pop %rdi */
943                                         emit_load_skb_data_hlen(&prog);
944                                 } else {
945                                         EMIT2(0x41, 0x59); /* pop %r9 */
946                                         EMIT2(0x41, 0x5A); /* pop %r10 */
947                                 }
948                         }
949                         break;
950
951                 case BPF_JMP | BPF_TAIL_CALL:
952                         emit_bpf_tail_call(&prog);
953                         break;
954
955                         /* cond jump */
956                 case BPF_JMP | BPF_JEQ | BPF_X:
957                 case BPF_JMP | BPF_JNE | BPF_X:
958                 case BPF_JMP | BPF_JGT | BPF_X:
959                 case BPF_JMP | BPF_JLT | BPF_X:
960                 case BPF_JMP | BPF_JGE | BPF_X:
961                 case BPF_JMP | BPF_JLE | BPF_X:
962                 case BPF_JMP | BPF_JSGT | BPF_X:
963                 case BPF_JMP | BPF_JSLT | BPF_X:
964                 case BPF_JMP | BPF_JSGE | BPF_X:
965                 case BPF_JMP | BPF_JSLE | BPF_X:
966                         /* cmp dst_reg, src_reg */
967                         EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
968                               add_2reg(0xC0, dst_reg, src_reg));
969                         goto emit_cond_jmp;
970
971                 case BPF_JMP | BPF_JSET | BPF_X:
972                         /* test dst_reg, src_reg */
973                         EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
974                               add_2reg(0xC0, dst_reg, src_reg));
975                         goto emit_cond_jmp;
976
977                 case BPF_JMP | BPF_JSET | BPF_K:
978                         /* test dst_reg, imm32 */
979                         EMIT1(add_1mod(0x48, dst_reg));
980                         EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
981                         goto emit_cond_jmp;
982
983                 case BPF_JMP | BPF_JEQ | BPF_K:
984                 case BPF_JMP | BPF_JNE | BPF_K:
985                 case BPF_JMP | BPF_JGT | BPF_K:
986                 case BPF_JMP | BPF_JLT | BPF_K:
987                 case BPF_JMP | BPF_JGE | BPF_K:
988                 case BPF_JMP | BPF_JLE | BPF_K:
989                 case BPF_JMP | BPF_JSGT | BPF_K:
990                 case BPF_JMP | BPF_JSLT | BPF_K:
991                 case BPF_JMP | BPF_JSGE | BPF_K:
992                 case BPF_JMP | BPF_JSLE | BPF_K:
993                         /* cmp dst_reg, imm8/32 */
994                         EMIT1(add_1mod(0x48, dst_reg));
995
996                         if (is_imm8(imm32))
997                                 EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
998                         else
999                                 EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
1000
1001 emit_cond_jmp:          /* Convert BPF opcode to x86 */
1002                         switch (BPF_OP(insn->code)) {
1003                         case BPF_JEQ:
1004                                 jmp_cond = X86_JE;
1005                                 break;
1006                         case BPF_JSET:
1007                         case BPF_JNE:
1008                                 jmp_cond = X86_JNE;
1009                                 break;
1010                         case BPF_JGT:
1011                                 /* GT is unsigned '>', JA in x86 */
1012                                 jmp_cond = X86_JA;
1013                                 break;
1014                         case BPF_JLT:
1015                                 /* LT is unsigned '<', JB in x86 */
1016                                 jmp_cond = X86_JB;
1017                                 break;
1018                         case BPF_JGE:
1019                                 /* GE is unsigned '>=', JAE in x86 */
1020                                 jmp_cond = X86_JAE;
1021                                 break;
1022                         case BPF_JLE:
1023                                 /* LE is unsigned '<=', JBE in x86 */
1024                                 jmp_cond = X86_JBE;
1025                                 break;
1026                         case BPF_JSGT:
1027                                 /* Signed '>', GT in x86 */
1028                                 jmp_cond = X86_JG;
1029                                 break;
1030                         case BPF_JSLT:
1031                                 /* Signed '<', LT in x86 */
1032                                 jmp_cond = X86_JL;
1033                                 break;
1034                         case BPF_JSGE:
1035                                 /* Signed '>=', GE in x86 */
1036                                 jmp_cond = X86_JGE;
1037                                 break;
1038                         case BPF_JSLE:
1039                                 /* Signed '<=', LE in x86 */
1040                                 jmp_cond = X86_JLE;
1041                                 break;
1042                         default: /* to silence GCC warning */
1043                                 return -EFAULT;
1044                         }
1045                         jmp_offset = addrs[i + insn->off] - addrs[i];
1046                         if (is_imm8(jmp_offset)) {
1047                                 EMIT2(jmp_cond, jmp_offset);
1048                         } else if (is_simm32(jmp_offset)) {
1049                                 EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
1050                         } else {
1051                                 pr_err("cond_jmp gen bug %llx\n", jmp_offset);
1052                                 return -EFAULT;
1053                         }
1054
1055                         break;
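                        /*
                         * Example: BPF_JMP | BPF_JGT | BPF_K with dst_reg == R1 and
                         * imm32 == 7 emits 48 83 FF 07 ('cmp rdi,7') followed by a
                         * 'ja' - the 2-byte 77 rel8 form if the target is within rel8
                         * range, otherwise the 6-byte 0F 87 rel32 form.
                         */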
1056
1057                 case BPF_JMP | BPF_JA:
1058                         if (insn->off == -1)
1059                                 /*
1060                                  * -1 jmp instructions always jump backwards two
1061                                  * bytes. Explicitly handling this case avoids
1062                                  * wasting too many passes when there are long
1063                                  * sequences of replaced dead code.
1064                                  */
1065                                 jmp_offset = -2;
1066                         else
1067                                 jmp_offset = addrs[i + insn->off] - addrs[i];
1068
1069                         if (!jmp_offset)
1070                                 /* Optimize out nop jumps */
1071                                 break;
1072 emit_jmp:
1073                         if (is_imm8(jmp_offset)) {
1074                                 EMIT2(0xEB, jmp_offset);
1075                         } else if (is_simm32(jmp_offset)) {
1076                                 EMIT1_off32(0xE9, jmp_offset);
1077                         } else {
1078                                 pr_err("jmp gen bug %llx\n", jmp_offset);
1079                                 return -EFAULT;
1080                         }
1081                         break;
1082
1083                 case BPF_LD | BPF_IND | BPF_W:
1084                         func = sk_load_word;
1085                         goto common_load;
1086                 case BPF_LD | BPF_ABS | BPF_W:
1087                         func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
1088 common_load:
1089                         ctx->seen_ld_abs = seen_ld_abs = true;
1090                         jmp_offset = func - (image + addrs[i]);
1091                         if (!func || !is_simm32(jmp_offset)) {
1092                                 pr_err("unsupported BPF func %d addr %p image %p\n",
1093                                        imm32, func, image);
1094                                 return -EINVAL;
1095                         }
1096                         if (BPF_MODE(insn->code) == BPF_ABS) {
1097                                 /* mov %esi, imm32 */
1098                                 EMIT1_off32(0xBE, imm32);
1099                         } else {
1100                                 /* mov %rsi, src_reg */
1101                                 EMIT_mov(BPF_REG_2, src_reg);
1102                                 if (imm32) {
1103                                         if (is_imm8(imm32))
1104                                                 /* add %esi, imm8 */
1105                                                 EMIT3(0x83, 0xC6, imm32);
1106                                         else
1107                                                 /* add %esi, imm32 */
1108                                                 EMIT2_off32(0x81, 0xC6, imm32);
1109                                 }
1110                         }
1111                         /*
1112                          * The skb pointer is in R6 (%rbx); it will be copied into
1113                          * %rdi if a skb_copy_bits() call is necessary.
1114                          * sk_load_* helpers also use %r10 and %r9d.
1115                          * See bpf_jit.S
1116                          */
1117                         if (seen_ax_reg)
1118                                 /* r10 = skb->data, mov %r10, off32(%rbx) */
1119                                 EMIT3_off32(0x4c, 0x8b, 0x93,
1120                                             offsetof(struct sk_buff, data));
1121                         EMIT1_off32(0xE8, jmp_offset); /* call */
1122                         break;
1123
1124                 case BPF_LD | BPF_IND | BPF_H:
1125                         func = sk_load_half;
1126                         goto common_load;
1127                 case BPF_LD | BPF_ABS | BPF_H:
1128                         func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
1129                         goto common_load;
1130                 case BPF_LD | BPF_IND | BPF_B:
1131                         func = sk_load_byte;
1132                         goto common_load;
1133                 case BPF_LD | BPF_ABS | BPF_B:
1134                         func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
1135                         goto common_load;
1136
1137                 case BPF_JMP | BPF_EXIT:
1138                         if (seen_exit) {
1139                                 jmp_offset = ctx->cleanup_addr - addrs[i];
1140                                 goto emit_jmp;
1141                         }
1142                         seen_exit = true;
1143                         /* Update cleanup_addr */
1144                         ctx->cleanup_addr = proglen;
1145                         /* mov rbx, qword ptr [rbp+0] */
1146                         EMIT4(0x48, 0x8B, 0x5D, 0);
1147                         /* mov r13, qword ptr [rbp+8] */
1148                         EMIT4(0x4C, 0x8B, 0x6D, 8);
1149                         /* mov r14, qword ptr [rbp+16] */
1150                         EMIT4(0x4C, 0x8B, 0x75, 16);
1151                         /* mov r15, qword ptr [rbp+24] */
1152                         EMIT4(0x4C, 0x8B, 0x7D, 24);
1153
1154                         /* add rbp, AUX_STACK_SPACE */
1155                         EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
1156                         EMIT1(0xC9); /* leave */
1157                         EMIT1(0xC3); /* ret */
1158                         break;
1159
1160                 default:
1161                         /*
1162                          * By design x86-64 JIT should support all BPF instructions.
1163                          * This error will be seen if a new instruction was added
1164                          * to the interpreter, but not to the JIT, or if there is
1165                          * junk in bpf_prog.
1166                          */
1167                         pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
1168                         return -EINVAL;
1169                 }
1170
1171                 ilen = prog - temp;
1172                 if (ilen > BPF_MAX_INSN_SIZE) {
1173                         pr_err("bpf_jit: fatal insn size error\n");
1174                         return -EFAULT;
1175                 }
1176
1177                 if (image) {
1178                         if (unlikely(proglen + ilen > oldproglen)) {
1179                                 pr_err("bpf_jit: fatal error\n");
1180                                 return -EFAULT;
1181                         }
1182                         memcpy(image + proglen, temp, ilen);
1183                 }
1184                 proglen += ilen;
1185                 addrs[i] = proglen;
1186                 prog = temp;
1187         }
1188         return proglen;
1189 }
1190
1191 struct x64_jit_data {
1192         struct bpf_binary_header *header;
1193         int *addrs;
1194         u8 *image;
1195         int proglen;
1196         struct jit_context ctx;
1197 };
1198
1199 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1200 {
1201         struct bpf_binary_header *header = NULL;
1202         struct bpf_prog *tmp, *orig_prog = prog;
1203         struct x64_jit_data *jit_data;
1204         int proglen, oldproglen = 0;
1205         struct jit_context ctx = {};
1206         bool tmp_blinded = false;
1207         bool extra_pass = false;
1208         u8 *image = NULL;
1209         int *addrs;
1210         int pass;
1211         int i;
1212
1213         if (!prog->jit_requested)
1214                 return orig_prog;
1215
1216         tmp = bpf_jit_blind_constants(prog);
1217         /*
1218          * If blinding was requested and we failed during blinding,
1219          * we must fall back to the interpreter.
1220          */
1221         if (IS_ERR(tmp))
1222                 return orig_prog;
1223         if (tmp != prog) {
1224                 tmp_blinded = true;
1225                 prog = tmp;
1226         }
1227
1228         jit_data = prog->aux->jit_data;
1229         if (!jit_data) {
1230                 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1231                 if (!jit_data) {
1232                         prog = orig_prog;
1233                         goto out;
1234                 }
1235                 prog->aux->jit_data = jit_data;
1236         }
1237         addrs = jit_data->addrs;
1238         if (addrs) {
1239                 ctx = jit_data->ctx;
1240                 oldproglen = jit_data->proglen;
1241                 image = jit_data->image;
1242                 header = jit_data->header;
1243                 extra_pass = true;
1244                 goto skip_init_addrs;
1245         }
1246         addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
1247         if (!addrs) {
1248                 prog = orig_prog;
1249                 goto out_addrs;
1250         }
1251
1252         /*
1253          * Before the first pass, make a rough estimate of addrs[]:
1254          * each BPF instruction is translated to less than 64 bytes.
1255          */
1256         for (proglen = 0, i = 0; i < prog->len; i++) {
1257                 proglen += 64;
1258                 addrs[i] = proglen;
1259         }
1260         ctx.cleanup_addr = proglen;
1261 skip_init_addrs:
1262
1263         /*
1264          * JITed image shrinks with every pass and the loop iterates
1265          * until the image stops shrinking. Very large BPF programs
1266          * may converge on the last pass. In such a case, do one more
1267          * pass to emit the final image.
1268          */
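        /*
         * A typical source of shrinkage is branch encoding: a conditional
         * jump starts out in its 6-byte 0F 8x rel32 form and drops to the
         * 2-byte 7x rel8 form once a pass shows its target to be within
         * rel8 range, so in practice proglen only shrinks or stays the same.
         */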
1269         for (pass = 0; pass < 20 || image; pass++) {
1270                 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
1271                 if (proglen <= 0) {
1272                         image = NULL;
1273                         if (header)
1274                                 bpf_jit_binary_free(header);
1275                         prog = orig_prog;
1276                         goto out_addrs;
1277                 }
1278                 if (image) {
1279                         if (proglen != oldproglen) {
1280                                 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
1281                                        proglen, oldproglen);
1282                                 prog = orig_prog;
1283                                 goto out_addrs;
1284                         }
1285                         break;
1286                 }
1287                 if (proglen == oldproglen) {
1288                         header = bpf_jit_binary_alloc(proglen, &image,
1289                                                       1, jit_fill_hole);
1290                         if (!header) {
1291                                 prog = orig_prog;
1292                                 goto out_addrs;
1293                         }
1294                 }
1295                 oldproglen = proglen;
1296                 cond_resched();
1297         }
1298
1299         if (bpf_jit_enable > 1)
1300                 bpf_jit_dump(prog->len, proglen, pass + 1, image);
1301
1302         if (image) {
1303                 if (!prog->is_func || extra_pass) {
1304                         bpf_jit_binary_lock_ro(header);
1305                 } else {
1306                         jit_data->addrs = addrs;
1307                         jit_data->ctx = ctx;
1308                         jit_data->proglen = proglen;
1309                         jit_data->image = image;
1310                         jit_data->header = header;
1311                 }
1312                 prog->bpf_func = (void *)image;
1313                 prog->jited = 1;
1314                 prog->jited_len = proglen;
1315         } else {
1316                 prog = orig_prog;
1317         }
1318
1319         if (!prog->is_func || extra_pass) {
1320 out_addrs:
1321                 kfree(addrs);
1322                 kfree(jit_data);
1323                 prog->aux->jit_data = NULL;
1324         }
1325 out:
1326         if (tmp_blinded)
1327                 bpf_jit_prog_release_other(prog, prog == orig_prog ?
1328                                            tmp : orig_prog);
1329         return prog;
1330 }