Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)
diff --combined arch/alpha/kernel/traps.c

index ddb89a18cf26f76cafc174172adc068bbe9e919e,e94f4b73ac044881ef2aa001fcecefdb41a63d79..49d3b1e63ce5e2cd0423dff25728ed54c2592de1
--- 1/arch/alpha/kernel/traps.c
--- 2/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@@ -193,10 -193,8 +193,10 @@@ die_if_kernel(char * str, struct pt_reg
   static long dummy_emul(void) { return 0; }
   long (*alpha_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask)
     = (void *)dummy_emul;
+ +EXPORT_SYMBOL_GPL(alpha_fp_emul_imprecise);
   long (*alpha_fp_emul) (unsigned long pc)
     = (void *)dummy_emul;
+ +EXPORT_SYMBOL_GPL(alpha_fp_emul);
   #else
   long alpha_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask);
   long alpha_fp_emul (unsigned long pc);
@@@ -280,7 -278,7 +280,7 @@@ do_entIF(unsigned long type, struct pt_
               case 1: /* bugcheck */
                 info.si_signo = SIGTRAP;
                 info.si_errno = 0;
-               info.si_code = __SI_FAULT;
+               info.si_code = TRAP_FIXME;
                 info.si_addr = (void __user *) regs->pc;
                 info.si_trapno = 0;
                 send_sig_info(SIGTRAP, &info, current);
@@@ -320,7 -318,7 +320,7 @@@
                         break;
                 case GEN_ROPRAND:
                         signo = SIGFPE;
-                       code = __SI_FAULT;
+                       code = FPE_FIXME;
                         break;
   
                 case GEN_DECOVF:
@@@ -342,7 -340,7 +342,7 @@@
                 case GEN_SUBRNG7:
                 default:
                         signo = SIGTRAP;
-                       code = __SI_FAULT;
+                       code = TRAP_FIXME;
                         break;
                 }
   
diff --combined arch/arm64/kernel/signal32.c

index 4e5a664be04b20bdaeb2e638ed3eb85b8ca48732,9b95a935c21dd0ea0d283f141be7dbffb6756716..e09bf5d1560606405e25175d69d8b67929f02657
--- 1/arch/arm64/kernel/signal32.c
--- 2/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@@ -142,25 -142,25 +142,25 @@@ int copy_siginfo_to_user32(compat_sigin
          */
         err = __put_user(from->si_signo, &to->si_signo);
         err |= __put_user(from->si_errno, &to->si_errno);
-       err |= __put_user((short)from->si_code, &to->si_code);
+       err |= __put_user(from->si_code, &to->si_code);
         if (from->si_code < 0)
                 err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad,
                                       SI_PAD_SIZE);
-       else switch (from->si_code & __SI_MASK) {
-       case __SI_KILL:
+       else switch (siginfo_layout(from->si_signo, from->si_code)) {
+       case SIL_KILL:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 break;
-       case __SI_TIMER:
+       case SIL_TIMER:
                  err |= __put_user(from->si_tid, &to->si_tid);
                  err |= __put_user(from->si_overrun, &to->si_overrun);
                  err |= __put_user(from->si_int, &to->si_int);
                 break;
-       case __SI_POLL:
+       case SIL_POLL:
                 err |= __put_user(from->si_band, &to->si_band);
                 err |= __put_user(from->si_fd, &to->si_fd);
                 break;
-       case __SI_FAULT:
+       case SIL_FAULT:
                 err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr,
                                   &to->si_addr);
   #ifdef BUS_MCEERR_AO
@@@ -173,29 -173,24 +173,24 @@@
                         err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
   #endif
                 break;
-       case __SI_CHLD:
+       case SIL_CHLD:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 err |= __put_user(from->si_status, &to->si_status);
                 err |= __put_user(from->si_utime, &to->si_utime);
                 err |= __put_user(from->si_stime, &to->si_stime);
                 break;
-       case __SI_RT: /* This is not generated by the kernel as of now. */
-       case __SI_MESGQ: /* But this is */
+       case SIL_RT:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 err |= __put_user(from->si_int, &to->si_int);
                 break;
-       case __SI_SYS:
+       case SIL_SYS:
                 err |= __put_user((compat_uptr_t)(unsigned long)
                                 from->si_call_addr, &to->si_call_addr);
                 err |= __put_user(from->si_syscall, &to->si_syscall);
                 err |= __put_user(from->si_arch, &to->si_arch);
                 break;
-       default: /* this is just in case for now ... */
-               err |= __put_user(from->si_pid, &to->si_pid);
-               err |= __put_user(from->si_uid, &to->si_uid);
-               break;
         }
         return err;
   }
@@@ -354,7 -349,7 +349,7 @@@ static int compat_restore_sigframe(stru
         /*
          * Avoid compat_sys_sigreturn() restarting.
          */
- -      regs->syscallno = ~0UL;
+ +      forget_syscall(regs);
   
         err |= !valid_user_regs(&regs->user_regs, current);
   
diff --combined arch/sparc/kernel/traps_64.c

index c74f2dffcc13f5cc77109c6476807fb332a871c6,e882e128faa3a3bf3a501d0344d146b1e311a017..0a56dc257cb9a687292817962f9a94eaebd4bd3c
--- 1/arch/sparc/kernel/traps_64.c
--- 2/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@@ -265,45 -265,6 +265,45 @@@ void sun4v_insn_access_exception_tl1(st
         sun4v_insn_access_exception(regs, addr, type_ctx);
   }
   
+ +bool is_no_fault_exception(struct pt_regs *regs) /* true: insn was passed to a handle_*() helper */
+ +{
+ +      unsigned char asi;
+ +      u32 insn;
+ +
+ +      if (get_user(insn, (u32 __user *)regs->tpc) == -EFAULT)
+ +              return false; /* the instruction at regs->tpc could not be read */
+ +
+ +      /*
+ +       * Must do a little instruction decoding here in order to
+ +       * decide on a course of action. The bits of interest are:
+ +       *  insn[31:30] = op, where 3 indicates the load/store group
+ +       *  insn[24:19] = op3, which identifies individual opcodes
+ +       *  insn[13] indicates an immediate offset
+ +       *  op3[4]=1 identifies alternate space instructions
+ +       *  op3[5:4]=3 identifies floating point instructions
+ +       *  op3[2]=1 identifies stores
+ +       * See "Opcode Maps" in the appendix of any Sparc V9
+ +       * architecture spec for full details.
+ +       */
+ +      if ((insn & 0xc0800000) == 0xc0800000) {    /* op=3, op3[4]=1   */
+ +              if (insn & 0x2000)                  /* immediate offset */
+ +                      asi = (regs->tstate >> 24); /* saved %asi       */
+ +              else
+ +                      asi = (insn >> 5);          /* immediate asi    */
+ +              if ((asi & 0xf2) == ASI_PNF) { /* mask 0xf2 ignores asi bits 0, 2 and 3 */
+ +                      if (insn & 0x1000000) {     /* op3[5:4]=3       */
+ +                              handle_ldf_stq(insn, regs);
+ +                              return true;
+ +                      } else if (insn & 0x200000) { /* op3[2], stores */
+ +                              return false; /* stores are never handled here */
+ +                      }
+ +                      handle_ld_nf(insn, regs); /* remaining case: neither FP nor store */
+ +                      return true;
+ +              }
+ +      }
+ +      return false; /* not an alternate-space access with a matching ASI */
+ +}
+ +
   void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
   {
         enum ctx_state prev_state = exception_enter();
@@@ -335,9 -296,6 +335,9 @@@
                 die_if_kernel("Dax", regs);
         }
   
+ +      if (is_no_fault_exception(regs))
+ +              return;
+ +
         info.si_signo = SIGSEGV;
         info.si_errno = 0;
         info.si_code = SEGV_MAPERR;
@@@ -394,9 -352,6 +394,9 @@@ void sun4v_data_access_exception(struc
                 regs->tpc &= 0xffffffff;
                 regs->tnpc &= 0xffffffff;
         }
+ +      if (is_no_fault_exception(regs))
+ +              return;
+ +
         info.si_signo = SIGSEGV;
         info.si_errno = 0;
         info.si_code = SEGV_MAPERR;
@@@ -2303,7 -2258,7 +2303,7 @@@ static void do_fpe_common(struct pt_reg
                 info.si_errno = 0;
                 info.si_addr = (void __user *)regs->tpc;
                 info.si_trapno = 0;
-               info.si_code = __SI_FAULT;
+               info.si_code = FPE_FIXME;
                 if ((fsr & 0x1c000) == (1 << 14)) {
                         if (fsr & 0x10)
                                 info.si_code = FPE_FLTINV;
@@@ -2620,9 -2575,6 +2620,9 @@@ void mem_address_unaligned(struct pt_re
                 kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
                 goto out;
         }
+ +      if (is_no_fault_exception(regs))
+ +              return;
+ +
         info.si_signo = SIGBUS;
         info.si_errno = 0;
         info.si_code = BUS_ADRALN;
@@@ -2645,9 -2597,6 +2645,9 @@@ void sun4v_do_mna(struct pt_regs *regs
                 kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
                 return;
         }
+ +      if (is_no_fault_exception(regs))
+ +              return;
+ +
         info.si_signo = SIGBUS;
         info.si_errno = 0;
         info.si_code = BUS_ADRALN;
@@@ -2784,7 -2733,6 +2784,7 @@@ void do_getpsr(struct pt_regs *regs
         }
   }
   
+ +u64 cpu_mondo_counter[NR_CPUS] = {0};
   struct trap_per_cpu trap_block[NR_CPUS];
   EXPORT_SYMBOL(trap_block);
   
diff --combined include/linux/security.h

index 974bb9b0996c521c86d1ff9635fe0937ec9cbec9,6fff8c924718f1f9de32f7cd88c93124161438ba..707b524874d89b4a034b77f83ed82019390ef7cd
--- 1/include/linux/security.h
--- 2/include/linux/security.h
+++ b/include/linux/security.h
@@@ -85,11 -85,14 +85,13 @@@ extern int cap_capset(struct cred *new
                       const kernel_cap_t *inheritable,
                       const kernel_cap_t *permitted);
   extern int cap_bprm_set_creds(struct linux_binprm *bprm);
- -extern int cap_bprm_secureexec(struct linux_binprm *bprm);
   extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
                               const void *value, size_t size, int flags);
   extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
   extern int cap_inode_need_killpriv(struct dentry *dentry);
   extern int cap_inode_killpriv(struct dentry *dentry);
+ extern int cap_inode_getsecurity(struct inode *inode, const char *name,
+                                void **buffer, bool alloc);
   extern int cap_mmap_addr(unsigned long addr);
   extern int cap_mmap_file(struct file *file, unsigned long reqprot,
                          unsigned long prot, unsigned long flags);
@@@ -231,6 -234,7 +233,6 @@@ int security_bprm_set_creds(struct linu
   int security_bprm_check(struct linux_binprm *bprm);
   void security_bprm_committing_creds(struct linux_binprm *bprm);
   void security_bprm_committed_creds(struct linux_binprm *bprm);
- -int security_bprm_secureexec(struct linux_binprm *bprm);
   int security_sb_alloc(struct super_block *sb);
   void security_sb_free(struct super_block *sb);
   int security_sb_copy_data(char *orig, char *copy);
@@@ -539,6 -543,11 +541,6 @@@ static inline void security_bprm_commit
   {
   }
   
- -static inline int security_bprm_secureexec(struct linux_binprm *bprm)
- -{
- -      return cap_bprm_secureexec(bprm);
- -}
- -
   static inline int security_sb_alloc(struct super_block *sb)
   {
         return 0;
diff --combined include/linux/user_namespace.h

index b3575ce291483284847ccc7eef5fea679fade38b,4005877bb8b6d5ed633eae03abd60090e5a11d96..c18e0125234628f4ad6810eb8f6a19cf356a7f46
--- 1/include/linux/user_namespace.h
--- 2/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@@ -66,7 -66,7 +66,7 @@@ struct user_namespace 
   #endif
         struct ucounts          *ucounts;
         int ucount_max[UCOUNT_COUNTS];
- -};
+ +} __randomize_layout;
   
   struct ucounts {
         struct hlist_node node;
@@@ -112,8 -112,9 +112,9 @@@ extern ssize_t proc_projid_map_write(st
   extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
   extern int proc_setgroups_show(struct seq_file *m, void *v);
   extern bool userns_may_setgroups(const struct user_namespace *ns);
+ extern bool in_userns(const struct user_namespace *ancestor,
+                      const struct user_namespace *child);
   extern bool current_in_userns(const struct user_namespace *target_ns);
- 
   struct ns_common *ns_get_owner(struct ns_common *ns);
   #else
   
@@@ -144,6 -145,12 +145,12 @@@ static inline bool userns_may_setgroups
         return true;
   }
   
+ static inline bool in_userns(const struct user_namespace *ancestor,
+                            const struct user_namespace *child)
+ {
+       return true; /* stub for the #else branch: both arguments are ignored */
+ }
+ 
   static inline bool current_in_userns(const struct user_namespace *target_ns)
   {
         return true;
diff --combined kernel/exit.c

index a35d8a17e01ff39a5cc65459b9d8a12a19e3c523,c8f23613df5bbe591d8f4192d85dbd29e628c816..3481ababd06aa6cdd2aedd1fee0e56026acddc3b
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -764,6 -764,7 +764,6 @@@ void __noreturn do_exit(long code
   {
         struct task_struct *tsk = current;
         int group_dead;
- -      TASKS_RCU(int tasks_rcu_i);
   
         profile_task_exit(tsk);
         kcov_task_exit(tsk);
@@@ -818,8 -819,7 +818,8 @@@
          * Ensure that we must observe the pi_state in exit_mm() ->
          * mm_release() -> exit_pi_state_list().
          */
- -      raw_spin_unlock_wait(&tsk->pi_lock);
+ +      raw_spin_lock_irq(&tsk->pi_lock);
+ +      raw_spin_unlock_irq(&tsk->pi_lock);
   
         if (unlikely(in_atomic())) {
                 pr_info("note: %s[%d] exited with preempt_count %d\n",
@@@ -881,7 -881,9 +881,7 @@@
          */
         flush_ptrace_hw_breakpoint(tsk);
   
- -      TASKS_RCU(preempt_disable());
- -      TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
- -      TASKS_RCU(preempt_enable());
+ +      exit_tasks_rcu_start();
         exit_notify(tsk, group_dead);
         proc_exit_connector(tsk);
         mpol_put_task_policy(tsk);
@@@ -916,9 -918,8 +916,9 @@@
         if (tsk->nr_dirtied)
                 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
         exit_rcu();
- -      TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
+ +      exit_tasks_rcu_finish();
   
+ +      lockdep_free_task(tsk);
         do_task_dead();
   }
   EXPORT_SYMBOL_GPL(do_exit);
@@@ -1615,7 -1616,7 +1615,7 @@@ SYSCALL_DEFINE5(waitid, int, which, pid
         user_access_begin();
         unsafe_put_user(signo, &infop->si_signo, Efault);
         unsafe_put_user(0, &infop->si_errno, Efault);
-       unsafe_put_user((short)info.cause, &infop->si_code, Efault);
+       unsafe_put_user(info.cause, &infop->si_code, Efault);
         unsafe_put_user(info.pid, &infop->si_pid, Efault);
         unsafe_put_user(info.uid, &infop->si_uid, Efault);
         unsafe_put_user(info.status, &infop->si_status, Efault);
@@@ -1741,7 -1742,7 +1741,7 @@@ COMPAT_SYSCALL_DEFINE5(waitid
         user_access_begin();
         unsafe_put_user(signo, &infop->si_signo, Efault);
         unsafe_put_user(0, &infop->si_errno, Efault);
-       unsafe_put_user((short)info.cause, &infop->si_code, Efault);
+       unsafe_put_user(info.cause, &infop->si_code, Efault);
         unsafe_put_user(info.pid, &infop->si_pid, Efault);
         unsafe_put_user(info.uid, &infop->si_uid, Efault);
         unsafe_put_user(info.status, &infop->si_status, Efault);
diff --combined kernel/signal.c

index ed804a470dcd151c18915f956c2f325b6d22bb0f,6bd53c8189f0035fdff515a175c38556877bdb46..800a18f77732c14cf49d81bc615b01cd56d11933
--- 1/kernel/signal.c
--- 2/kernel/signal.c
+++ b/kernel/signal.c
@@@ -1194,11 -1194,7 +1194,11 @@@ force_sig_info(int sig, struct siginfo 
                         recalc_sigpending_and_wake(t);
                 }
         }
- -      if (action->sa.sa_handler == SIG_DFL)
+ +      /*
+ +       * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
+ +       * debugging to leave init killable.
+ +       */
+ +      if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
         ret = specific_send_sig_info(sig, info, t);
         spin_unlock_irqrestore(&t->sighand->siglock, flags);
@@@ -2686,6 -2682,51 +2686,51 @@@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, c
   }
   #endif
   
+ enum siginfo_layout siginfo_layout(int sig, int si_code) /* which siginfo fields (sig, si_code) uses */
+ {
+       enum siginfo_layout layout = SIL_KILL; /* result when nothing below matches */
+       if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { /* per-signal code range */
+               static const struct {
+                       unsigned char limit, layout; /* limit: highest si_code using this layout */
+               } filter[] = {
+                       [SIGILL]  = { NSIGILL,  SIL_FAULT },
+                       [SIGFPE]  = { NSIGFPE,  SIL_FAULT },
+                       [SIGSEGV] = { NSIGSEGV, SIL_FAULT },
+                       [SIGBUS]  = { NSIGBUS,  SIL_FAULT },
+                       [SIGTRAP] = { NSIGTRAP, SIL_FAULT },
+ #if defined(SIGEMT) && defined(NSIGEMT)
+                       [SIGEMT]  = { NSIGEMT,  SIL_FAULT },
+ #endif
+                       [SIGCHLD] = { NSIGCHLD, SIL_CHLD },
+                       [SIGPOLL] = { NSIGPOLL, SIL_POLL },
+ #ifdef __ARCH_SIGSYS
+                       [SIGSYS]  = { NSIGSYS,  SIL_SYS },
+ #endif
+               };
+               if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit))
+                       layout = filter[sig].layout;
+               else if (si_code <= NSIGPOLL) /* no table match: fall back to the poll layout */
+                       layout = SIL_POLL;
+       } else { /* si_code <= SI_USER or si_code >= SI_KERNEL */
+               if (si_code == SI_TIMER)
+                       layout = SIL_TIMER;
+               else if (si_code == SI_SIGIO)
+                       layout = SIL_POLL;
+               else if (si_code < 0)
+                       layout = SIL_RT;
+               /* Tests to support buggy kernel ABIs */
+ #ifdef TRAP_FIXME
+               if ((sig == SIGTRAP) && (si_code == TRAP_FIXME))
+                       layout = SIL_FAULT;
+ #endif
+ #ifdef FPE_FIXME
+               if ((sig == SIGFPE) && (si_code == FPE_FIXME))
+                       layout = SIL_FAULT;
+ #endif
+       }
+       return layout;
+ }
+ 
   #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
   
   int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
@@@ -2708,22 -2749,20 +2753,20 @@@
          */
         err = __put_user(from->si_signo, &to->si_signo);
         err |= __put_user(from->si_errno, &to->si_errno);
-       err |= __put_user((short)from->si_code, &to->si_code);
-       switch (from->si_code & __SI_MASK) {
-       case __SI_KILL:
+       err |= __put_user(from->si_code, &to->si_code);
+       switch (siginfo_layout(from->si_signo, from->si_code)) {
+       case SIL_KILL:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 break;
-       case __SI_TIMER:
-                err |= __put_user(from->si_tid, &to->si_tid);
-                err |= __put_user(from->si_overrun, &to->si_overrun);
-                err |= __put_user(from->si_ptr, &to->si_ptr);
+       case SIL_TIMER:
+               /* Unreached SI_TIMER is negative */
                 break;
-       case __SI_POLL:
+       case SIL_POLL:
                 err |= __put_user(from->si_band, &to->si_band);
                 err |= __put_user(from->si_fd, &to->si_fd);
                 break;
-       case __SI_FAULT:
+       case SIL_FAULT:
                 err |= __put_user(from->si_addr, &to->si_addr);
   #ifdef __ARCH_SI_TRAPNO
                 err |= __put_user(from->si_trapno, &to->si_trapno);
@@@ -2748,30 -2787,25 +2791,25 @@@
                         err |= __put_user(from->si_pkey, &to->si_pkey);
   #endif
                 break;
-       case __SI_CHLD:
+       case SIL_CHLD:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 err |= __put_user(from->si_status, &to->si_status);
                 err |= __put_user(from->si_utime, &to->si_utime);
                 err |= __put_user(from->si_stime, &to->si_stime);
                 break;
-       case __SI_RT: /* This is not generated by the kernel as of now. */
-       case __SI_MESGQ: /* But this is */
+       case SIL_RT:
                 err |= __put_user(from->si_pid, &to->si_pid);
                 err |= __put_user(from->si_uid, &to->si_uid);
                 err |= __put_user(from->si_ptr, &to->si_ptr);
                 break;
   #ifdef __ARCH_SIGSYS
-       case __SI_SYS:
+       case SIL_SYS:
                 err |= __put_user(from->si_call_addr, &to->si_call_addr);
                 err |= __put_user(from->si_syscall, &to->si_syscall);
                 err |= __put_user(from->si_arch, &to->si_arch);
                 break;
   #endif
-       default: /* this is just in case for now ... */
-               err |= __put_user(from->si_pid, &to->si_pid);
-               err |= __put_user(from->si_uid, &to->si_uid);
-               break;
         }
         return err;
   }
@@@ -3307,15 -3341,12 +3345,15 @@@ SYSCALL_DEFINE1(sigpending, old_sigset_
   #ifdef CONFIG_COMPAT
   COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
   {
+ +#ifdef __BIG_ENDIAN
         sigset_t set;
- -      int err = do_sigpending(&set, sizeof(old_sigset_t)); 
- -      if (err == 0)
- -              if (copy_to_user(set32, &set, sizeof(old_sigset_t)))
- -                      err = -EFAULT;
+ +      int err = do_sigpending(&set, sizeof(set.sig[0]));
+ +      if (!err)
+ +              err = put_user(set.sig[0], set32);
         return err;
+ +#else
+ +      return sys_rt_sigpending((sigset_t __user *)set32, sizeof(*set32));
+ +#endif
   }
   #endif
   
diff --combined security/commoncap.c

index d8e26fb9781d2bc7ae945602cb88a84ff786710d,c37d27dd1e2c1edccdebab4ee2dcb0bf5541bdb1..6bf72b175b49caf336e7ef8f3d83cf256c955120
--- 1/security/commoncap.c
--- 2/security/commoncap.c
+++ b/security/commoncap.c
@@@ -82,8 -82,11 +82,11 @@@ int cap_capable(const struct cred *cred
                 if (ns == cred->user_ns)
                         return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
   
-               /* Have we tried all of the parent namespaces? */
-               if (ns == &init_user_ns)
+               /*
+                * If we're already at a lower level than we're looking for,
+                * we're done searching.
+                */
+               if (ns->level <= cred->user_ns->level)
                         return -EPERM;
   
                 /* 
@@@ -285,6 -288,15 +288,6 @@@ int cap_capset(struct cred *new
         return 0;
   }
   
- -/*
- - * Clear proposed capability sets for execve().
- - */
- -static inline void bprm_clear_caps(struct linux_binprm *bprm)
- -{
- -      cap_clear(bprm->cred->cap_permitted);
- -      bprm->cap_effective = false;
- -}
- -
   /**
    * cap_inode_need_killpriv - Determine if inode change affects privileges
    * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
@@@ -323,6 -335,209 +326,209 @@@ int cap_inode_killpriv(struct dentry *d
         return error;
   }
   
+ static bool rootid_owns_currentns(kuid_t kroot) /* is kroot uid 0 in current's userns or an ancestor? */
+ {
+       struct user_namespace *ns;
+ 
+       if (!uid_valid(kroot))
+               return false; /* an invalid kuid owns nothing */
+ 
+       for (ns = current_user_ns(); ; ns = ns->parent) { /* walk up via ->parent */
+               if (from_kuid(ns, kroot) == 0)
+                       return true;
+               if (ns == &init_user_ns)
+                       break; /* init_user_ns is the last namespace examined */
+       }
+ 
+       return false;
+ }
+ 
+ static __u32 sansflags(__u32 m)
+ {
+       return m & ~VFS_CAP_FLAGS_EFFECTIVE; /* m with the "effective" flag bit cleared */
+ }
+ 
+ static bool is_v2header(size_t size, __le32 magic) /* true for a v2-sized, v2-revision header */
+ {
+       __u32 m = le32_to_cpu(magic); /* convert the little-endian magic for comparison */
+       if (size != XATTR_CAPS_SZ_2)
+               return false;
+       return sansflags(m) == VFS_CAP_REVISION_2; /* effective flag is ignored */
+ }
+ 
+ static bool is_v3header(size_t size, __le32 magic) /* true for a v3-sized, v3-revision header */
+ {
+       __u32 m = le32_to_cpu(magic); /* convert the little-endian magic for comparison */
+ 
+       if (size != XATTR_CAPS_SZ_3)
+               return false;
+       return sansflags(m) == VFS_CAP_REVISION_3; /* effective flag is ignored */
+ }
+ 
+ /*
+  * getsecurity: called for security.* before any attempt to read the xattr
+  * from the inode itself, so we can read the on-disk value and convert it.
+  * If we return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
+  * Otherwise returns the value size or a negative errno (-ENOMEM when the
+  * converted copy cannot be allocated); if @alloc, *@buffer gets the value.
+  *
+  * Note we are not called by vfs_getxattr_alloc(), but that is only called
+  * by the integrity subsystem, which really wants the unconverted values -
+  * so that's good.
+  */
+ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
+                         bool alloc)
+ {
+       int size, ret;
+       kuid_t kroot;
+       uid_t root, mappedroot;
+       char *tmpbuf = NULL;
+       struct vfs_cap_data *cap;
+       struct vfs_ns_cap_data *nscap;
+       struct dentry *dentry;
+       struct user_namespace *fs_ns;
+ 
+       if (strcmp(name, "capability") != 0)
+               return -EOPNOTSUPP; /* only the "capability" name is handled here */
+ 
+       dentry = d_find_alias(inode);
+       if (!dentry)
+               return -EINVAL;
+ 
+       size = sizeof(struct vfs_ns_cap_data);
+       ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS,
+                                &tmpbuf, size, GFP_NOFS);
+       dput(dentry); /* the dentry was only needed for the read above */
+ 
+       if (ret < 0)
+               return ret;
+ 
+       fs_ns = inode->i_sb->s_user_ns;
+       cap = (struct vfs_cap_data *) tmpbuf;
+       if (is_v2header((size_t) ret, cap->magic_etc)) {
+               /* If this is sizeof(vfs_cap_data) then we're ok with the
+                * on-disk value, so return that.  */
+               if (alloc)
+                       *buffer = tmpbuf;
+               else
+                       kfree(tmpbuf);
+               return ret;
+       } else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+               kfree(tmpbuf);
+               return -EINVAL; /* neither a v2 nor a v3 header */
+       }
+ 
+       nscap = (struct vfs_ns_cap_data *) tmpbuf;
+       root = le32_to_cpu(nscap->rootid);
+       kroot = make_kuid(fs_ns, root); /* rootid is stored relative to the fs userns */
+ 
+       /* If the root kuid maps to a valid uid in current ns, then return
+        * this as a nscap. */
+       mappedroot = from_kuid(current_user_ns(), kroot);
+       if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
+               if (alloc) {
+                       *buffer = tmpbuf;
+                       nscap->rootid = cpu_to_le32(mappedroot); /* report rootid as current ns sees it */
+               } else
+                       kfree(tmpbuf);
+               return size; /* still sizeof(struct vfs_ns_cap_data) */
+       }
+ 
+       if (!rootid_owns_currentns(kroot)) {
+               kfree(tmpbuf);
+               return -EOPNOTSUPP;
+       }
+ 
+       /* This comes from a parent namespace.  Return as a v2 capability */
+       size = sizeof(struct vfs_cap_data);
+       if (alloc) {
+               *buffer = kmalloc(size, GFP_ATOMIC);
+               if (*buffer) {
+                       struct vfs_cap_data *cap = *buffer;
+                       __u32 magic = VFS_CAP_REVISION_2; /* CPU-endian until stored below */
+                       __u32 nsmagic = le32_to_cpu(nscap->magic_etc);
+                       if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
+                               magic |= VFS_CAP_FLAGS_EFFECTIVE;
+                       memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+                       cap->magic_etc = cpu_to_le32(magic);
+               } else
+                       size = -ENOMEM; /* never report success with *buffer == NULL */
+       }
+       kfree(tmpbuf);
+       return size;
+ }
+ 
+ static kuid_t rootid_from_xattr(const void *value, size_t size,
+                               struct user_namespace *task_ns)
+ {
+       const struct vfs_ns_cap_data *nscap = value;
+       uid_t rootid = 0; /* used whenever the xattr is not v3-sized */
+ 
+       if (size == XATTR_CAPS_SZ_3)
+               rootid = le32_to_cpu(nscap->rootid);
+ 
+       return make_kuid(task_ns, rootid); /* rootid is interpreted relative to task_ns */
+ }
+ 
+ static bool validheader(size_t size, __le32 magic)
+ {
+       return is_v2header(size, magic) || is_v3header(size, magic); /* v2 or v3 only */
+ }
+ 
+ /*
+  * User requested a write of security.capability.  If needed, update the
+  * xattr to change from v2 to v3, or to fixup the v3 rootid.  When a new v3
+  * value is built, *@ivalue is kvfree()d and replaced by the new buffer.
+  * If all is ok, we return the new size, on error return < 0.
+  */
+ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
+ {
+       struct vfs_ns_cap_data *nscap;
+       uid_t nsrootid;
+       const struct vfs_cap_data *cap = *ivalue;
+       __u32 magic, nsmagic;
+       struct inode *inode = d_backing_inode(dentry);
+       struct user_namespace *task_ns = current_user_ns(),
+               *fs_ns = inode->i_sb->s_user_ns;
+       kuid_t rootid;
+       size_t newsize;
+ 
+       if (!*ivalue)
+               return -EINVAL;
+       if (!validheader(size, cap->magic_etc))
+               return -EINVAL; /* only v2 and v3 headers are accepted */
+       if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
+               return -EPERM;
+       if (size == XATTR_CAPS_SZ_2)
+               if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
+                       /* user is privileged, just write the v2 */
+                       return size;
+ 
+       rootid = rootid_from_xattr(*ivalue, size, task_ns); /* v2-sized input yields uid 0 of task_ns */
+       if (!uid_valid(rootid))
+               return -EINVAL;
+ 
+       nsrootid = from_kuid(fs_ns, rootid); /* the same id as seen from the filesystem's userns */
+       if (nsrootid == -1)
+               return -EINVAL;
+ 
+       newsize = sizeof(struct vfs_ns_cap_data);
+       nscap = kmalloc(newsize, GFP_ATOMIC);
+       if (!nscap)
+               return -ENOMEM;
+       nscap->rootid = cpu_to_le32(nsrootid);
+       nsmagic = VFS_CAP_REVISION_3;
+       magic = le32_to_cpu(cap->magic_etc);
+       if (magic & VFS_CAP_FLAGS_EFFECTIVE)
+               nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; /* carry the effective flag over */
+       nscap->magic_etc = cpu_to_le32(nsmagic);
+       memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+ 
+       kvfree(*ivalue); /* the caller's buffer is replaced by the v3 copy */
+       *ivalue = nscap;
+       return newsize;
+ }
+ 
   /*
    * Calculate the new process capability sets from the capability sets attached
    * to a file.
@@@ -376,7 -591,10 +582,10 @@@ int get_vfs_caps_from_disk(const struc
         __u32 magic_etc;
         unsigned tocopy, i;
         int size;
-       struct vfs_cap_data caps;
+       struct vfs_ns_cap_data data, *nscaps = &data;
+       struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
+       kuid_t rootkuid;
+       struct user_namespace *fs_ns = inode->i_sb->s_user_ns;
   
         memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
   
@@@ -384,18 -602,20 +593,20 @@@
                 return -ENODATA;
   
         size = __vfs_getxattr((struct dentry *)dentry, inode,
-                             XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ);
+                             XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
         if (size == -ENODATA || size == -EOPNOTSUPP)
                 /* no data, that's ok */
                 return -ENODATA;
+ 
         if (size < 0)
                 return size;
   
         if (size < sizeof(magic_etc))
                 return -EINVAL;
   
-       cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
+       cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
   
+       rootkuid = make_kuid(fs_ns, 0);
         switch (magic_etc & VFS_CAP_REVISION_MASK) {
         case VFS_CAP_REVISION_1:
                 if (size != XATTR_CAPS_SZ_1)
@@@ -407,15 -627,27 +618,27 @@@
                         return -EINVAL;
                 tocopy = VFS_CAP_U32_2;
                 break;
+       case VFS_CAP_REVISION_3:
+               if (size != XATTR_CAPS_SZ_3)
+                       return -EINVAL;
+               tocopy = VFS_CAP_U32_3;
+               rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
+               break;
+ 
         default:
                 return -EINVAL;
         }
+       /* Limit the caps to the mounter of the filesystem
+        * or the more limited uid specified in the xattr.
+        */
+       if (!rootid_owns_currentns(rootkuid))
+               return -ENODATA;
   
         CAP_FOR_EACH_U32(i) {
                 if (i >= tocopy)
                         break;
-               cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
-               cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
+               cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
+               cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
         }
   
         cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
@@@ -434,7 -666,7 +657,7 @@@ static int get_file_caps(struct linux_b
         int rc = 0;
         struct cpu_vfs_cap_data vcaps;
   
- -      bprm_clear_caps(bprm);
+ +      cap_clear(bprm->cred->cap_permitted);
   
         if (!file_caps_enabled)
                 return 0;
@@@ -453,8 -685,8 +676,8 @@@
         rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
         if (rc < 0) {
                 if (rc == -EINVAL)
-                       printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
-                               __func__, rc, bprm->filename);
+                       printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
+                                       bprm->filename);
                 else if (rc == -ENODATA)
                         rc = 0;
                 goto out;
@@@ -467,7 -699,7 +690,7 @@@
   
   out:
         if (rc)
- -              bprm_clear_caps(bprm);
+ +              cap_clear(bprm->cred->cap_permitted);
   
         return rc;
   }
@@@ -576,6 -808,8 +799,6 @@@ skip
         if (WARN_ON(!cap_ambient_invariant_ok(new)))
                 return -EPERM;
   
- -      bprm->cap_effective = effective;
- -
         /*
          * Audit candidate if current->cap_effective is set
          *
@@@ -603,17 -837,33 +826,17 @@@
         if (WARN_ON(!cap_ambient_invariant_ok(new)))
                 return -EPERM;
   
- -      return 0;
- -}
- -
- -/**
- - * cap_bprm_secureexec - Determine whether a secure execution is required
- - * @bprm: The execution parameters
- - *
- - * Determine whether a secure execution is required, return 1 if it is, and 0
- - * if it is not.
- - *
- - * The credentials have been committed by this point, and so are no longer
- - * available through @bprm->cred.
- - */
- -int cap_bprm_secureexec(struct linux_binprm *bprm)
- -{
- -      const struct cred *cred = current_cred();
- -      kuid_t root_uid = make_kuid(cred->user_ns, 0);
- -
- -      if (!uid_eq(cred->uid, root_uid)) {
- -              if (bprm->cap_effective)
- -                      return 1;
- -              if (!cap_issubset(cred->cap_permitted, cred->cap_ambient))
- -                      return 1;
+ +      /* Check for privilege-elevated exec. */
+ +      bprm->cap_elevated = 0;
+ +      if (is_setid) {
+ +              bprm->cap_elevated = 1;
+ +      } else if (!uid_eq(new->uid, root_uid)) {
+ +              if (effective ||
+ +                  !cap_issubset(new->cap_permitted, new->cap_ambient))
+ +                      bprm->cap_elevated = 1;
         }
   
- -      return (!uid_eq(cred->euid, cred->uid) ||
- -              !gid_eq(cred->egid, cred->gid));
+ +      return 0;
   }
   
   /**
@@@ -633,15 -883,19 +856,19 @@@
   int cap_inode_setxattr(struct dentry *dentry, const char *name,
                        const void *value, size_t size, int flags)
   {
-       if (!strcmp(name, XATTR_NAME_CAPS)) {
-               if (!capable(CAP_SETFCAP))
-                       return -EPERM;
+       /* Ignore non-security xattrs */
+       if (strncmp(name, XATTR_SECURITY_PREFIX,
+                       sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
+               return 0;
+ 
+       /*
+        * For XATTR_NAME_CAPS the check will be done in
+        * cap_convert_nscap(), called by setxattr()
+        */
+       if (strcmp(name, XATTR_NAME_CAPS) == 0)
                 return 0;
-       }
   
-       if (!strncmp(name, XATTR_SECURITY_PREFIX,
-                    sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-           !capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
         return 0;
   }
@@@ -659,15 -913,22 +886,22 @@@
    */
   int cap_inode_removexattr(struct dentry *dentry, const char *name)
   {
-       if (!strcmp(name, XATTR_NAME_CAPS)) {
-               if (!capable(CAP_SETFCAP))
+       /* Ignore non-security xattrs */
+       if (strncmp(name, XATTR_SECURITY_PREFIX,
+                       sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
+               return 0;
+ 
+       if (strcmp(name, XATTR_NAME_CAPS) == 0) {
+               /* security.capability gets namespaced */
+               struct inode *inode = d_backing_inode(dentry);
+               if (!inode)
+                       return -EINVAL;
+               if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
                         return -EPERM;
                 return 0;
         }
   
-       if (!strncmp(name, XATTR_SECURITY_PREFIX,
-                    sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-           !capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
         return 0;
   }
@@@ -1052,8 -1313,10 +1286,9 @@@ struct security_hook_list capability_ho
         LSM_HOOK_INIT(capget, cap_capget),
         LSM_HOOK_INIT(capset, cap_capset),
         LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds),
- -      LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec),
         LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
         LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
+       LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
         LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
         LSM_HOOK_INIT(mmap_file, cap_mmap_file),
         LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 12 Sep 2017 01:34:47 +0000 (18:34 -0700)
		1	2
arch/alpha/kernel/traps.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm64/kernel/signal32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/traps_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/security.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/user_namespace.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/signal.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/commoncap.c	patch \|	diff1 \|	diff2 \|	blob \| history