]> asedeno.scripts.mit.edu Git - linux.git/blob - ipc/shm.c
Merge ra.kernel.org:/pub/scm/linux/kernel/git/davem/net
[linux.git] / ipc / shm.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *       Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <linux/init.h>
33 #include <linux/file.h>
34 #include <linux/mman.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/audit.h>
39 #include <linux/capability.h>
40 #include <linux/ptrace.h>
41 #include <linux/seq_file.h>
42 #include <linux/rwsem.h>
43 #include <linux/nsproxy.h>
44 #include <linux/mount.h>
45 #include <linux/ipc_namespace.h>
46 #include <linux/rhashtable.h>
47
48 #include <linux/uaccess.h>
49
50 #include "util.h"
51
52 struct shmid_kernel /* private to the kernel */
53 {
54         struct kern_ipc_perm    shm_perm;
55         struct file             *shm_file;
56         unsigned long           shm_nattch;
57         unsigned long           shm_segsz;
58         time64_t                shm_atim;
59         time64_t                shm_dtim;
60         time64_t                shm_ctim;
61         struct pid              *shm_cprid;
62         struct pid              *shm_lprid;
63         struct user_struct      *mlock_user;
64
65         /* The task created the shm object.  NULL if the task is dead. */
66         struct task_struct      *shm_creator;
67         struct list_head        shm_clist;      /* list by creator */
68 } __randomize_layout;
69
/* Flag bits stored in the upper byte of the segment's mode word. */
#define SHM_DEST	01000	/* destroy the segment on last detach */
#define SHM_LOCKED	02000	/* keep the segment out of swap */
73
/* State stored in ->private_data of a file that uses shm_file_operations. */
struct shm_file_data {
	int id;				/* id passed to shm_lock() */
	struct ipc_namespace *ns;	/* put in shm_release() */
	struct file *file;		/* underlying file, put in shm_release() */
	const struct vm_operations_struct *vm_ops;	/* saved by shm_mmap() */
};
80
81 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
82
83 static const struct file_operations shm_file_operations;
84 static const struct vm_operations_struct shm_vm_ops;
85
86 #define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])
87
88 #define shm_unlock(shp)                 \
89         ipc_unlock(&(shp)->shm_perm)
90
91 static int newseg(struct ipc_namespace *, struct ipc_params *);
92 static void shm_open(struct vm_area_struct *vma);
93 static void shm_close(struct vm_area_struct *vma);
94 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
95 #ifdef CONFIG_PROC_FS
96 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
97 #endif
98
99 int shm_init_ns(struct ipc_namespace *ns)
100 {
101         ns->shm_ctlmax = SHMMAX;
102         ns->shm_ctlall = SHMALL;
103         ns->shm_ctlmni = SHMMNI;
104         ns->shm_rmid_forced = 0;
105         ns->shm_tot = 0;
106         return ipc_init_ids(&shm_ids(ns));
107 }
108
109 /*
110  * Called with shm_ids.rwsem (writer) and the shp structure locked.
111  * Only shm_ids.rwsem remains locked on exit.
112  */
113 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
114 {
115         struct shmid_kernel *shp;
116
117         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
118
119         if (shp->shm_nattch) {
120                 shp->shm_perm.mode |= SHM_DEST;
121                 /* Do not find it any more */
122                 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
123                 shm_unlock(shp);
124         } else
125                 shm_destroy(ns, shp);
126 }
127
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release the shm state of a namespace
 * @ns: namespace being torn down
 *
 * Runs do_shm_rmid() on the segments through free_ipcs(), then destroys
 * the idr and the key hash table of the shm ids.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&shm_ids(ns).ipcs_idr);
	rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
136
137 static int __init ipc_ns_init(void)
138 {
139         const int err = shm_init_ns(&init_ipc_ns);
140         WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
141         return err;
142 }
143
144 pure_initcall(ipc_ns_init);
145
146 void __init shm_init(void)
147 {
148         ipc_init_proc_interface("sysvipc/shm",
149 #if BITS_PER_LONG <= 32
150                                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
151 #else
152                                 "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
153 #endif
154                                 IPC_SHM_IDS, sysvipc_shm_proc_show);
155 }
156
157 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
158 {
159         struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
160
161         if (IS_ERR(ipcp))
162                 return ERR_CAST(ipcp);
163
164         return container_of(ipcp, struct shmid_kernel, shm_perm);
165 }
166
167 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
168 {
169         struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
170
171         if (IS_ERR(ipcp))
172                 return ERR_CAST(ipcp);
173
174         return container_of(ipcp, struct shmid_kernel, shm_perm);
175 }
176
177 /*
178  * shm_lock_(check_) routines are called in the paths where the rwsem
179  * is not necessarily held.
180  */
181 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
182 {
183         struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
184
185         /*
186          * Callers of shm_lock() must validate the status of the returned ipc
187          * object pointer (as returned by ipc_lock()), and error out as
188          * appropriate.
189          */
190         if (IS_ERR(ipcp))
191                 return (void *)ipcp;
192         return container_of(ipcp, struct shmid_kernel, shm_perm);
193 }
194
195 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
196 {
197         rcu_read_lock();
198         ipc_lock_object(&ipcp->shm_perm);
199 }
200
201 static void shm_rcu_free(struct rcu_head *head)
202 {
203         struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
204                                                         rcu);
205         struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
206                                                         shm_perm);
207         security_shm_free(&shp->shm_perm);
208         kvfree(shp);
209 }
210
211 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
212 {
213         list_del(&s->shm_clist);
214         ipc_rmid(&shm_ids(ns), &s->shm_perm);
215 }
216
217
218 static int __shm_open(struct vm_area_struct *vma)
219 {
220         struct file *file = vma->vm_file;
221         struct shm_file_data *sfd = shm_file_data(file);
222         struct shmid_kernel *shp;
223
224         shp = shm_lock(sfd->ns, sfd->id);
225
226         if (IS_ERR(shp))
227                 return PTR_ERR(shp);
228
229         if (shp->shm_file != sfd->file) {
230                 /* ID was reused */
231                 shm_unlock(shp);
232                 return -EINVAL;
233         }
234
235         shp->shm_atim = ktime_get_real_seconds();
236         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
237         shp->shm_nattch++;
238         shm_unlock(shp);
239         return 0;
240 }
241
/*
 * This is called by fork, once for every shm attach.
 *
 * A failure of __shm_open() means we raced in the idr lookup or with
 * shm_destroy().  Either way, the ID is busted; warn about it once.
 */
static void shm_open(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(__shm_open(vma));
}
252
253 /*
254  * shm_destroy - free the struct shmid_kernel
255  *
256  * @ns: namespace
257  * @shp: struct to free
258  *
259  * It has to be called with shp and shm_ids.rwsem (writer) locked,
260  * but returns with shp unlocked and freed.
261  */
262 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
263 {
264         struct file *shm_file;
265
266         shm_file = shp->shm_file;
267         shp->shm_file = NULL;
268         ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
269         shm_rmid(ns, shp);
270         shm_unlock(shp);
271         if (!is_file_hugepages(shm_file))
272                 shmem_lock(shm_file, 0, shp->mlock_user);
273         else if (shp->mlock_user)
274                 user_shm_unlock(i_size_read(file_inode(shm_file)),
275                                 shp->mlock_user);
276         fput(shm_file);
277         ipc_update_pid(&shp->shm_cprid, NULL);
278         ipc_update_pid(&shp->shm_lprid, NULL);
279         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
280 }
281
282 /*
283  * shm_may_destroy - identifies whether shm segment should be destroyed now
284  *
285  * Returns true if and only if there are no active users of the segment and
286  * one of the following is true:
287  *
288  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
289  *
290  * 2) sysctl kernel.shm_rmid_forced is set to 1.
291  */
292 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
293 {
294         return (shp->shm_nattch == 0) &&
295                (ns->shm_rmid_forced ||
296                 (shp->shm_perm.mode & SHM_DEST));
297 }
298
299 /*
300  * remove the attach descriptor vma.
301  * free memory for segment if it is marked destroyed.
302  * The descriptor has already been removed from the current->mm->mmap list
303  * and will later be kfree()d.
304  */
305 static void shm_close(struct vm_area_struct *vma)
306 {
307         struct file *file = vma->vm_file;
308         struct shm_file_data *sfd = shm_file_data(file);
309         struct shmid_kernel *shp;
310         struct ipc_namespace *ns = sfd->ns;
311
312         down_write(&shm_ids(ns).rwsem);
313         /* remove from the list of attaches of the shm segment */
314         shp = shm_lock(ns, sfd->id);
315
316         /*
317          * We raced in the idr lookup or with shm_destroy().
318          * Either way, the ID is busted.
319          */
320         if (WARN_ON_ONCE(IS_ERR(shp)))
321                 goto done; /* no-op */
322
323         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
324         shp->shm_dtim = ktime_get_real_seconds();
325         shp->shm_nattch--;
326         if (shm_may_destroy(ns, shp))
327                 shm_destroy(ns, shp);
328         else
329                 shm_unlock(shp);
330 done:
331         up_write(&shm_ids(ns).rwsem);
332 }
333
334 /* Called with ns->shm_ids(ns).rwsem locked */
335 static int shm_try_destroy_orphaned(int id, void *p, void *data)
336 {
337         struct ipc_namespace *ns = data;
338         struct kern_ipc_perm *ipcp = p;
339         struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
340
341         /*
342          * We want to destroy segments without users and with already
343          * exit'ed originating process.
344          *
345          * As shp->* are changed under rwsem, it's safe to skip shp locking.
346          */
347         if (shp->shm_creator != NULL)
348                 return 0;
349
350         if (shm_may_destroy(ns, shp)) {
351                 shm_lock_by_ptr(shp);
352                 shm_destroy(ns, shp);
353         }
354         return 0;
355 }
356
357 void shm_destroy_orphaned(struct ipc_namespace *ns)
358 {
359         down_write(&shm_ids(ns).rwsem);
360         if (shm_ids(ns).in_use)
361                 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
362         up_write(&shm_ids(ns).rwsem);
363 }
364
365 /* Locking assumes this will only be called with task == current */
366 void exit_shm(struct task_struct *task)
367 {
368         struct ipc_namespace *ns = task->nsproxy->ipc_ns;
369         struct shmid_kernel *shp, *n;
370
371         if (list_empty(&task->sysvshm.shm_clist))
372                 return;
373
374         /*
375          * If kernel.shm_rmid_forced is not set then only keep track of
376          * which shmids are orphaned, so that a later set of the sysctl
377          * can clean them up.
378          */
379         if (!ns->shm_rmid_forced) {
380                 down_read(&shm_ids(ns).rwsem);
381                 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
382                         shp->shm_creator = NULL;
383                 /*
384                  * Only under read lock but we are only called on current
385                  * so no entry on the list will be shared.
386                  */
387                 list_del(&task->sysvshm.shm_clist);
388                 up_read(&shm_ids(ns).rwsem);
389                 return;
390         }
391
392         /*
393          * Destroy all already created segments, that were not yet mapped,
394          * and mark any mapped as orphan to cover the sysctl toggling.
395          * Destroy is skipped if shm_may_destroy() returns false.
396          */
397         down_write(&shm_ids(ns).rwsem);
398         list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
399                 shp->shm_creator = NULL;
400
401                 if (shm_may_destroy(ns, shp)) {
402                         shm_lock_by_ptr(shp);
403                         shm_destroy(ns, shp);
404                 }
405         }
406
407         /* Remove the list head from any segments still attached. */
408         list_del(&task->sysvshm.shm_clist);
409         up_write(&shm_ids(ns).rwsem);
410 }
411
412 static vm_fault_t shm_fault(struct vm_fault *vmf)
413 {
414         struct file *file = vmf->vma->vm_file;
415         struct shm_file_data *sfd = shm_file_data(file);
416
417         return sfd->vm_ops->fault(vmf);
418 }
419
420 static int shm_split(struct vm_area_struct *vma, unsigned long addr)
421 {
422         struct file *file = vma->vm_file;
423         struct shm_file_data *sfd = shm_file_data(file);
424
425         if (sfd->vm_ops->split)
426                 return sfd->vm_ops->split(vma, addr);
427
428         return 0;
429 }
430
431 #ifdef CONFIG_NUMA
432 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
433 {
434         struct file *file = vma->vm_file;
435         struct shm_file_data *sfd = shm_file_data(file);
436         int err = 0;
437
438         if (sfd->vm_ops->set_policy)
439                 err = sfd->vm_ops->set_policy(vma, new);
440         return err;
441 }
442
443 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
444                                         unsigned long addr)
445 {
446         struct file *file = vma->vm_file;
447         struct shm_file_data *sfd = shm_file_data(file);
448         struct mempolicy *pol = NULL;
449
450         if (sfd->vm_ops->get_policy)
451                 pol = sfd->vm_ops->get_policy(vma, addr);
452         else if (vma->vm_policy)
453                 pol = vma->vm_policy;
454
455         return pol;
456 }
457 #endif
458
459 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
460 {
461         struct shm_file_data *sfd = shm_file_data(file);
462         int ret;
463
464         /*
465          * In case of remap_file_pages() emulation, the file can represent an
466          * IPC ID that was removed, and possibly even reused by another shm
467          * segment already.  Propagate this case as an error to caller.
468          */
469         ret = __shm_open(vma);
470         if (ret)
471                 return ret;
472
473         ret = call_mmap(sfd->file, vma);
474         if (ret) {
475                 shm_close(vma);
476                 return ret;
477         }
478         sfd->vm_ops = vma->vm_ops;
479 #ifdef CONFIG_MMU
480         WARN_ON(!sfd->vm_ops->fault);
481 #endif
482         vma->vm_ops = &shm_vm_ops;
483         return 0;
484 }
485
486 static int shm_release(struct inode *ino, struct file *file)
487 {
488         struct shm_file_data *sfd = shm_file_data(file);
489
490         put_ipc_ns(sfd->ns);
491         fput(sfd->file);
492         shm_file_data(file) = NULL;
493         kfree(sfd);
494         return 0;
495 }
496
497 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
498 {
499         struct shm_file_data *sfd = shm_file_data(file);
500
501         if (!sfd->file->f_op->fsync)
502                 return -EINVAL;
503         return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
504 }
505
506 static long shm_fallocate(struct file *file, int mode, loff_t offset,
507                           loff_t len)
508 {
509         struct shm_file_data *sfd = shm_file_data(file);
510
511         if (!sfd->file->f_op->fallocate)
512                 return -EOPNOTSUPP;
513         return sfd->file->f_op->fallocate(file, mode, offset, len);
514 }
515
516 static unsigned long shm_get_unmapped_area(struct file *file,
517         unsigned long addr, unsigned long len, unsigned long pgoff,
518         unsigned long flags)
519 {
520         struct shm_file_data *sfd = shm_file_data(file);
521
522         return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
523                                                 pgoff, flags);
524 }
525
526 static const struct file_operations shm_file_operations = {
527         .mmap           = shm_mmap,
528         .fsync          = shm_fsync,
529         .release        = shm_release,
530         .get_unmapped_area      = shm_get_unmapped_area,
531         .llseek         = noop_llseek,
532         .fallocate      = shm_fallocate,
533 };
534
535 /*
536  * shm_file_operations_huge is now identical to shm_file_operations,
537  * but we keep it distinct for the sake of is_file_shm_hugepages().
538  */
539 static const struct file_operations shm_file_operations_huge = {
540         .mmap           = shm_mmap,
541         .fsync          = shm_fsync,
542         .release        = shm_release,
543         .get_unmapped_area      = shm_get_unmapped_area,
544         .llseek         = noop_llseek,
545         .fallocate      = shm_fallocate,
546 };
547
548 bool is_file_shm_hugepages(struct file *file)
549 {
550         return file->f_op == &shm_file_operations_huge;
551 }
552
553 static const struct vm_operations_struct shm_vm_ops = {
554         .open   = shm_open,     /* callback for a new vm-area open */
555         .close  = shm_close,    /* callback for when the vm-area is released */
556         .fault  = shm_fault,
557         .split  = shm_split,
558 #if defined(CONFIG_NUMA)
559         .set_policy = shm_set_policy,
560         .get_policy = shm_get_policy,
561 #endif
562 };
563
564 /**
565  * newseg - Create a new shared memory segment
566  * @ns: namespace
567  * @params: ptr to the structure that contains key, size and shmflg
568  *
569  * Called with shm_ids.rwsem held as a writer.
570  */
571 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
572 {
573         key_t key = params->key;
574         int shmflg = params->flg;
575         size_t size = params->u.size;
576         int error;
577         struct shmid_kernel *shp;
578         size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
579         struct file *file;
580         char name[13];
581         vm_flags_t acctflag = 0;
582
583         if (size < SHMMIN || size > ns->shm_ctlmax)
584                 return -EINVAL;
585
586         if (numpages << PAGE_SHIFT < size)
587                 return -ENOSPC;
588
589         if (ns->shm_tot + numpages < ns->shm_tot ||
590                         ns->shm_tot + numpages > ns->shm_ctlall)
591                 return -ENOSPC;
592
593         shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
594         if (unlikely(!shp))
595                 return -ENOMEM;
596
597         shp->shm_perm.key = key;
598         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
599         shp->mlock_user = NULL;
600
601         shp->shm_perm.security = NULL;
602         error = security_shm_alloc(&shp->shm_perm);
603         if (error) {
604                 kvfree(shp);
605                 return error;
606         }
607
608         sprintf(name, "SYSV%08x", key);
609         if (shmflg & SHM_HUGETLB) {
610                 struct hstate *hs;
611                 size_t hugesize;
612
613                 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
614                 if (!hs) {
615                         error = -EINVAL;
616                         goto no_file;
617                 }
618                 hugesize = ALIGN(size, huge_page_size(hs));
619
620                 /* hugetlb_file_setup applies strict accounting */
621                 if (shmflg & SHM_NORESERVE)
622                         acctflag = VM_NORESERVE;
623                 file = hugetlb_file_setup(name, hugesize, acctflag,
624                                   &shp->mlock_user, HUGETLB_SHMFS_INODE,
625                                 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
626         } else {
627                 /*
628                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
629                  * if it's asked for.
630                  */
631                 if  ((shmflg & SHM_NORESERVE) &&
632                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
633                         acctflag = VM_NORESERVE;
634                 file = shmem_kernel_file_setup(name, size, acctflag);
635         }
636         error = PTR_ERR(file);
637         if (IS_ERR(file))
638                 goto no_file;
639
640         shp->shm_cprid = get_pid(task_tgid(current));
641         shp->shm_lprid = NULL;
642         shp->shm_atim = shp->shm_dtim = 0;
643         shp->shm_ctim = ktime_get_real_seconds();
644         shp->shm_segsz = size;
645         shp->shm_nattch = 0;
646         shp->shm_file = file;
647         shp->shm_creator = current;
648
649         /* ipc_addid() locks shp upon success. */
650         error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
651         if (error < 0)
652                 goto no_id;
653
654         list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
655
656         /*
657          * shmid gets reported as "inode#" in /proc/pid/maps.
658          * proc-ps tools use this. Changing this will break them.
659          */
660         file_inode(file)->i_ino = shp->shm_perm.id;
661
662         ns->shm_tot += numpages;
663         error = shp->shm_perm.id;
664
665         ipc_unlock_object(&shp->shm_perm);
666         rcu_read_unlock();
667         return error;
668
669 no_id:
670         ipc_update_pid(&shp->shm_cprid, NULL);
671         ipc_update_pid(&shp->shm_lprid, NULL);
672         if (is_file_hugepages(file) && shp->mlock_user)
673                 user_shm_unlock(size, shp->mlock_user);
674         fput(file);
675 no_file:
676         call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
677         return error;
678 }
679
680 /*
681  * Called with shm_ids.rwsem and ipcp locked.
682  */
683 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
684                                 struct ipc_params *params)
685 {
686         struct shmid_kernel *shp;
687
688         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
689         if (shp->shm_segsz < params->u.size)
690                 return -EINVAL;
691
692         return 0;
693 }
694
695 long ksys_shmget(key_t key, size_t size, int shmflg)
696 {
697         struct ipc_namespace *ns;
698         static const struct ipc_ops shm_ops = {
699                 .getnew = newseg,
700                 .associate = security_shm_associate,
701                 .more_checks = shm_more_checks,
702         };
703         struct ipc_params shm_params;
704
705         ns = current->nsproxy->ipc_ns;
706
707         shm_params.key = key;
708         shm_params.flg = shmflg;
709         shm_params.u.size = size;
710
711         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
712 }
713
714 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
715 {
716         return ksys_shmget(key, size, shmflg);
717 }
718
719 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
720 {
721         switch (version) {
722         case IPC_64:
723                 return copy_to_user(buf, in, sizeof(*in));
724         case IPC_OLD:
725             {
726                 struct shmid_ds out;
727
728                 memset(&out, 0, sizeof(out));
729                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
730                 out.shm_segsz   = in->shm_segsz;
731                 out.shm_atime   = in->shm_atime;
732                 out.shm_dtime   = in->shm_dtime;
733                 out.shm_ctime   = in->shm_ctime;
734                 out.shm_cpid    = in->shm_cpid;
735                 out.shm_lpid    = in->shm_lpid;
736                 out.shm_nattch  = in->shm_nattch;
737
738                 return copy_to_user(buf, &out, sizeof(out));
739             }
740         default:
741                 return -EINVAL;
742         }
743 }
744
745 static inline unsigned long
746 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
747 {
748         switch (version) {
749         case IPC_64:
750                 if (copy_from_user(out, buf, sizeof(*out)))
751                         return -EFAULT;
752                 return 0;
753         case IPC_OLD:
754             {
755                 struct shmid_ds tbuf_old;
756
757                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
758                         return -EFAULT;
759
760                 out->shm_perm.uid       = tbuf_old.shm_perm.uid;
761                 out->shm_perm.gid       = tbuf_old.shm_perm.gid;
762                 out->shm_perm.mode      = tbuf_old.shm_perm.mode;
763
764                 return 0;
765             }
766         default:
767                 return -EINVAL;
768         }
769 }
770
771 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
772 {
773         switch (version) {
774         case IPC_64:
775                 return copy_to_user(buf, in, sizeof(*in));
776         case IPC_OLD:
777             {
778                 struct shminfo out;
779
780                 if (in->shmmax > INT_MAX)
781                         out.shmmax = INT_MAX;
782                 else
783                         out.shmmax = (int)in->shmmax;
784
785                 out.shmmin      = in->shmmin;
786                 out.shmmni      = in->shmmni;
787                 out.shmseg      = in->shmseg;
788                 out.shmall      = in->shmall;
789
790                 return copy_to_user(buf, &out, sizeof(out));
791             }
792         default:
793                 return -EINVAL;
794         }
795 }
796
797 /*
798  * Calculate and add used RSS and swap pages of a shm.
799  * Called with shm_ids.rwsem held as a reader
800  */
801 static void shm_add_rss_swap(struct shmid_kernel *shp,
802         unsigned long *rss_add, unsigned long *swp_add)
803 {
804         struct inode *inode;
805
806         inode = file_inode(shp->shm_file);
807
808         if (is_file_hugepages(shp->shm_file)) {
809                 struct address_space *mapping = inode->i_mapping;
810                 struct hstate *h = hstate_file(shp->shm_file);
811                 *rss_add += pages_per_huge_page(h) * mapping->nrpages;
812         } else {
813 #ifdef CONFIG_SHMEM
814                 struct shmem_inode_info *info = SHMEM_I(inode);
815
816                 spin_lock_irq(&info->lock);
817                 *rss_add += inode->i_mapping->nrpages;
818                 *swp_add += info->swapped;
819                 spin_unlock_irq(&info->lock);
820 #else
821                 *rss_add += inode->i_mapping->nrpages;
822 #endif
823         }
824 }
825
826 /*
827  * Called with shm_ids.rwsem held as a reader
828  */
829 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
830                 unsigned long *swp)
831 {
832         int next_id;
833         int total, in_use;
834
835         *rss = 0;
836         *swp = 0;
837
838         in_use = shm_ids(ns).in_use;
839
840         for (total = 0, next_id = 0; total < in_use; next_id++) {
841                 struct kern_ipc_perm *ipc;
842                 struct shmid_kernel *shp;
843
844                 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
845                 if (ipc == NULL)
846                         continue;
847                 shp = container_of(ipc, struct shmid_kernel, shm_perm);
848
849                 shm_add_rss_swap(shp, rss, swp);
850
851                 total++;
852         }
853 }
854
855 /*
856  * This function handles some shmctl commands which require the rwsem
857  * to be held in write mode.
858  * NOTE: no locks must be held, the rwsem is taken inside this function.
859  */
860 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
861                        struct shmid64_ds *shmid64)
862 {
863         struct kern_ipc_perm *ipcp;
864         struct shmid_kernel *shp;
865         int err;
866
867         down_write(&shm_ids(ns).rwsem);
868         rcu_read_lock();
869
870         ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
871                                       &shmid64->shm_perm, 0);
872         if (IS_ERR(ipcp)) {
873                 err = PTR_ERR(ipcp);
874                 goto out_unlock1;
875         }
876
877         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
878
879         err = security_shm_shmctl(&shp->shm_perm, cmd);
880         if (err)
881                 goto out_unlock1;
882
883         switch (cmd) {
884         case IPC_RMID:
885                 ipc_lock_object(&shp->shm_perm);
886                 /* do_shm_rmid unlocks the ipc object and rcu */
887                 do_shm_rmid(ns, ipcp);
888                 goto out_up;
889         case IPC_SET:
890                 ipc_lock_object(&shp->shm_perm);
891                 err = ipc_update_perm(&shmid64->shm_perm, ipcp);
892                 if (err)
893                         goto out_unlock0;
894                 shp->shm_ctim = ktime_get_real_seconds();
895                 break;
896         default:
897                 err = -EINVAL;
898                 goto out_unlock1;
899         }
900
901 out_unlock0:
902         ipc_unlock_object(&shp->shm_perm);
903 out_unlock1:
904         rcu_read_unlock();
905 out_up:
906         up_write(&shm_ids(ns).rwsem);
907         return err;
908 }
909
910 static int shmctl_ipc_info(struct ipc_namespace *ns,
911                            struct shminfo64 *shminfo)
912 {
913         int err = security_shm_shmctl(NULL, IPC_INFO);
914         if (!err) {
915                 memset(shminfo, 0, sizeof(*shminfo));
916                 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
917                 shminfo->shmmax = ns->shm_ctlmax;
918                 shminfo->shmall = ns->shm_ctlall;
919                 shminfo->shmmin = SHMMIN;
920                 down_read(&shm_ids(ns).rwsem);
921                 err = ipc_get_maxid(&shm_ids(ns));
922                 up_read(&shm_ids(ns).rwsem);
923                 if (err < 0)
924                         err = 0;
925         }
926         return err;
927 }
928
929 static int shmctl_shm_info(struct ipc_namespace *ns,
930                            struct shm_info *shm_info)
931 {
932         int err = security_shm_shmctl(NULL, SHM_INFO);
933         if (!err) {
934                 memset(shm_info, 0, sizeof(*shm_info));
935                 down_read(&shm_ids(ns).rwsem);
936                 shm_info->used_ids = shm_ids(ns).in_use;
937                 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
938                 shm_info->shm_tot = ns->shm_tot;
939                 shm_info->swap_attempts = 0;
940                 shm_info->swap_successes = 0;
941                 err = ipc_get_maxid(&shm_ids(ns));
942                 up_read(&shm_ids(ns).rwsem);
943                 if (err < 0)
944                         err = 0;
945         }
946         return err;
947 }
948
949 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
950                         int cmd, struct shmid64_ds *tbuf)
951 {
952         struct shmid_kernel *shp;
953         int id = 0;
954         int err;
955
956         memset(tbuf, 0, sizeof(*tbuf));
957
958         rcu_read_lock();
959         if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
960                 shp = shm_obtain_object(ns, shmid);
961                 if (IS_ERR(shp)) {
962                         err = PTR_ERR(shp);
963                         goto out_unlock;
964                 }
965                 id = shp->shm_perm.id;
966         } else { /* IPC_STAT */
967                 shp = shm_obtain_object_check(ns, shmid);
968                 if (IS_ERR(shp)) {
969                         err = PTR_ERR(shp);
970                         goto out_unlock;
971                 }
972         }
973
974         /*
975          * Semantically SHM_STAT_ANY ought to be identical to
976          * that functionality provided by the /proc/sysvipc/
977          * interface. As such, only audit these calls and
978          * do not do traditional S_IRUGO permission checks on
979          * the ipc object.
980          */
981         if (cmd == SHM_STAT_ANY)
982                 audit_ipc_obj(&shp->shm_perm);
983         else {
984                 err = -EACCES;
985                 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
986                         goto out_unlock;
987         }
988
989         err = security_shm_shmctl(&shp->shm_perm, cmd);
990         if (err)
991                 goto out_unlock;
992
993         ipc_lock_object(&shp->shm_perm);
994
995         if (!ipc_valid_object(&shp->shm_perm)) {
996                 ipc_unlock_object(&shp->shm_perm);
997                 err = -EIDRM;
998                 goto out_unlock;
999         }
1000
1001         kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1002         tbuf->shm_segsz = shp->shm_segsz;
1003         tbuf->shm_atime = shp->shm_atim;
1004         tbuf->shm_dtime = shp->shm_dtim;
1005         tbuf->shm_ctime = shp->shm_ctim;
1006 #ifndef CONFIG_64BIT
1007         tbuf->shm_atime_high = shp->shm_atim >> 32;
1008         tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1009         tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1010 #endif
1011         tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1012         tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1013         tbuf->shm_nattch = shp->shm_nattch;
1014
1015         ipc_unlock_object(&shp->shm_perm);
1016         rcu_read_unlock();
1017         return id;
1018
1019 out_unlock:
1020         rcu_read_unlock();
1021         return err;
1022 }
1023
1024 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1025 {
1026         struct shmid_kernel *shp;
1027         struct file *shm_file;
1028         int err;
1029
1030         rcu_read_lock();
1031         shp = shm_obtain_object_check(ns, shmid);
1032         if (IS_ERR(shp)) {
1033                 err = PTR_ERR(shp);
1034                 goto out_unlock1;
1035         }
1036
1037         audit_ipc_obj(&(shp->shm_perm));
1038         err = security_shm_shmctl(&shp->shm_perm, cmd);
1039         if (err)
1040                 goto out_unlock1;
1041
1042         ipc_lock_object(&shp->shm_perm);
1043
1044         /* check if shm_destroy() is tearing down shp */
1045         if (!ipc_valid_object(&shp->shm_perm)) {
1046                 err = -EIDRM;
1047                 goto out_unlock0;
1048         }
1049
1050         if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1051                 kuid_t euid = current_euid();
1052
1053                 if (!uid_eq(euid, shp->shm_perm.uid) &&
1054                     !uid_eq(euid, shp->shm_perm.cuid)) {
1055                         err = -EPERM;
1056                         goto out_unlock0;
1057                 }
1058                 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1059                         err = -EPERM;
1060                         goto out_unlock0;
1061                 }
1062         }
1063
1064         shm_file = shp->shm_file;
1065         if (is_file_hugepages(shm_file))
1066                 goto out_unlock0;
1067
1068         if (cmd == SHM_LOCK) {
1069                 struct user_struct *user = current_user();
1070
1071                 err = shmem_lock(shm_file, 1, user);
1072                 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1073                         shp->shm_perm.mode |= SHM_LOCKED;
1074                         shp->mlock_user = user;
1075                 }
1076                 goto out_unlock0;
1077         }
1078
1079         /* SHM_UNLOCK */
1080         if (!(shp->shm_perm.mode & SHM_LOCKED))
1081                 goto out_unlock0;
1082         shmem_lock(shm_file, 0, shp->mlock_user);
1083         shp->shm_perm.mode &= ~SHM_LOCKED;
1084         shp->mlock_user = NULL;
1085         get_file(shm_file);
1086         ipc_unlock_object(&shp->shm_perm);
1087         rcu_read_unlock();
1088         shmem_unlock_mapping(shm_file->f_mapping);
1089
1090         fput(shm_file);
1091         return err;
1092
1093 out_unlock0:
1094         ipc_unlock_object(&shp->shm_perm);
1095 out_unlock1:
1096         rcu_read_unlock();
1097         return err;
1098 }
1099
1100 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1101 {
1102         int err, version;
1103         struct ipc_namespace *ns;
1104         struct shmid64_ds sem64;
1105
1106         if (cmd < 0 || shmid < 0)
1107                 return -EINVAL;
1108
1109         version = ipc_parse_version(&cmd);
1110         ns = current->nsproxy->ipc_ns;
1111
1112         switch (cmd) {
1113         case IPC_INFO: {
1114                 struct shminfo64 shminfo;
1115                 err = shmctl_ipc_info(ns, &shminfo);
1116                 if (err < 0)
1117                         return err;
1118                 if (copy_shminfo_to_user(buf, &shminfo, version))
1119                         err = -EFAULT;
1120                 return err;
1121         }
1122         case SHM_INFO: {
1123                 struct shm_info shm_info;
1124                 err = shmctl_shm_info(ns, &shm_info);
1125                 if (err < 0)
1126                         return err;
1127                 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1128                         err = -EFAULT;
1129                 return err;
1130         }
1131         case SHM_STAT:
1132         case SHM_STAT_ANY:
1133         case IPC_STAT: {
1134                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1135                 if (err < 0)
1136                         return err;
1137                 if (copy_shmid_to_user(buf, &sem64, version))
1138                         err = -EFAULT;
1139                 return err;
1140         }
1141         case IPC_SET:
1142                 if (copy_shmid_from_user(&sem64, buf, version))
1143                         return -EFAULT;
1144                 /* fallthru */
1145         case IPC_RMID:
1146                 return shmctl_down(ns, shmid, cmd, &sem64);
1147         case SHM_LOCK:
1148         case SHM_UNLOCK:
1149                 return shmctl_do_lock(ns, shmid, cmd);
1150         default:
1151                 return -EINVAL;
1152         }
1153 }
1154
1155 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1156 {
1157         return ksys_shmctl(shmid, cmd, buf);
1158 }
1159
1160 #ifdef CONFIG_COMPAT
1161
1162 struct compat_shmid_ds {
1163         struct compat_ipc_perm shm_perm;
1164         int shm_segsz;
1165         compat_time_t shm_atime;
1166         compat_time_t shm_dtime;
1167         compat_time_t shm_ctime;
1168         compat_ipc_pid_t shm_cpid;
1169         compat_ipc_pid_t shm_lpid;
1170         unsigned short shm_nattch;
1171         unsigned short shm_unused;
1172         compat_uptr_t shm_unused2;
1173         compat_uptr_t shm_unused3;
1174 };
1175
1176 struct compat_shminfo64 {
1177         compat_ulong_t shmmax;
1178         compat_ulong_t shmmin;
1179         compat_ulong_t shmmni;
1180         compat_ulong_t shmseg;
1181         compat_ulong_t shmall;
1182         compat_ulong_t __unused1;
1183         compat_ulong_t __unused2;
1184         compat_ulong_t __unused3;
1185         compat_ulong_t __unused4;
1186 };
1187
1188 struct compat_shm_info {
1189         compat_int_t used_ids;
1190         compat_ulong_t shm_tot, shm_rss, shm_swp;
1191         compat_ulong_t swap_attempts, swap_successes;
1192 };
1193
1194 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1195                                         int version)
1196 {
1197         if (in->shmmax > INT_MAX)
1198                 in->shmmax = INT_MAX;
1199         if (version == IPC_64) {
1200                 struct compat_shminfo64 info;
1201                 memset(&info, 0, sizeof(info));
1202                 info.shmmax = in->shmmax;
1203                 info.shmmin = in->shmmin;
1204                 info.shmmni = in->shmmni;
1205                 info.shmseg = in->shmseg;
1206                 info.shmall = in->shmall;
1207                 return copy_to_user(buf, &info, sizeof(info));
1208         } else {
1209                 struct shminfo info;
1210                 memset(&info, 0, sizeof(info));
1211                 info.shmmax = in->shmmax;
1212                 info.shmmin = in->shmmin;
1213                 info.shmmni = in->shmmni;
1214                 info.shmseg = in->shmseg;
1215                 info.shmall = in->shmall;
1216                 return copy_to_user(buf, &info, sizeof(info));
1217         }
1218 }
1219
1220 static int put_compat_shm_info(struct shm_info *ip,
1221                                 struct compat_shm_info __user *uip)
1222 {
1223         struct compat_shm_info info;
1224
1225         memset(&info, 0, sizeof(info));
1226         info.used_ids = ip->used_ids;
1227         info.shm_tot = ip->shm_tot;
1228         info.shm_rss = ip->shm_rss;
1229         info.shm_swp = ip->shm_swp;
1230         info.swap_attempts = ip->swap_attempts;
1231         info.swap_successes = ip->swap_successes;
1232         return copy_to_user(uip, &info, sizeof(info));
1233 }
1234
1235 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1236                                         int version)
1237 {
1238         if (version == IPC_64) {
1239                 struct compat_shmid64_ds v;
1240                 memset(&v, 0, sizeof(v));
1241                 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1242                 v.shm_atime      = lower_32_bits(in->shm_atime);
1243                 v.shm_atime_high = upper_32_bits(in->shm_atime);
1244                 v.shm_dtime      = lower_32_bits(in->shm_dtime);
1245                 v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1246                 v.shm_ctime      = lower_32_bits(in->shm_ctime);
1247                 v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1248                 v.shm_segsz = in->shm_segsz;
1249                 v.shm_nattch = in->shm_nattch;
1250                 v.shm_cpid = in->shm_cpid;
1251                 v.shm_lpid = in->shm_lpid;
1252                 return copy_to_user(buf, &v, sizeof(v));
1253         } else {
1254                 struct compat_shmid_ds v;
1255                 memset(&v, 0, sizeof(v));
1256                 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1257                 v.shm_perm.key = in->shm_perm.key;
1258                 v.shm_atime = in->shm_atime;
1259                 v.shm_dtime = in->shm_dtime;
1260                 v.shm_ctime = in->shm_ctime;
1261                 v.shm_segsz = in->shm_segsz;
1262                 v.shm_nattch = in->shm_nattch;
1263                 v.shm_cpid = in->shm_cpid;
1264                 v.shm_lpid = in->shm_lpid;
1265                 return copy_to_user(buf, &v, sizeof(v));
1266         }
1267 }
1268
1269 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1270                                         int version)
1271 {
1272         memset(out, 0, sizeof(*out));
1273         if (version == IPC_64) {
1274                 struct compat_shmid64_ds __user *p = buf;
1275                 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1276         } else {
1277                 struct compat_shmid_ds __user *p = buf;
1278                 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1279         }
1280 }
1281
1282 long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
1283 {
1284         struct ipc_namespace *ns;
1285         struct shmid64_ds sem64;
1286         int version = compat_ipc_parse_version(&cmd);
1287         int err;
1288
1289         ns = current->nsproxy->ipc_ns;
1290
1291         if (cmd < 0 || shmid < 0)
1292                 return -EINVAL;
1293
1294         switch (cmd) {
1295         case IPC_INFO: {
1296                 struct shminfo64 shminfo;
1297                 err = shmctl_ipc_info(ns, &shminfo);
1298                 if (err < 0)
1299                         return err;
1300                 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1301                         err = -EFAULT;
1302                 return err;
1303         }
1304         case SHM_INFO: {
1305                 struct shm_info shm_info;
1306                 err = shmctl_shm_info(ns, &shm_info);
1307                 if (err < 0)
1308                         return err;
1309                 if (put_compat_shm_info(&shm_info, uptr))
1310                         err = -EFAULT;
1311                 return err;
1312         }
1313         case IPC_STAT:
1314         case SHM_STAT_ANY:
1315         case SHM_STAT:
1316                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1317                 if (err < 0)
1318                         return err;
1319                 if (copy_compat_shmid_to_user(uptr, &sem64, version))
1320                         err = -EFAULT;
1321                 return err;
1322
1323         case IPC_SET:
1324                 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1325                         return -EFAULT;
1326                 /* fallthru */
1327         case IPC_RMID:
1328                 return shmctl_down(ns, shmid, cmd, &sem64);
1329         case SHM_LOCK:
1330         case SHM_UNLOCK:
1331                 return shmctl_do_lock(ns, shmid, cmd);
1332                 break;
1333         default:
1334                 return -EINVAL;
1335         }
1336         return err;
1337 }
1338
1339 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1340 {
1341         return compat_ksys_shmctl(shmid, cmd, uptr);
1342 }
1343 #endif
1344
1345 /*
1346  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1347  *
1348  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1349  * "raddr" thing points to kernel space, and there has to be a wrapper around
1350  * this.
1351  */
1352 long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1353               ulong *raddr, unsigned long shmlba)
1354 {
1355         struct shmid_kernel *shp;
1356         unsigned long addr = (unsigned long)shmaddr;
1357         unsigned long size;
1358         struct file *file;
1359         int    err;
1360         unsigned long flags = MAP_SHARED;
1361         unsigned long prot;
1362         int acc_mode;
1363         struct ipc_namespace *ns;
1364         struct shm_file_data *sfd;
1365         struct path path;
1366         fmode_t f_mode;
1367         unsigned long populate = 0;
1368
1369         err = -EINVAL;
1370         if (shmid < 0)
1371                 goto out;
1372
1373         if (addr) {
1374                 if (addr & (shmlba - 1)) {
1375                         if (shmflg & SHM_RND) {
1376                                 addr &= ~(shmlba - 1);  /* round down */
1377
1378                                 /*
1379                                  * Ensure that the round-down is non-nil
1380                                  * when remapping. This can happen for
1381                                  * cases when addr < shmlba.
1382                                  */
1383                                 if (!addr && (shmflg & SHM_REMAP))
1384                                         goto out;
1385                         } else
1386 #ifndef __ARCH_FORCE_SHMLBA
1387                                 if (addr & ~PAGE_MASK)
1388 #endif
1389                                         goto out;
1390                 }
1391
1392                 flags |= MAP_FIXED;
1393         } else if ((shmflg & SHM_REMAP))
1394                 goto out;
1395
1396         if (shmflg & SHM_RDONLY) {
1397                 prot = PROT_READ;
1398                 acc_mode = S_IRUGO;
1399                 f_mode = FMODE_READ;
1400         } else {
1401                 prot = PROT_READ | PROT_WRITE;
1402                 acc_mode = S_IRUGO | S_IWUGO;
1403                 f_mode = FMODE_READ | FMODE_WRITE;
1404         }
1405         if (shmflg & SHM_EXEC) {
1406                 prot |= PROT_EXEC;
1407                 acc_mode |= S_IXUGO;
1408         }
1409
1410         /*
1411          * We cannot rely on the fs check since SYSV IPC does have an
1412          * additional creator id...
1413          */
1414         ns = current->nsproxy->ipc_ns;
1415         rcu_read_lock();
1416         shp = shm_obtain_object_check(ns, shmid);
1417         if (IS_ERR(shp)) {
1418                 err = PTR_ERR(shp);
1419                 goto out_unlock;
1420         }
1421
1422         err = -EACCES;
1423         if (ipcperms(ns, &shp->shm_perm, acc_mode))
1424                 goto out_unlock;
1425
1426         err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1427         if (err)
1428                 goto out_unlock;
1429
1430         ipc_lock_object(&shp->shm_perm);
1431
1432         /* check if shm_destroy() is tearing down shp */
1433         if (!ipc_valid_object(&shp->shm_perm)) {
1434                 ipc_unlock_object(&shp->shm_perm);
1435                 err = -EIDRM;
1436                 goto out_unlock;
1437         }
1438
1439         path = shp->shm_file->f_path;
1440         path_get(&path);
1441         shp->shm_nattch++;
1442         size = i_size_read(d_inode(path.dentry));
1443         ipc_unlock_object(&shp->shm_perm);
1444         rcu_read_unlock();
1445
1446         err = -ENOMEM;
1447         sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1448         if (!sfd) {
1449                 path_put(&path);
1450                 goto out_nattch;
1451         }
1452
1453         file = alloc_file(&path, f_mode,
1454                           is_file_hugepages(shp->shm_file) ?
1455                                 &shm_file_operations_huge :
1456                                 &shm_file_operations);
1457         err = PTR_ERR(file);
1458         if (IS_ERR(file)) {
1459                 kfree(sfd);
1460                 path_put(&path);
1461                 goto out_nattch;
1462         }
1463
1464         file->private_data = sfd;
1465         file->f_mapping = shp->shm_file->f_mapping;
1466         sfd->id = shp->shm_perm.id;
1467         sfd->ns = get_ipc_ns(ns);
1468         /*
1469          * We need to take a reference to the real shm file to prevent the
1470          * pointer from becoming stale in cases where the lifetime of the outer
1471          * file extends beyond that of the shm segment.  It's not usually
1472          * possible, but it can happen during remap_file_pages() emulation as
1473          * that unmaps the memory, then does ->mmap() via file reference only.
1474          * We'll deny the ->mmap() if the shm segment was since removed, but to
1475          * detect shm ID reuse we need to compare the file pointers.
1476          */
1477         sfd->file = get_file(shp->shm_file);
1478         sfd->vm_ops = NULL;
1479
1480         err = security_mmap_file(file, prot, flags);
1481         if (err)
1482                 goto out_fput;
1483
1484         if (down_write_killable(&current->mm->mmap_sem)) {
1485                 err = -EINTR;
1486                 goto out_fput;
1487         }
1488
1489         if (addr && !(shmflg & SHM_REMAP)) {
1490                 err = -EINVAL;
1491                 if (addr + size < addr)
1492                         goto invalid;
1493
1494                 if (find_vma_intersection(current->mm, addr, addr + size))
1495                         goto invalid;
1496         }
1497
1498         addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
1499         *raddr = addr;
1500         err = 0;
1501         if (IS_ERR_VALUE(addr))
1502                 err = (long)addr;
1503 invalid:
1504         up_write(&current->mm->mmap_sem);
1505         if (populate)
1506                 mm_populate(addr, populate);
1507
1508 out_fput:
1509         fput(file);
1510
1511 out_nattch:
1512         down_write(&shm_ids(ns).rwsem);
1513         shp = shm_lock(ns, shmid);
1514         shp->shm_nattch--;
1515         if (shm_may_destroy(ns, shp))
1516                 shm_destroy(ns, shp);
1517         else
1518                 shm_unlock(shp);
1519         up_write(&shm_ids(ns).rwsem);
1520         return err;
1521
1522 out_unlock:
1523         rcu_read_unlock();
1524 out:
1525         return err;
1526 }
1527
1528 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1529 {
1530         unsigned long ret;
1531         long err;
1532
1533         err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1534         if (err)
1535                 return err;
1536         force_successful_syscall_return();
1537         return (long)ret;
1538 }
1539
1540 #ifdef CONFIG_COMPAT
1541
1542 #ifndef COMPAT_SHMLBA
1543 #define COMPAT_SHMLBA   SHMLBA
1544 #endif
1545
1546 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1547 {
1548         unsigned long ret;
1549         long err;
1550
1551         err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1552         if (err)
1553                 return err;
1554         force_successful_syscall_return();
1555         return (long)ret;
1556 }
1557 #endif
1558
1559 /*
1560  * detach and kill segment if marked destroyed.
1561  * The work is done in shm_close.
1562  */
1563 long ksys_shmdt(char __user *shmaddr)
1564 {
1565         struct mm_struct *mm = current->mm;
1566         struct vm_area_struct *vma;
1567         unsigned long addr = (unsigned long)shmaddr;
1568         int retval = -EINVAL;
1569 #ifdef CONFIG_MMU
1570         loff_t size = 0;
1571         struct file *file;
1572         struct vm_area_struct *next;
1573 #endif
1574
1575         if (addr & ~PAGE_MASK)
1576                 return retval;
1577
1578         if (down_write_killable(&mm->mmap_sem))
1579                 return -EINTR;
1580
1581         /*
1582          * This function tries to be smart and unmap shm segments that
1583          * were modified by partial mlock or munmap calls:
1584          * - It first determines the size of the shm segment that should be
1585          *   unmapped: It searches for a vma that is backed by shm and that
1586          *   started at address shmaddr. It records it's size and then unmaps
1587          *   it.
1588          * - Then it unmaps all shm vmas that started at shmaddr and that
1589          *   are within the initially determined size and that are from the
1590          *   same shm segment from which we determined the size.
1591          * Errors from do_munmap are ignored: the function only fails if
1592          * it's called with invalid parameters or if it's called to unmap
1593          * a part of a vma. Both calls in this function are for full vmas,
1594          * the parameters are directly copied from the vma itself and always
1595          * valid - therefore do_munmap cannot fail. (famous last words?)
1596          */
1597         /*
1598          * If it had been mremap()'d, the starting address would not
1599          * match the usual checks anyway. So assume all vma's are
1600          * above the starting address given.
1601          */
1602         vma = find_vma(mm, addr);
1603
1604 #ifdef CONFIG_MMU
1605         while (vma) {
1606                 next = vma->vm_next;
1607
1608                 /*
1609                  * Check if the starting address would match, i.e. it's
1610                  * a fragment created by mprotect() and/or munmap(), or it
1611                  * otherwise it starts at this address with no hassles.
1612                  */
1613                 if ((vma->vm_ops == &shm_vm_ops) &&
1614                         (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1615
1616                         /*
1617                          * Record the file of the shm segment being
1618                          * unmapped.  With mremap(), someone could place
1619                          * page from another segment but with equal offsets
1620                          * in the range we are unmapping.
1621                          */
1622                         file = vma->vm_file;
1623                         size = i_size_read(file_inode(vma->vm_file));
1624                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1625                         /*
1626                          * We discovered the size of the shm segment, so
1627                          * break out of here and fall through to the next
1628                          * loop that uses the size information to stop
1629                          * searching for matching vma's.
1630                          */
1631                         retval = 0;
1632                         vma = next;
1633                         break;
1634                 }
1635                 vma = next;
1636         }
1637
1638         /*
1639          * We need look no further than the maximum address a fragment
1640          * could possibly have landed at. Also cast things to loff_t to
1641          * prevent overflows and make comparisons vs. equal-width types.
1642          */
1643         size = PAGE_ALIGN(size);
1644         while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1645                 next = vma->vm_next;
1646
1647                 /* finding a matching vma now does not alter retval */
1648                 if ((vma->vm_ops == &shm_vm_ops) &&
1649                     ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1650                     (vma->vm_file == file))
1651                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1652                 vma = next;
1653         }
1654
1655 #else   /* CONFIG_MMU */
1656         /* under NOMMU conditions, the exact address to be destroyed must be
1657          * given
1658          */
1659         if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1660                 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1661                 retval = 0;
1662         }
1663
1664 #endif
1665
1666         up_write(&mm->mmap_sem);
1667         return retval;
1668 }
1669
1670 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1671 {
1672         return ksys_shmdt(shmaddr);
1673 }
1674
1675 #ifdef CONFIG_PROC_FS
1676 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1677 {
1678         struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1679         struct user_namespace *user_ns = seq_user_ns(s);
1680         struct kern_ipc_perm *ipcp = it;
1681         struct shmid_kernel *shp;
1682         unsigned long rss = 0, swp = 0;
1683
1684         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1685         shm_add_rss_swap(shp, &rss, &swp);
1686
1687 #if BITS_PER_LONG <= 32
1688 #define SIZE_SPEC "%10lu"
1689 #else
1690 #define SIZE_SPEC "%21lu"
1691 #endif
1692
1693         seq_printf(s,
1694                    "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1695                    "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1696                    SIZE_SPEC " " SIZE_SPEC "\n",
1697                    shp->shm_perm.key,
1698                    shp->shm_perm.id,
1699                    shp->shm_perm.mode,
1700                    shp->shm_segsz,
1701                    pid_nr_ns(shp->shm_cprid, pid_ns),
1702                    pid_nr_ns(shp->shm_lprid, pid_ns),
1703                    shp->shm_nattch,
1704                    from_kuid_munged(user_ns, shp->shm_perm.uid),
1705                    from_kgid_munged(user_ns, shp->shm_perm.gid),
1706                    from_kuid_munged(user_ns, shp->shm_perm.cuid),
1707                    from_kgid_munged(user_ns, shp->shm_perm.cgid),
1708                    shp->shm_atim,
1709                    shp->shm_dtim,
1710                    shp->shm_ctim,
1711                    rss * PAGE_SIZE,
1712                    swp * PAGE_SIZE);
1713
1714         return 0;
1715 }
1716 #endif