/*
 * Source provenance (gitweb scrape): linux.git ipc/shm.c
 * at commit a413ddf74daca26b2d22e1f81882cb07097d1ce8.
 */
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *       Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <linux/init.h>
33 #include <linux/file.h>
34 #include <linux/mman.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/audit.h>
39 #include <linux/capability.h>
40 #include <linux/ptrace.h>
41 #include <linux/seq_file.h>
42 #include <linux/rwsem.h>
43 #include <linux/nsproxy.h>
44 #include <linux/mount.h>
45 #include <linux/ipc_namespace.h>
46 #include <linux/rhashtable.h>
47
48 #include <linux/uaccess.h>
49
50 #include "util.h"
51
struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	struct file		*shm_file;	/* backing shmem/hugetlb file */
	unsigned long		shm_nattch;	/* number of current attaches */
	unsigned long		shm_segsz;	/* segment size in bytes */
	time64_t		shm_atim;	/* last attach time */
	time64_t		shm_dtim;	/* last detach time */
	time64_t		shm_ctim;	/* last change (create/IPC_SET) time */
	struct pid		*shm_cprid;	/* pid of creator */
	struct pid		*shm_lprid;	/* pid of last attach/detach */
	struct user_struct	*mlock_user;	/* user charged for locked pages
						   (SHM_LOCK / hugetlb) */

	/* The task created the shm object.  NULL if the task is dead. */
	struct task_struct	*shm_creator;
	struct list_head	shm_clist;	/* list by creator */
} __randomize_layout;
69
/* shm_mode upper byte flags */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */

/*
 * Per-attach state hung off file->private_data of the wrapper file the
 * VM maps.  It records the segment id/namespace and wraps the real
 * backing file (->file) whose operations the shm layer forwards to.
 */
struct shm_file_data {
	int id;				/* ipc id of the segment */
	struct ipc_namespace *ns;	/* namespace holding the segment */
	struct file *file;		/* underlying shmem/hugetlb file */
	const struct vm_operations_struct *vm_ops; /* backing file's vm_ops */
};

/* Accessor for the shm_file_data stashed in file->private_data. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
82
static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

/* The IPC id table for SysV shared memory in a given namespace. */
#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

/* Drop the per-object ipc lock taken by shm_lock()/ipc_lock(). */
#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
98
99 int shm_init_ns(struct ipc_namespace *ns)
100 {
101         ns->shm_ctlmax = SHMMAX;
102         ns->shm_ctlall = SHMALL;
103         ns->shm_ctlmni = SHMMNI;
104         ns->shm_rmid_forced = 0;
105         ns->shm_tot = 0;
106         return ipc_init_ids(&shm_ids(ns));
107 }
108
/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_nattch) {
		/*
		 * Still attached somewhere: mark it for destruction on the
		 * last detach and hide the key so new attaches can't find it.
		 */
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
		shm_unlock(shp);
	} else
		shm_destroy(ns, shp);	/* no users left: tear down now */
}
127
#ifdef CONFIG_IPC_NS
/*
 * Tear down every shm segment of a dying ipc namespace via
 * do_shm_rmid(), then free the id table's idr and key hash.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
}
#endif
136
137 static int __init ipc_ns_init(void)
138 {
139         const int err = shm_init_ns(&init_ipc_ns);
140         WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
141         return err;
142 }
143
144 pure_initcall(ipc_ns_init);
145
/*
 * Register the /proc/sysvipc/shm interface.  The header string differs
 * by word size because the printed field widths differ.
 */
void __init shm_init(void)
{
	ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}
156
157 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
158 {
159         struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
160
161         if (IS_ERR(ipcp))
162                 return ERR_CAST(ipcp);
163
164         return container_of(ipcp, struct shmid_kernel, shm_perm);
165 }
166
167 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
168 {
169         struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
170
171         if (IS_ERR(ipcp))
172                 return ERR_CAST(ipcp);
173
174         return container_of(ipcp, struct shmid_kernel, shm_perm);
175 }
176
177 /*
178  * shm_lock_(check_) routines are called in the paths where the rwsem
179  * is not necessarily held.
180  */
181 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
182 {
183         struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
184
185         /*
186          * Callers of shm_lock() must validate the status of the returned ipc
187          * object pointer (as returned by ipc_lock()), and error out as
188          * appropriate.
189          */
190         if (IS_ERR(ipcp))
191                 return (void *)ipcp;
192         return container_of(ipcp, struct shmid_kernel, shm_perm);
193 }
194
/*
 * Lock a segment we already hold a pointer to: enter RCU first so the
 * object can't be freed under us, then take the per-object spinlock.
 */
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	ipc_lock_object(&ipcp->shm_perm);
}
200
201 static void shm_rcu_free(struct rcu_head *head)
202 {
203         struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
204                                                         rcu);
205         struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
206                                                         shm_perm);
207         security_shm_free(&shp->shm_perm);
208         kvfree(shp);
209 }
210
/* Unlink @s from its creator's list and remove it from the id table. */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	list_del(&s->shm_clist);
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
216
217
/*
 * Core of the attach path (vm_ops->open and shm_mmap): look the segment
 * up by the id cached in the vma's file, bump the attach count and
 * stamp the attach time/pid.  Returns 0, or -errno if the id is stale.
 */
static int __shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);

	if (IS_ERR(shp))
		return PTR_ERR(shp);

	if (shp->shm_file != sfd->file) {
		/* ID was reused */
		shm_unlock(shp);
		return -EINVAL;
	}

	/* update accounting under the object lock */
	shp->shm_atim = ktime_get_real_seconds();
	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
	shp->shm_nattch++;
	shm_unlock(shp);
	return 0;
}
241
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	/*
	 * A failure here means we raced in the idr lookup or with
	 * shm_destroy().  Either way the ID is busted — just warn.
	 */
	WARN_ON_ONCE(__shm_open(vma));
}
252
/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	struct file *shm_file;

	shm_file = shp->shm_file;
	shp->shm_file = NULL;	/* lets __shm_open() detect the stale id */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);	/* drop from creator list and id table */
	shm_unlock(shp);
	/* undo any SHM_LOCK / hugetlb mlock charge before dropping the file */
	if (!is_file_hugepages(shm_file))
		shmem_lock(shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(i_size_read(file_inode(shm_file)),
				shp->mlock_user);
	fput(shm_file);
	/* release the pid references before freeing the object via RCU */
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}
281
282 /*
283  * shm_may_destroy - identifies whether shm segment should be destroyed now
284  *
285  * Returns true if and only if there are no active users of the segment and
286  * one of the following is true:
287  *
288  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
289  *
290  * 2) sysctl kernel.shm_rmid_forced is set to 1.
291  */
292 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
293 {
294         return (shp->shm_nattch == 0) &&
295                (ns->shm_rmid_forced ||
296                 (shp->shm_perm.mode & SHM_DEST));
297 }
298
/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	/* writer rwsem: we may end up destroying the segment below */
	down_write(&shm_ids(ns).rwsem);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);

	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	if (WARN_ON_ONCE(IS_ERR(shp)))
		goto done; /* no-op */

	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
	shp->shm_dtim = ktime_get_real_seconds();
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);	/* unlocks shp */
	else
		shm_unlock(shp);
done:
	up_write(&shm_ids(ns).rwsem);
}
333
/*
 * idr_for_each() callback: destroy one segment if it is orphaned
 * (creator exited) and unattached.
 * Called with ns->shm_ids(ns).rwsem locked.
 */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);	/* unlocks shp */
	}
	return 0;
}
356
/*
 * Sweep the whole namespace and destroy every orphaned, unattached
 * segment that shm_may_destroy() approves.
 */
void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rwsem);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rwsem);
}
364
/*
 * Detach an exiting task from the segments it created, destroying
 * those that are eligible.
 * Locking assumes this will only be called with task == current.
 */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
	struct shmid_kernel *shp, *n;

	if (list_empty(&task->sysvshm.shm_clist))
		return;

	/*
	 * If kernel.shm_rmid_forced is not set then only keep track of
	 * which shmids are orphaned, so that a later set of the sysctl
	 * can clean them up.
	 */
	if (!ns->shm_rmid_forced) {
		down_read(&shm_ids(ns).rwsem);
		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
			shp->shm_creator = NULL;
		/*
		 * Only under read lock but we are only called on current
		 * so no entry on the list will be shared.
		 */
		list_del(&task->sysvshm.shm_clist);
		up_read(&shm_ids(ns).rwsem);
		return;
	}

	/*
	 * Destroy all already created segments, that were not yet mapped,
	 * and mark any mapped as orphan to cover the sysctl toggling.
	 * Destroy is skipped if shm_may_destroy() returns false.
	 */
	down_write(&shm_ids(ns).rwsem);
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
		shp->shm_creator = NULL;

		if (shm_may_destroy(ns, shp)) {
			shm_lock_by_ptr(shp);
			shm_destroy(ns, shp);	/* also unlinks shm_clist */
		}
	}

	/* Remove the list head from any segments still attached. */
	list_del(&task->sysvshm.shm_clist);
	up_write(&shm_ids(ns).rwsem);
}
411
412 static vm_fault_t shm_fault(struct vm_fault *vmf)
413 {
414         struct file *file = vmf->vma->vm_file;
415         struct shm_file_data *sfd = shm_file_data(file);
416
417         return sfd->vm_ops->fault(vmf);
418 }
419
420 static int shm_split(struct vm_area_struct *vma, unsigned long addr)
421 {
422         struct file *file = vma->vm_file;
423         struct shm_file_data *sfd = shm_file_data(file);
424
425         if (sfd->vm_ops->split)
426                 return sfd->vm_ops->split(vma, addr);
427
428         return 0;
429 }
430
431 static unsigned long shm_pagesize(struct vm_area_struct *vma)
432 {
433         struct file *file = vma->vm_file;
434         struct shm_file_data *sfd = shm_file_data(file);
435
436         if (sfd->vm_ops->pagesize)
437                 return sfd->vm_ops->pagesize(vma);
438
439         return PAGE_SIZE;
440 }
441
442 #ifdef CONFIG_NUMA
443 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
444 {
445         struct file *file = vma->vm_file;
446         struct shm_file_data *sfd = shm_file_data(file);
447         int err = 0;
448
449         if (sfd->vm_ops->set_policy)
450                 err = sfd->vm_ops->set_policy(vma, new);
451         return err;
452 }
453
454 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
455                                         unsigned long addr)
456 {
457         struct file *file = vma->vm_file;
458         struct shm_file_data *sfd = shm_file_data(file);
459         struct mempolicy *pol = NULL;
460
461         if (sfd->vm_ops->get_policy)
462                 pol = sfd->vm_ops->get_policy(vma, addr);
463         else if (vma->vm_policy)
464                 pol = vma->vm_policy;
465
466         return pol;
467 }
468 #endif
469
/*
 * mmap handler for the wrapper file: register the attach, then let the
 * backing file set up the vma and steal its vm_ops so shm_vm_ops can
 * interpose on open/close/fault.
 */
static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/*
	 * In case of remap_file_pages() emulation, the file can represent an
	 * IPC ID that was removed, and possibly even reused by another shm
	 * segment already.  Propagate this case as an error to caller.
	 */
	ret = __shm_open(vma);
	if (ret)
		return ret;

	ret = call_mmap(sfd->file, vma);
	if (ret) {
		shm_close(vma);	/* undo the attach from __shm_open() */
		return ret;
	}
	sfd->vm_ops = vma->vm_ops;	/* remember backing vm_ops to forward to */
#ifdef CONFIG_MMU
	WARN_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	return 0;
}
496
497 static int shm_release(struct inode *ino, struct file *file)
498 {
499         struct shm_file_data *sfd = shm_file_data(file);
500
501         put_ipc_ns(sfd->ns);
502         fput(sfd->file);
503         shm_file_data(file) = NULL;
504         kfree(sfd);
505         return 0;
506 }
507
508 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
509 {
510         struct shm_file_data *sfd = shm_file_data(file);
511
512         if (!sfd->file->f_op->fsync)
513                 return -EINVAL;
514         return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
515 }
516
/* fallocate on the wrapper file: forward to the backing file's f_op. */
static long shm_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fallocate)
		return -EOPNOTSUPP;
	/*
	 * NOTE(review): the outer wrapper file (not sfd->file) is passed
	 * through here, unlike shm_fsync(); presumably both share the same
	 * inode so this is equivalent — confirm against the wrapper-file
	 * setup in the attach path.
	 */
	return sfd->file->f_op->fallocate(file, mode, offset, len);
}
526
527 static unsigned long shm_get_unmapped_area(struct file *file,
528         unsigned long addr, unsigned long len, unsigned long pgoff,
529         unsigned long flags)
530 {
531         struct shm_file_data *sfd = shm_file_data(file);
532
533         return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
534                                                 pgoff, flags);
535 }
536
/* File operations installed on the wrapper file handed to the VM. */
static const struct file_operations shm_file_operations = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

/*
 * shm_file_operations_huge is now identical to shm_file_operations,
 * but we keep it distinct for the sake of is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

/* True iff @file is a SysV shm wrapper over a hugetlb segment. */
bool is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}

/* vm_ops interposed on shm mappings; most entries forward to sfd->vm_ops. */
static const struct vm_operations_struct shm_vm_ops = {
	.open	= shm_open,	/* callback for a new vm-area open */
	.close	= shm_close,	/* callback for when the vm-area is released */
	.fault	= shm_fault,
	.split	= shm_split,
	.pagesize = shm_pagesize,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};
575
/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 * Returns the new segment's ipc id, or a negative errno.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct file *file;
	char name[13];		/* "SYSV" + 8 hex digits + NUL */
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	/* the numpages round-up above overflowed */
	if (numpages << PAGE_SHIFT < size)
		return -ENOSPC;

	/* namespace-wide page accounting: reject overflow or over-limit */
	if (ns->shm_tot + numpages < ns->shm_tot ||
			ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
	if (unlikely(!shp))
		return -ENOMEM;

	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(&shp->shm_perm);
	if (error) {
		/* security blob not allocated: plain free is enough */
		kvfree(shp);
		return error;
	}

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		struct hstate *hs;
		size_t hugesize;

		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
		if (!hs) {
			error = -EINVAL;
			goto no_file;
		}
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if  ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	shp->shm_cprid = get_pid(task_tgid(current));
	shp->shm_lprid = NULL;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = ktime_get_real_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;

	/* ipc_addid() locks shp upon success. */
	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (error < 0)
		goto no_id;

	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file_inode(file)->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	error = shp->shm_perm.id;

	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	return error;

no_id:
	/* id allocation failed: undo pid refs, hugetlb lock charge, file */
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
	return error;
no_file:
	/* security blob was allocated: free shp via RCU to pair with it */
	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
	return error;
}
693
694 /*
695  * Called with shm_ids.rwsem and ipcp locked.
696  */
697 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
698                                 struct ipc_params *params)
699 {
700         struct shmid_kernel *shp;
701
702         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
703         if (shp->shm_segsz < params->u.size)
704                 return -EINVAL;
705
706         return 0;
707 }
708
709 long ksys_shmget(key_t key, size_t size, int shmflg)
710 {
711         struct ipc_namespace *ns;
712         static const struct ipc_ops shm_ops = {
713                 .getnew = newseg,
714                 .associate = security_shm_associate,
715                 .more_checks = shm_more_checks,
716         };
717         struct ipc_params shm_params;
718
719         ns = current->nsproxy->ipc_ns;
720
721         shm_params.key = key;
722         shm_params.flg = shmflg;
723         shm_params.u.size = size;
724
725         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
726 }
727
/* shmget(2) syscall: thin wrapper around ksys_shmget(). */
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	return ksys_shmget(key, size, shmflg);
}
732
/*
 * Copy a kernel shmid64_ds out to user space in the ABI layout the
 * caller asked for (IPC_64 native, or the legacy IPC_OLD shmid_ds).
 * Returns bytes-not-copied from copy_to_user(), or -EINVAL.
 */
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));	/* avoid leaking stack bytes */
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
758
/*
 * Copy a user-space shmid_ds/shmid64_ds (per @version) into a kernel
 * shmid64_ds.  For IPC_OLD only the perm fields IPC_SET uses are taken.
 * Returns 0, -EFAULT, or -EINVAL for an unknown version.
 */
static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
		out->shm_perm.mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
784
/*
 * Copy shm limits out to user space in the ABI layout requested.
 * The legacy shminfo uses int fields, so shmmax is clamped to INT_MAX.
 */
static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if (in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
810
/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = file_inode(shp->shm_file);

	if (is_file_hugepages(shp->shm_file)) {
		/* hugetlb: page cache counts huge pages; scale to base pages */
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);

		/* info->lock guards the swapped counter */
		spin_lock_irq(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock_irq(&info->lock);
#else
		/* !CONFIG_SHMEM (ramfs backing): nothing is ever swapped */
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}
839
/*
 * Sum RSS and swap usage over every segment in the namespace.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	/* walk idr slots until all in-use segments have been seen */
	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;	/* hole in the id space */
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}
868
/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		       struct shmid64_ds *shmid64)
{
	struct kern_ipc_perm *ipcp;
	struct shmid_kernel *shp;
	int err;

	down_write(&shm_ids(ns).rwsem);
	rcu_read_lock();

	/* id validation plus ownership/permission checks for RMID/SET */
	ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
				      &shmid64->shm_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&shp->shm_perm);
		/* do_shm_rmid unlocks the ipc object and rcu */
		do_shm_rmid(ns, ipcp);
		goto out_up;
	case IPC_SET:
		ipc_lock_object(&shp->shm_perm);
		err = ipc_update_perm(&shmid64->shm_perm, ipcp);
		if (err)
			goto out_unlock0;
		shp->shm_ctim = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&shm_ids(ns).rwsem);
	return err;
}
923
924 static int shmctl_ipc_info(struct ipc_namespace *ns,
925                            struct shminfo64 *shminfo)
926 {
927         int err = security_shm_shmctl(NULL, IPC_INFO);
928         if (!err) {
929                 memset(shminfo, 0, sizeof(*shminfo));
930                 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
931                 shminfo->shmmax = ns->shm_ctlmax;
932                 shminfo->shmall = ns->shm_ctlall;
933                 shminfo->shmmin = SHMMIN;
934                 down_read(&shm_ids(ns).rwsem);
935                 err = ipc_get_maxid(&shm_ids(ns));
936                 up_read(&shm_ids(ns).rwsem);
937                 if (err < 0)
938                         err = 0;
939         }
940         return err;
941 }
942
943 static int shmctl_shm_info(struct ipc_namespace *ns,
944                            struct shm_info *shm_info)
945 {
946         int err = security_shm_shmctl(NULL, SHM_INFO);
947         if (!err) {
948                 memset(shm_info, 0, sizeof(*shm_info));
949                 down_read(&shm_ids(ns).rwsem);
950                 shm_info->used_ids = shm_ids(ns).in_use;
951                 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
952                 shm_info->shm_tot = ns->shm_tot;
953                 shm_info->swap_attempts = 0;
954                 shm_info->swap_successes = 0;
955                 err = ipc_get_maxid(&shm_ids(ns));
956                 up_read(&shm_ids(ns).rwsem);
957                 if (err < 0)
958                         err = 0;
959         }
960         return err;
961 }
962
/*
 * IPC_STAT, SHM_STAT, SHM_STAT_ANY: snapshot a segment's state into *tbuf.
 * Returns 0 for IPC_STAT, the full ipc id for SHM_STAT/SHM_STAT_ANY, or a
 * negative errno.
 */
static int shmctl_stat(struct ipc_namespace *ns, int shmid,
			int cmd, struct shmid64_ds *tbuf)
{
	struct shmid_kernel *shp;
	int err;

	memset(tbuf, 0, sizeof(*tbuf));

	rcu_read_lock();
	if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
		/* SHM_STAT* interpret shmid as an index: no seq-number check */
		shp = shm_obtain_object(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
	} else { /* IPC_STAT */
		/* IPC_STAT requires a full id; the sequence number is checked */
		shp = shm_obtain_object_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
	}

	/*
	 * Semantically SHM_STAT_ANY ought to be identical to
	 * that functionality provided by the /proc/sysvipc/
	 * interface. As such, only audit these calls and
	 * do not do traditional S_IRUGO permission checks on
	 * the ipc object.
	 */
	if (cmd == SHM_STAT_ANY)
		audit_ipc_obj(&shp->shm_perm);
	else {
		err = -EACCES;
		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
			goto out_unlock;
	}

	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock;

	/* Take the object lock so the fields below are read consistently. */
	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
	tbuf->shm_segsz = shp->shm_segsz;
	tbuf->shm_atime = shp->shm_atim;
	tbuf->shm_dtime = shp->shm_dtim;
	tbuf->shm_ctime = shp->shm_ctim;
#ifndef CONFIG_64BIT
	/* On 32-bit, the 64-bit timestamps are split into low/high halves. */
	tbuf->shm_atime_high = shp->shm_atim >> 32;
	tbuf->shm_dtime_high = shp->shm_dtim >> 32;
	tbuf->shm_ctime_high = shp->shm_ctim >> 32;
#endif
	/* Translate the stored struct pid pointers into the caller's pid ns. */
	tbuf->shm_cpid	= pid_vnr(shp->shm_cprid);
	tbuf->shm_lpid	= pid_vnr(shp->shm_lprid);
	tbuf->shm_nattch = shp->shm_nattch;

	if (cmd == IPC_STAT) {
		/*
		 * As defined in SUS:
		 * Return 0 on success
		 */
		err = 0;
	} else {
		/*
		 * SHM_STAT and SHM_STAT_ANY (both Linux specific)
		 * Return the full id, including the sequence number
		 */
		err = shp->shm_perm.id;
	}

	ipc_unlock_object(&shp->shm_perm);
out_unlock:
	rcu_read_unlock();
	return err;
}
1046
/*
 * SHM_LOCK / SHM_UNLOCK: pin the segment's pages in memory, or release them.
 * Returns 0 on success or a negative errno.
 */
static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
{
	struct shmid_kernel *shp;
	struct file *shm_file;
	int err;

	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock1;
	}

	audit_ipc_obj(&(shp->shm_perm));
	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock1;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		err = -EIDRM;
		goto out_unlock0;
	}

	/*
	 * Without CAP_IPC_LOCK, only the segment's owner or creator may
	 * (un)lock it, and locking additionally requires a nonzero
	 * RLIMIT_MEMLOCK.
	 */
	if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
		kuid_t euid = current_euid();

		if (!uid_eq(euid, shp->shm_perm.uid) &&
		    !uid_eq(euid, shp->shm_perm.cuid)) {
			err = -EPERM;
			goto out_unlock0;
		}
		if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
			err = -EPERM;
			goto out_unlock0;
		}
	}

	shm_file = shp->shm_file;
	/* Hugetlb-backed segments are not swappable: (un)lock is a no-op. */
	if (is_file_hugepages(shm_file))
		goto out_unlock0;

	if (cmd == SHM_LOCK) {
		struct user_struct *user = current_user();

		err = shmem_lock(shm_file, 1, user);
		/* Record the locking user only on the 0 -> locked transition. */
		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
			shp->shm_perm.mode |= SHM_LOCKED;
			shp->mlock_user = user;
		}
		goto out_unlock0;
	}

	/* SHM_UNLOCK */
	if (!(shp->shm_perm.mode & SHM_LOCKED))
		goto out_unlock0;
	shmem_lock(shm_file, 0, shp->mlock_user);
	shp->shm_perm.mode &= ~SHM_LOCKED;
	shp->mlock_user = NULL;
	/*
	 * Pin the file and drop both locks before walking the mapping:
	 * shmem_unlock_mapping() is too heavy to run under the ipc lock.
	 */
	get_file(shm_file);
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	shmem_unlock_mapping(shm_file->f_mapping);

	fput(shm_file);
	return err;

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
	return err;
}
1122
1123 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1124 {
1125         int err, version;
1126         struct ipc_namespace *ns;
1127         struct shmid64_ds sem64;
1128
1129         if (cmd < 0 || shmid < 0)
1130                 return -EINVAL;
1131
1132         version = ipc_parse_version(&cmd);
1133         ns = current->nsproxy->ipc_ns;
1134
1135         switch (cmd) {
1136         case IPC_INFO: {
1137                 struct shminfo64 shminfo;
1138                 err = shmctl_ipc_info(ns, &shminfo);
1139                 if (err < 0)
1140                         return err;
1141                 if (copy_shminfo_to_user(buf, &shminfo, version))
1142                         err = -EFAULT;
1143                 return err;
1144         }
1145         case SHM_INFO: {
1146                 struct shm_info shm_info;
1147                 err = shmctl_shm_info(ns, &shm_info);
1148                 if (err < 0)
1149                         return err;
1150                 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1151                         err = -EFAULT;
1152                 return err;
1153         }
1154         case SHM_STAT:
1155         case SHM_STAT_ANY:
1156         case IPC_STAT: {
1157                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1158                 if (err < 0)
1159                         return err;
1160                 if (copy_shmid_to_user(buf, &sem64, version))
1161                         err = -EFAULT;
1162                 return err;
1163         }
1164         case IPC_SET:
1165                 if (copy_shmid_from_user(&sem64, buf, version))
1166                         return -EFAULT;
1167                 /* fallthru */
1168         case IPC_RMID:
1169                 return shmctl_down(ns, shmid, cmd, &sem64);
1170         case SHM_LOCK:
1171         case SHM_UNLOCK:
1172                 return shmctl_do_lock(ns, shmid, cmd);
1173         default:
1174                 return -EINVAL;
1175         }
1176 }
1177
/* shmctl(2): thin syscall wrapper around ksys_shmctl(). */
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	return ksys_shmctl(shmid, cmd, buf);
}
1182
1183 #ifdef CONFIG_COMPAT
1184
/*
 * Old-style (IPC_OLD) shmid_ds layout as seen by 32-bit compat userspace.
 * Field order, sizes and the unused padding members are ABI; do not touch.
 */
struct compat_shmid_ds {
	struct compat_ipc_perm shm_perm;
	int shm_segsz;			/* segment size in bytes */
	compat_time_t shm_atime;	/* last attach time */
	compat_time_t shm_dtime;	/* last detach time */
	compat_time_t shm_ctime;	/* last change time */
	compat_ipc_pid_t shm_cpid;	/* creator pid */
	compat_ipc_pid_t shm_lpid;	/* pid of last shmat/shmdt */
	unsigned short shm_nattch;	/* current number of attaches */
	unsigned short shm_unused;	/* historical padding, part of the ABI */
	compat_uptr_t shm_unused2;
	compat_uptr_t shm_unused3;
};
1198
/*
 * IPC_64 layout of the shminfo limits structure for compat (32-bit) tasks;
 * mirrors struct shminfo64 with compat-sized members.
 */
struct compat_shminfo64 {
	compat_ulong_t shmmax;		/* max segment size */
	compat_ulong_t shmmin;		/* min segment size */
	compat_ulong_t shmmni;		/* max number of segments */
	compat_ulong_t shmseg;		/* max segments per process */
	compat_ulong_t shmall;		/* max total shared memory (pages) */
	compat_ulong_t __unused1;
	compat_ulong_t __unused2;
	compat_ulong_t __unused3;
	compat_ulong_t __unused4;
};
1210
/* Compat layout of struct shm_info as returned by the SHM_INFO command. */
struct compat_shm_info {
	compat_int_t used_ids;		/* number of existing segments */
	compat_ulong_t shm_tot, shm_rss, shm_swp;
	compat_ulong_t swap_attempts, swap_successes;	/* always 0 */
};
1216
1217 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1218                                         int version)
1219 {
1220         if (in->shmmax > INT_MAX)
1221                 in->shmmax = INT_MAX;
1222         if (version == IPC_64) {
1223                 struct compat_shminfo64 info;
1224                 memset(&info, 0, sizeof(info));
1225                 info.shmmax = in->shmmax;
1226                 info.shmmin = in->shmmin;
1227                 info.shmmni = in->shmmni;
1228                 info.shmseg = in->shmseg;
1229                 info.shmall = in->shmall;
1230                 return copy_to_user(buf, &info, sizeof(info));
1231         } else {
1232                 struct shminfo info;
1233                 memset(&info, 0, sizeof(info));
1234                 info.shmmax = in->shmmax;
1235                 info.shmmin = in->shmmin;
1236                 info.shmmni = in->shmmni;
1237                 info.shmseg = in->shmseg;
1238                 info.shmall = in->shmall;
1239                 return copy_to_user(buf, &info, sizeof(info));
1240         }
1241 }
1242
1243 static int put_compat_shm_info(struct shm_info *ip,
1244                                 struct compat_shm_info __user *uip)
1245 {
1246         struct compat_shm_info info;
1247
1248         memset(&info, 0, sizeof(info));
1249         info.used_ids = ip->used_ids;
1250         info.shm_tot = ip->shm_tot;
1251         info.shm_rss = ip->shm_rss;
1252         info.shm_swp = ip->shm_swp;
1253         info.swap_attempts = ip->swap_attempts;
1254         info.swap_successes = ip->swap_successes;
1255         return copy_to_user(uip, &info, sizeof(info));
1256 }
1257
/*
 * Copy a shmid64_ds snapshot to a compat user buffer in either the IPC_64
 * or the old-style layout.  Returns nonzero if the copy faults.
 */
static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_shmid64_ds v;
		/* Zero first so padding never leaks kernel stack contents. */
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
		/* 64-bit times are split into low/high 32-bit ABI fields. */
		v.shm_atime	 = lower_32_bits(in->shm_atime);
		v.shm_atime_high = upper_32_bits(in->shm_atime);
		v.shm_dtime	 = lower_32_bits(in->shm_dtime);
		v.shm_dtime_high = upper_32_bits(in->shm_dtime);
		v.shm_ctime	 = lower_32_bits(in->shm_ctime);
		v.shm_ctime_high = upper_32_bits(in->shm_ctime);
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_shmid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
		v.shm_perm.key = in->shm_perm.key;
		/* Old layout: times are truncated to 32 bits (y2038 limit). */
		v.shm_atime = in->shm_atime;
		v.shm_dtime = in->shm_dtime;
		v.shm_ctime = in->shm_ctime;
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	}
}
1291
1292 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1293                                         int version)
1294 {
1295         memset(out, 0, sizeof(*out));
1296         if (version == IPC_64) {
1297                 struct compat_shmid64_ds __user *p = buf;
1298                 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1299         } else {
1300                 struct compat_shmid_ds __user *p = buf;
1301                 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1302         }
1303 }
1304
1305 long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
1306 {
1307         struct ipc_namespace *ns;
1308         struct shmid64_ds sem64;
1309         int version = compat_ipc_parse_version(&cmd);
1310         int err;
1311
1312         ns = current->nsproxy->ipc_ns;
1313
1314         if (cmd < 0 || shmid < 0)
1315                 return -EINVAL;
1316
1317         switch (cmd) {
1318         case IPC_INFO: {
1319                 struct shminfo64 shminfo;
1320                 err = shmctl_ipc_info(ns, &shminfo);
1321                 if (err < 0)
1322                         return err;
1323                 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1324                         err = -EFAULT;
1325                 return err;
1326         }
1327         case SHM_INFO: {
1328                 struct shm_info shm_info;
1329                 err = shmctl_shm_info(ns, &shm_info);
1330                 if (err < 0)
1331                         return err;
1332                 if (put_compat_shm_info(&shm_info, uptr))
1333                         err = -EFAULT;
1334                 return err;
1335         }
1336         case IPC_STAT:
1337         case SHM_STAT_ANY:
1338         case SHM_STAT:
1339                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1340                 if (err < 0)
1341                         return err;
1342                 if (copy_compat_shmid_to_user(uptr, &sem64, version))
1343                         err = -EFAULT;
1344                 return err;
1345
1346         case IPC_SET:
1347                 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1348                         return -EFAULT;
1349                 /* fallthru */
1350         case IPC_RMID:
1351                 return shmctl_down(ns, shmid, cmd, &sem64);
1352         case SHM_LOCK:
1353         case SHM_UNLOCK:
1354                 return shmctl_do_lock(ns, shmid, cmd);
1355                 break;
1356         default:
1357                 return -EINVAL;
1358         }
1359         return err;
1360 }
1361
/* Compat shmctl(2): thin syscall wrapper around compat_ksys_shmctl(). */
COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
	return compat_ksys_shmctl(shmid, cmd, uptr);
}
1366 #endif
1367
1368 /*
1369  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1370  *
1371  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1372  * "raddr" thing points to kernel space, and there has to be a wrapper around
1373  * this.
1374  */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
	      ulong *raddr, unsigned long shmlba)
{
	struct shmid_kernel *shp;
	unsigned long addr = (unsigned long)shmaddr;
	unsigned long size;
	struct file *file, *base;
	int    err;
	unsigned long flags = MAP_SHARED;
	unsigned long prot;
	int acc_mode;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	int f_flags;
	unsigned long populate = 0;

	err = -EINVAL;
	if (shmid < 0)
		goto out;

	/* Validate/adjust the requested address against the SHMLBA boundary. */
	if (addr) {
		if (addr & (shmlba - 1)) {
			if (shmflg & SHM_RND) {
				addr &= ~(shmlba - 1);  /* round down */

				/*
				 * Ensure that the round-down is non-nil
				 * when remapping. This can happen for
				 * cases when addr < shmlba.
				 */
				if (!addr && (shmflg & SHM_REMAP))
					goto out;
			} else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}

		flags |= MAP_FIXED;
	} else if ((shmflg & SHM_REMAP))
		/* SHM_REMAP makes no sense without a caller-chosen address. */
		goto out;

	/* Translate SHM_* flags into mmap prot, ipc access mode, file mode. */
	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_flags = O_RDONLY;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_flags = O_RDWR;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	/*
	 * We need to take a reference to the real shm file to prevent the
	 * pointer from becoming stale in cases where the lifetime of the outer
	 * file extends beyond that of the shm segment.  It's not usually
	 * possible, but it can happen during remap_file_pages() emulation as
	 * that unmaps the memory, then does ->mmap() via file reference only.
	 * We'll deny the ->mmap() if the shm segment was since removed, but to
	 * detect shm ID reuse we need to compare the file pointers.
	 */
	base = get_file(shp->shm_file);
	/*
	 * Bump the attach count under the object lock; it keeps the segment
	 * alive across the mmap below and is undone at out_nattch.
	 */
	shp->shm_nattch++;
	size = i_size_read(file_inode(base));
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	/* Build the per-attach wrapper file (may sleep; no locks held). */
	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd) {
		fput(base);
		goto out_nattch;
	}

	file = alloc_file_clone(base, f_flags,
			  is_file_hugepages(base) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	err = PTR_ERR(file);
	if (IS_ERR(file)) {
		kfree(sfd);
		fput(base);
		goto out_nattch;
	}

	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = base;	/* sfd owns the reference taken above */
	sfd->vm_ops = NULL;
	file->private_data = sfd;

	err = security_mmap_file(file, prot, flags);
	if (err)
		goto out_fput;

	if (down_write_killable(&current->mm->mmap_sem)) {
		err = -EINTR;
		goto out_fput;
	}

	/* MAP_FIXED without SHM_REMAP: refuse to clobber existing mappings. */
	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (addr + size < addr)
			goto invalid;

		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
	}

	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
	*raddr = addr;
	err = 0;
	if (IS_ERR_VALUE(addr))
		err = (long)addr;
invalid:
	up_write(&current->mm->mmap_sem);
	if (populate)
		mm_populate(addr, populate);

out_fput:
	fput(file);

out_nattch:
	/*
	 * Undo the earlier nattch bump; if the segment was marked removed
	 * and this was the last reference, tear it down now.
	 */
	down_write(&shm_ids(ns).rwsem);
	shp = shm_lock(ns, shmid);
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);
	return err;

out_unlock:
	rcu_read_unlock();
out:
	return err;
}
1547
/* shmat(2): attach at the native SHMLBA and return the mapped address. */
SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	/*
	 * A valid attach address can look like a negative errno; tell the
	 * syscall-exit path not to treat the return value as an error.
	 */
	force_successful_syscall_return();
	return (long)ret;
}
1559
1560 #ifdef CONFIG_COMPAT
1561
1562 #ifndef COMPAT_SHMLBA
1563 #define COMPAT_SHMLBA   SHMLBA
1564 #endif
1565
/* Compat shmat(2): same as the native path but aligns to COMPAT_SHMLBA. */
COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
	if (err)
		return err;
	/* See native shmat(): the address must not be mistaken for an errno. */
	force_successful_syscall_return();
	return (long)ret;
}
1577 #endif
1578
1579 /*
1580  * detach and kill segment if marked destroyed.
1581  * The work is done in shm_close.
1582  */
/*
 * shmdt(2) worker: unmap the shm segment attached at shmaddr.
 * Returns 0 if at least one matching vma was found, else -EINVAL
 * (or -EINTR if interrupted while waiting for mmap_sem).
 */
long ksys_shmdt(char __user *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct file *file;
	struct vm_area_struct *next;
#endif

	/* Attach addresses are always page aligned; reject anything else. */
	if (addr & ~PAGE_MASK)
		return retval;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records it's size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size and that are from the
	 *   same shm segment from which we determined the size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
	while (vma) {
		next = vma->vm_next;

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or it
		 * otherwise it starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

			/*
			 * Record the file of the shm segment being
			 * unmapped.  With mremap(), someone could place
			 * page from another segment but with equal offsets
			 * in the range we are unmapping.
			 */
			file = vma->vm_file;
			size = i_size_read(file_inode(vma->vm_file));
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	/*
	 * NOTE: 'file' is only read inside this loop, and the loop cannot
	 * run unless the first loop matched: otherwise size stays 0 and
	 * (vma->vm_end - addr) > 0 fails the bound check immediately.
	 */
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
		    (vma->vm_file == file))
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		vma = next;
	}

#else	/* CONFIG_MMU */
	/* under NOMMU conditions, the exact address to be destroyed must be
	 * given
	 */
	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		retval = 0;
	}

#endif

	up_write(&mm->mmap_sem);
	return retval;
}
1689
/* shmdt(2): thin syscall wrapper around ksys_shmdt(). */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	return ksys_shmdt(shmaddr);
}
1694
1695 #ifdef CONFIG_PROC_FS
/*
 * Emit one /proc/sysvipc/shm line for the segment *it, translating pids
 * and uids/gids into the reading task's namespaces.
 */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct shmid_kernel *shp;
	unsigned long rss = 0, swp = 0;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	shm_add_rss_swap(shp, &rss, &swp);

/* Column width for size/rss/swap fields depends on the word size. */
#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	/* Field order/widths are user-visible ABI of /proc/sysvipc/shm. */
	seq_printf(s,
		   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
		   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
		   SIZE_SPEC " " SIZE_SPEC "\n",
		   shp->shm_perm.key,
		   shp->shm_perm.id,
		   shp->shm_perm.mode,
		   shp->shm_segsz,
		   pid_nr_ns(shp->shm_cprid, pid_ns),
		   pid_nr_ns(shp->shm_lprid, pid_ns),
		   shp->shm_nattch,
		   from_kuid_munged(user_ns, shp->shm_perm.uid),
		   from_kgid_munged(user_ns, shp->shm_perm.gid),
		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
		   shp->shm_atim,
		   shp->shm_dtim,
		   shp->shm_ctim,
		   rss * PAGE_SIZE,
		   swp * PAGE_SIZE);

	return 0;
}
1736 #endif