/*
 * linux/ipc/shm.c  (listing path: linux-old/ipc/shm.c)
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *         Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 */
   7
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ipc.h>
#include <linux/shm.h>
#include <linux/stat.h>
#include <linux/malloc.h>
#include <linux/string.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/pgtable.h>
  19
  20extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
  21extern unsigned long get_swap_page (void);
  22static int findkey (key_t key);
  23static int newseg (key_t key, int shmflg, int size);
  24static int shm_map (struct vm_area_struct *shmd);
  25static void killseg (int id);
  26static void shm_open (struct vm_area_struct *shmd);
  27static void shm_close (struct vm_area_struct *shmd);
  28static pte_t shm_swap_in(struct vm_area_struct *, unsigned long, unsigned long);
  29
  30static int shm_tot = 0; /* total number of shared memory pages */
  31static int shm_rss = 0; /* number of shared memory pages that are in memory */
  32static int shm_swp = 0; /* number of shared memory pages that are in swap */
  33static int max_shmid = 0; /* every used id is <= max_shmid */
  34static struct wait_queue *shm_lock = NULL; /* calling findkey() may need to wait */
  35static struct shmid_ds *shm_segs[SHMMNI];
  36
  37static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
  38
  39/* some statistics */
  40static ulong swap_attempts = 0;
  41static ulong swap_successes = 0;
  42static ulong used_segs = 0;
  43
  44void shm_init (void)
  45{
  46        int id;
  47
  48        for (id = 0; id < SHMMNI; id++)
  49                shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
  50        shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
  51        shm_lock = NULL;
  52        return;
  53}
  54
  55static int findkey (key_t key)
  56{
  57        int id;
  58        struct shmid_ds *shp;
  59
  60        for (id = 0; id <= max_shmid; id++) {
  61                while ((shp = shm_segs[id]) == IPC_NOID)
  62                        sleep_on (&shm_lock);
  63                if (shp == IPC_UNUSED)
  64                        continue;
  65                if (key == shp->shm_perm.key)
  66                        return id;
  67        }
  68        return -1;
  69}
  70
  71/*
  72 * allocate new shmid_ds and pgtable. protected by shm_segs[id] = NOID.
  73 */
  74static int newseg (key_t key, int shmflg, int size)
  75{
  76        struct shmid_ds *shp;
  77        int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
  78        int id, i;
  79
  80        if (size < SHMMIN)
  81                return -EINVAL;
  82        if (shm_tot + numpages >= SHMALL)
  83                return -ENOSPC;
  84        for (id = 0; id < SHMMNI; id++)
  85                if (shm_segs[id] == IPC_UNUSED) {
  86                        shm_segs[id] = (struct shmid_ds *) IPC_NOID;
  87                        goto found;
  88                }
  89        return -ENOSPC;
  90
  91found:
  92        shp = (struct shmid_ds *) kmalloc (sizeof (*shp), GFP_KERNEL);
  93        if (!shp) {
  94                shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
  95                wake_up (&shm_lock);
  96                return -ENOMEM;
  97        }
  98
  99        shp->shm_pages = (ulong *) kmalloc (numpages*sizeof(ulong),GFP_KERNEL);
 100        if (!shp->shm_pages) {
 101                shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
 102                wake_up (&shm_lock);
 103                kfree(shp);
 104                return -ENOMEM;
 105        }
 106
 107        for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
 108        shm_tot += numpages;
 109        shp->shm_perm.key = key;
 110        shp->shm_perm.mode = (shmflg & S_IRWXUGO);
 111        shp->shm_perm.cuid = shp->shm_perm.uid = current->euid;
 112        shp->shm_perm.cgid = shp->shm_perm.gid = current->egid;
 113        shp->shm_perm.seq = shm_seq;
 114        shp->shm_segsz = size;
 115        shp->shm_cpid = current->pid;
 116        shp->attaches = NULL;
 117        shp->shm_lpid = shp->shm_nattch = 0;
 118        shp->shm_atime = shp->shm_dtime = 0;
 119        shp->shm_ctime = CURRENT_TIME;
 120        shp->shm_npages = numpages;
 121
 122        if (id > max_shmid)
 123                max_shmid = id;
 124        shm_segs[id] = shp;
 125        used_segs++;
 126        wake_up (&shm_lock);
 127        return (unsigned int) shp->shm_perm.seq * SHMMNI + id;
 128}
 129
 130asmlinkage int sys_shmget (key_t key, int size, int shmflg)
 131{
 132        struct shmid_ds *shp;
 133        int id = 0;
 134
 135        if (size < 0 || size > SHMMAX)
 136                return -EINVAL;
 137        if (key == IPC_PRIVATE)
 138                return newseg(key, shmflg, size);
 139        if ((id = findkey (key)) == -1) {
 140                if (!(shmflg & IPC_CREAT))
 141                        return -ENOENT;
 142                return newseg(key, shmflg, size);
 143        }
 144        if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL))
 145                return -EEXIST;
 146        shp = shm_segs[id];
 147        if (shp->shm_perm.mode & SHM_DEST)
 148                return -EIDRM;
 149        if (size > shp->shm_segsz)
 150                return -EINVAL;
 151        if (ipcperms (&shp->shm_perm, shmflg))
 152                return -EACCES;
 153        return (unsigned int) shp->shm_perm.seq * SHMMNI + id;
 154}
 155
 156/*
 157 * Only called after testing nattch and SHM_DEST.
 158 * Here pages, pgtable and shmid_ds are freed.
 159 */
 160static void killseg (int id)
 161{
 162        struct shmid_ds *shp;
 163        int i, numpages;
 164
 165        shp = shm_segs[id];
 166        if (shp == IPC_NOID || shp == IPC_UNUSED) {
 167                printk ("shm nono: killseg called on unused seg id=%d\n", id);
 168                return;
 169        }
 170        shp->shm_perm.seq++;     /* for shmat */
 171        shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
 172        shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
 173        used_segs--;
 174        if (id == max_shmid)
 175                while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
 176        if (!shp->shm_pages) {
 177                printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
 178                return;
 179        }
 180        numpages = shp->shm_npages;
 181        for (i = 0; i < numpages ; i++) {
 182                pte_t pte;
 183                pte_val(pte) = shp->shm_pages[i];
 184                if (pte_none(pte))
 185                        continue;
 186                if (pte_present(pte)) {
 187                        free_page (pte_page(pte));
 188                        shm_rss--;
 189                } else {
 190                        swap_free(pte_val(pte));
 191                        shm_swp--;
 192                }
 193        }
 194        kfree(shp->shm_pages);
 195        shm_tot -= numpages;
 196        kfree(shp);
 197        return;
 198}
 199
 200asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
 201{
 202        struct shmid_ds tbuf;
 203        struct shmid_ds *shp;
 204        struct ipc_perm *ipcp;
 205        int id, err;
 206
 207        if (cmd < 0 || shmid < 0)
 208                return -EINVAL;
 209        if (cmd == IPC_SET) {
 210                if (!buf)
 211                        return -EFAULT;
 212                err = verify_area (VERIFY_READ, buf, sizeof (*buf));
 213                if (err)
 214                        return err;
 215                memcpy_fromfs (&tbuf, buf, sizeof (*buf));
 216        }
 217
 218        switch (cmd) { /* replace with proc interface ? */
 219        case IPC_INFO:
 220        {
 221                struct shminfo shminfo;
 222                if (!buf)
 223                        return -EFAULT;
 224                shminfo.shmmni = SHMMNI;
 225                shminfo.shmmax = SHMMAX;
 226                shminfo.shmmin = SHMMIN;
 227                shminfo.shmall = SHMALL;
 228                shminfo.shmseg = SHMSEG;
 229                err = verify_area (VERIFY_WRITE, buf, sizeof (struct shminfo));
 230                if (err)
 231                        return err;
 232                memcpy_tofs (buf, &shminfo, sizeof(struct shminfo));
 233                return max_shmid;
 234        }
 235        case SHM_INFO:
 236        {
 237                struct shm_info shm_info;
 238                if (!buf)
 239                        return -EFAULT;
 240                err = verify_area (VERIFY_WRITE, buf, sizeof (shm_info));
 241                if (err)
 242                        return err;
 243                shm_info.used_ids = used_segs;
 244                shm_info.shm_rss = shm_rss;
 245                shm_info.shm_tot = shm_tot;
 246                shm_info.shm_swp = shm_swp;
 247                shm_info.swap_attempts = swap_attempts;
 248                shm_info.swap_successes = swap_successes;
 249                memcpy_tofs (buf, &shm_info, sizeof(shm_info));
 250                return max_shmid;
 251        }
 252        case SHM_STAT:
 253                if (!buf)
 254                        return -EFAULT;
 255                err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
 256                if (err)
 257                        return err;
 258                if (shmid > max_shmid)
 259                        return -EINVAL;
 260                shp = shm_segs[shmid];
 261                if (shp == IPC_UNUSED || shp == IPC_NOID)
 262                        return -EINVAL;
 263                if (ipcperms (&shp->shm_perm, S_IRUGO))
 264                        return -EACCES;
 265                id = (unsigned int) shp->shm_perm.seq * SHMMNI + shmid;
 266                tbuf.shm_perm   = shp->shm_perm;
 267                tbuf.shm_segsz  = shp->shm_segsz;
 268                tbuf.shm_atime  = shp->shm_atime;
 269                tbuf.shm_dtime  = shp->shm_dtime;
 270                tbuf.shm_ctime  = shp->shm_ctime;
 271                tbuf.shm_cpid   = shp->shm_cpid;
 272                tbuf.shm_lpid   = shp->shm_lpid;
 273                tbuf.shm_nattch = shp->shm_nattch;
 274                memcpy_tofs (buf, &tbuf, sizeof(*buf));
 275                return id;
 276        }
 277
 278        shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
 279        if (shp == IPC_UNUSED || shp == IPC_NOID)
 280                return -EINVAL;
 281        if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
 282                return -EIDRM;
 283        ipcp = &shp->shm_perm;
 284
 285        switch (cmd) {
 286        case SHM_UNLOCK:
 287                if (!suser())
 288                        return -EPERM;
 289                if (!(ipcp->mode & SHM_LOCKED))
 290                        return -EINVAL;
 291                ipcp->mode &= ~SHM_LOCKED;
 292                break;
 293        case SHM_LOCK:
 294/* Allow superuser to lock segment in memory */
 295/* Should the pages be faulted in here or leave it to user? */
 296/* need to determine interaction with current->swappable */
 297                if (!suser())
 298                        return -EPERM;
 299                if (ipcp->mode & SHM_LOCKED)
 300                        return -EINVAL;
 301                ipcp->mode |= SHM_LOCKED;
 302                break;
 303        case IPC_STAT:
 304                if (ipcperms (ipcp, S_IRUGO))
 305                        return -EACCES;
 306                if (!buf)
 307                        return -EFAULT;
 308                err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
 309                if (err)
 310                        return err;
 311                tbuf.shm_perm   = shp->shm_perm;
 312                tbuf.shm_segsz  = shp->shm_segsz;
 313                tbuf.shm_atime  = shp->shm_atime;
 314                tbuf.shm_dtime  = shp->shm_dtime;
 315                tbuf.shm_ctime  = shp->shm_ctime;
 316                tbuf.shm_cpid   = shp->shm_cpid;
 317                tbuf.shm_lpid   = shp->shm_lpid;
 318                tbuf.shm_nattch = shp->shm_nattch;
 319                memcpy_tofs (buf, &tbuf, sizeof(*buf));
 320                break;
 321        case IPC_SET:
 322                if (suser() || current->euid == shp->shm_perm.uid ||
 323                    current->euid == shp->shm_perm.cuid) {
 324                        ipcp->uid = tbuf.shm_perm.uid;
 325                        ipcp->gid = tbuf.shm_perm.gid;
 326                        ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
 327                                | (tbuf.shm_perm.mode & S_IRWXUGO);
 328                        shp->shm_ctime = CURRENT_TIME;
 329                        break;
 330                }
 331                return -EPERM;
 332        case IPC_RMID:
 333                if (suser() || current->euid == shp->shm_perm.uid ||
 334                    current->euid == shp->shm_perm.cuid) {
 335                        shp->shm_perm.mode |= SHM_DEST;
 336                        if (shp->shm_nattch <= 0)
 337                                killseg (id);
 338                        break;
 339                }
 340                return -EPERM;
 341        default:
 342                return -EINVAL;
 343        }
 344        return 0;
 345}
 346
 347/*
 348 * The per process internal structure for managing segments is
 349 * `struct vm_area_struct'.
 350 * A shmat will add to and shmdt will remove from the list.
 351 * shmd->vm_mm          the attacher
 352 * shmd->vm_start       virt addr of attach, multiple of SHMLBA
 353 * shmd->vm_end         multiple of SHMLBA
 354 * shmd->vm_next        next attach for task
 355 * shmd->vm_next_share  next attach for segment
 356 * shmd->vm_offset      offset into segment
 357 * shmd->vm_pte         signature for this attach
 358 */
 359
 360static struct vm_operations_struct shm_vm_ops = {
 361        shm_open,               /* open - callback for a new vm-area open */
 362        shm_close,              /* close - callback for when the vm-area is released */
 363        NULL,                   /* no need to sync pages at unmap */
 364        NULL,                   /* protect */
 365        NULL,                   /* sync */
 366        NULL,                   /* advise */
 367        NULL,                   /* nopage (done with swapin) */
 368        NULL,                   /* wppage */
 369        NULL,                   /* swapout (hardcoded right now) */
 370        shm_swap_in             /* swapin */
 371};
 372
 373/* Insert shmd into the circular list shp->attaches */
 374static inline void insert_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
 375{
 376        struct vm_area_struct * attaches;
 377
 378        if ((attaches = shp->attaches)) {
 379                shmd->vm_next_share = attaches;
 380                shmd->vm_prev_share = attaches->vm_prev_share;
 381                shmd->vm_prev_share->vm_next_share = shmd;
 382                attaches->vm_prev_share = shmd;
 383        } else
 384                shp->attaches = shmd->vm_next_share = shmd->vm_prev_share = shmd;
 385}
 386
 387/* Remove shmd from circular list shp->attaches */
 388static inline void remove_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
 389{
 390        if (shmd->vm_next_share == shmd) {
 391                if (shp->attaches != shmd) {
 392                        printk("shm_close: shm segment (id=%ld) attach list inconsistent\n",
 393                               SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
 394                        printk("shm_close: %08lx-%08lx %c%c%c%c %08lx %08lx\n",
 395                                shmd->vm_start, shmd->vm_end,
 396                                shmd->vm_flags & VM_READ ? 'r' : '-',
 397                                shmd->vm_flags & VM_WRITE ? 'w' : '-',
 398                                shmd->vm_flags & VM_EXEC ? 'x' : '-',
 399                                shmd->vm_flags & VM_MAYSHARE ? 's' : 'p',
 400                                shmd->vm_offset, shmd->vm_pte);
 401                }
 402                shp->attaches = NULL;
 403        } else {
 404                if (shp->attaches == shmd)
 405                        shp->attaches = shmd->vm_next_share;
 406                shmd->vm_prev_share->vm_next_share = shmd->vm_next_share;
 407                shmd->vm_next_share->vm_prev_share = shmd->vm_prev_share;
 408        }
 409}
 410
 411/*
 412 * ensure page tables exist
 413 * mark page table entries with shm_sgn.
 414 */
 415static int shm_map (struct vm_area_struct *shmd)
 416{
 417        pgd_t *page_dir;
 418        pmd_t *page_middle;
 419        pte_t *page_table;
 420        unsigned long tmp, shm_sgn;
 421        int error;
 422
 423        /* clear old mappings */
 424        do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
 425
 426        /* add new mapping */
 427        tmp = shmd->vm_end - shmd->vm_start;
 428        if((current->mm->total_vm << PAGE_SHIFT) + tmp
 429           > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
 430                return -ENOMEM;
 431        current->mm->total_vm += tmp >> PAGE_SHIFT;
 432        insert_vm_struct(current->mm, shmd);
 433        merge_segments(current->mm, shmd->vm_start, shmd->vm_end);
 434
 435        /* map page range */
 436        error = 0;
 437        shm_sgn = shmd->vm_pte +
 438          SWP_ENTRY(0, (shmd->vm_offset >> PAGE_SHIFT) << SHM_IDX_SHIFT);
 439        flush_cache_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
 440        for (tmp = shmd->vm_start;
 441             tmp < shmd->vm_end;
 442             tmp += PAGE_SIZE, shm_sgn += SWP_ENTRY(0, 1 << SHM_IDX_SHIFT))
 443        {
 444                page_dir = pgd_offset(shmd->vm_mm,tmp);
 445                page_middle = pmd_alloc(page_dir,tmp);
 446                if (!page_middle) {
 447                        error = -ENOMEM;
 448                        break;
 449                }
 450                page_table = pte_alloc(page_middle,tmp);
 451                if (!page_table) {
 452                        error = -ENOMEM;
 453                        break;
 454                }
 455                set_pte(page_table, __pte(shm_sgn));
 456        }
 457        flush_tlb_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
 458        return error;
 459}
 460
 461/*
 462 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 463 */
 464asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
 465{
 466        struct shmid_ds *shp;
 467        struct vm_area_struct *shmd;
 468        int err;
 469        unsigned int id;
 470        unsigned long addr;
 471        unsigned long len;
 472
 473        if (shmid < 0) {
 474                /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
 475                return -EINVAL;
 476        }
 477
 478        shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
 479        if (shp == IPC_UNUSED || shp == IPC_NOID) {
 480                /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
 481                return -EINVAL;
 482        }
 483
 484        if (!(addr = (ulong) shmaddr)) {
 485                if (shmflg & SHM_REMAP)
 486                        return -EINVAL;
 487                if (!(addr = get_unmapped_area(0, shp->shm_segsz)))
 488                        return -ENOMEM;
 489        } else if (addr & (SHMLBA-1)) {
 490                if (shmflg & SHM_RND)
 491                        addr &= ~(SHMLBA-1);       /* round down */
 492                else
 493                        return -EINVAL;
 494        }
 495        /*
 496         * Check if addr exceeds TASK_SIZE (from do_mmap)
 497         */
 498        len = PAGE_SIZE*shp->shm_npages;
 499       if (addr >= TASK_SIZE || len > TASK_SIZE  || addr > TASK_SIZE - len)
 500                return -EINVAL;
 501        /*
 502         * If shm segment goes below stack, make sure there is some
 503         * space left for the stack to grow (presently 4 pages).
 504         */
 505        if (addr < current->mm->start_stack &&
 506            addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
 507        {
 508                /* printk("shmat() -> EINVAL because segment intersects stack\n"); */
 509                return -EINVAL;
 510        }
 511        if (!(shmflg & SHM_REMAP))
 512                if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->shm_segsz))) {
 513                        /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
 514                                addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
 515                        return -EINVAL;
 516                }
 517
 518        if (ipcperms(&shp->shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
 519                return -EACCES;
 520        if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
 521                return -EIDRM;
 522
 523        shmd = (struct vm_area_struct *) kmalloc (sizeof(*shmd), GFP_KERNEL);
 524        if (!shmd)
 525                return -ENOMEM;
 526        if ((shp != shm_segs[id]) || (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
 527                kfree(shmd);
 528                return -EIDRM;
 529        }
 530
 531        shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
 532        shmd->vm_start = addr;
 533        shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
 534        shmd->vm_mm = current->mm;
 535        shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
 536        shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
 537                         | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
 538                         | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
 539        shmd->vm_next_share = shmd->vm_prev_share = NULL;
 540        shmd->vm_inode = NULL;
 541        shmd->vm_offset = 0;
 542        shmd->vm_ops = &shm_vm_ops;
 543
 544        shp->shm_nattch++;            /* prevent destruction */
 545        if ((err = shm_map (shmd))) {
 546                if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
 547                        killseg(id);
 548                kfree(shmd);
 549                return err;
 550        }
 551
 552        insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
 553
 554        shp->shm_lpid = current->pid;
 555        shp->shm_atime = CURRENT_TIME;
 556
 557        *raddr = addr;
 558        return 0;
 559}
 560
 561/* This is called by fork, once for every shm attach. */
 562static void shm_open (struct vm_area_struct *shmd)
 563{
 564        unsigned int id;
 565        struct shmid_ds *shp;
 566
 567        id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
 568        shp = shm_segs[id];
 569        if (shp == IPC_UNUSED) {
 570                printk("shm_open: unused id=%d PANIC\n", id);
 571                return;
 572        }
 573        insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
 574        shp->shm_nattch++;
 575        shp->shm_atime = CURRENT_TIME;
 576        shp->shm_lpid = current->pid;
 577}
 578
 579/*
 580 * remove the attach descriptor shmd.
 581 * free memory for segment if it is marked destroyed.
 582 * The descriptor has already been removed from the current->mm->mmap list
 583 * and will later be kfree()d.
 584 */
 585static void shm_close (struct vm_area_struct *shmd)
 586{
 587        struct shmid_ds *shp;
 588        int id;
 589
 590        /* remove from the list of attaches of the shm segment */
 591        id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
 592        shp = shm_segs[id];
 593        remove_attach(shp,shmd);  /* remove from shp->attaches */
 594        shp->shm_lpid = current->pid;
 595        shp->shm_dtime = CURRENT_TIME;
 596        if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
 597                killseg (id);
 598}
 599
 600/*
 601 * detach and kill segment if marked destroyed.
 602 * The work is done in shm_close.
 603 */
 604asmlinkage int sys_shmdt (char *shmaddr)
 605{
 606        struct vm_area_struct *shmd, *shmdnext;
 607
 608        for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
 609                shmdnext = shmd->vm_next;
 610                if (shmd->vm_ops == &shm_vm_ops
 611                    && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
 612                        do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
 613        }
 614        return 0;
 615}
 616
 617/*
 618 * page not present ... go through shm_pages
 619 */
 620static pte_t shm_swap_in(struct vm_area_struct * shmd, unsigned long offset, unsigned long code)
 621{
 622        pte_t pte;
 623        struct shmid_ds *shp;
 624        unsigned int id, idx;
 625
 626        id = SWP_OFFSET(code) & SHM_ID_MASK;
 627        if (id != (SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK)) {
 628                printk ("shm_swap_in: code id = %d and shmd id = %ld differ\n",
 629                        id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
 630                return BAD_PAGE;
 631        }
 632        if (id > max_shmid) {
 633                printk ("shm_swap_in: id=%d too big. proc mem corrupted\n", id);
 634                return BAD_PAGE;
 635        }
 636        shp = shm_segs[id];
 637        if (shp == IPC_UNUSED || shp == IPC_NOID) {
 638                printk ("shm_swap_in: id=%d invalid. Race.\n", id);
 639                return BAD_PAGE;
 640        }
 641        idx = (SWP_OFFSET(code) >> SHM_IDX_SHIFT) & SHM_IDX_MASK;
 642        if (idx != (offset >> PAGE_SHIFT)) {
 643                printk ("shm_swap_in: code idx = %u and shmd idx = %lu differ\n",
 644                        idx, offset >> PAGE_SHIFT);
 645                return BAD_PAGE;
 646        }
 647        if (idx >= shp->shm_npages) {
 648                printk ("shm_swap_in : too large page index. id=%d\n", id);
 649                return BAD_PAGE;
 650        }
 651
 652        pte_val(pte) = shp->shm_pages[idx];
 653        if (!pte_present(pte)) {
 654                unsigned long page = get_free_page(GFP_KERNEL);
 655                if (!page) {
 656                        oom(current);
 657                        return BAD_PAGE;
 658                }
 659                pte_val(pte) = shp->shm_pages[idx];
 660                if (pte_present(pte)) {
 661                        free_page (page); /* doesn't sleep */
 662                        goto done;
 663                }
 664                if (!pte_none(pte)) {
 665                        read_swap_page(pte_val(pte), (char *) page);
 666                        pte_val(pte) = shp->shm_pages[idx];
 667                        if (pte_present(pte))  {
 668                                free_page (page); /* doesn't sleep */
 669                                goto done;
 670                        }
 671                        swap_free(pte_val(pte));
 672                        shm_swp--;
 673                }
 674                shm_rss++;
 675                pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
 676                shp->shm_pages[idx] = pte_val(pte);
 677        } else
 678                --current->maj_flt;  /* was incremented in do_no_page */
 679
 680done:   /* pte_val(pte) == shp->shm_pages[idx] */
 681        current->min_flt++;
 682        mem_map[MAP_NR(pte_page(pte))].count++;
 683        return pte_modify(pte, shmd->vm_page_prot);
 684}
 685
 686/*
 687 * Goes through counter = (shm_rss >> prio) present shm pages.
 688 */
 689static unsigned long swap_id = 0; /* currently being swapped */
 690static unsigned long swap_idx = 0; /* next to swap */
 691
 692int shm_swap (int prio, int dma)
 693{
 694        pte_t page;
 695        struct shmid_ds *shp;
 696        struct vm_area_struct *shmd;
 697        unsigned long swap_nr;
 698        unsigned long id, idx;
 699        int loop = 0;
 700        int counter;
 701        
 702        counter = shm_rss >> prio;
 703        if (!counter || !(swap_nr = get_swap_page()))
 704                return 0;
 705
 706 check_id:
 707        shp = shm_segs[swap_id];
 708        if (shp == IPC_UNUSED || shp == IPC_NOID || shp->shm_perm.mode & SHM_LOCKED ) {
 709                next_id:
 710                swap_idx = 0;
 711                if (++swap_id > max_shmid) {
 712                        if (loop)
 713                                goto failed;
 714                        loop = 1;
 715                        swap_id = 0;
 716                }
 717                goto check_id;
 718        }
 719        id = swap_id;
 720
 721 check_table:
 722        idx = swap_idx++;
 723        if (idx >= shp->shm_npages)
 724                goto next_id;
 725
 726        pte_val(page) = shp->shm_pages[idx];
 727        if (!pte_present(page))
 728                goto check_table;
 729        if (dma && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
 730                goto check_table;
 731        swap_attempts++;
 732
 733        if (--counter < 0) { /* failed */
 734                failed:
 735                swap_free (swap_nr);
 736                return 0;
 737        }
 738        if (shp->attaches)
 739          for (shmd = shp->attaches; ; ) {
 740            do {
 741                pgd_t *page_dir;
 742                pmd_t *page_middle;
 743                pte_t *page_table, pte;
 744                unsigned long tmp;
 745
 746                if ((SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK) != id) {
 747                        printk ("shm_swap: id=%ld does not match shmd->vm_pte.id=%ld\n",
 748                                id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
 749                        continue;
 750                }
 751                tmp = shmd->vm_start + (idx << PAGE_SHIFT) - shmd->vm_offset;
 752                if (!(tmp >= shmd->vm_start && tmp < shmd->vm_end))
 753                        continue;
 754                page_dir = pgd_offset(shmd->vm_mm,tmp);
 755                if (pgd_none(*page_dir) || pgd_bad(*page_dir)) {
 756                        printk("shm_swap: bad pgtbl! id=%ld start=%lx idx=%ld\n",
 757                                        id, shmd->vm_start, idx);
 758                        pgd_clear(page_dir);
 759                        continue;
 760                }
 761                page_middle = pmd_offset(page_dir,tmp);
 762                if (pmd_none(*page_middle) || pmd_bad(*page_middle)) {
 763                        printk("shm_swap: bad pgmid! id=%ld start=%lx idx=%ld\n",
 764                                        id, shmd->vm_start, idx);
 765                        pmd_clear(page_middle);
 766                        continue;
 767                }
 768                page_table = pte_offset(page_middle,tmp);
 769                pte = *page_table;
 770                if (!pte_present(pte))
 771                        continue;
 772                if (pte_young(pte)) {
 773                        set_pte(page_table, pte_mkold(pte));
 774                        continue;
 775                }
 776                if (pte_page(pte) != pte_page(page))
 777                        printk("shm_swap_out: page and pte mismatch\n");
 778                flush_cache_page(shmd, tmp);
 779                set_pte(page_table,
 780                  __pte(shmd->vm_pte + SWP_ENTRY(0, idx << SHM_IDX_SHIFT)));
 781                mem_map[MAP_NR(pte_page(pte))].count--;
 782                if (shmd->vm_mm->rss > 0)
 783                        shmd->vm_mm->rss--;
 784                flush_tlb_page(shmd, tmp);
 785            /* continue looping through circular list */
 786            } while (0);
 787            if ((shmd = shmd->vm_next_share) == shp->attaches)
 788                break;
 789        }
 790
 791        if (mem_map[MAP_NR(pte_page(page))].count != 1)
 792                goto check_table;
 793        shp->shm_pages[idx] = swap_nr;
 794        write_swap_page (swap_nr, (char *) pte_page(page));
 795        free_page(pte_page(page));
 796        swap_successes++;
 797        shm_swp++;
 798        shm_rss--;
 799        return 1;
 800}
 801
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */