linux-old/include/linux/mm.h
<<
>>
Prefs
   1#ifndef _LINUX_MM_H
   2#define _LINUX_MM_H
   3
   4#include <linux/sched.h>
   5#include <linux/errno.h>
   6
   7#ifdef __KERNEL__
   8
   9#include <linux/config.h>
  10#include <linux/string.h>
  11#include <linux/list.h>
  12#include <linux/mmzone.h>
  13
  14extern unsigned long max_mapnr;
  15extern unsigned long num_physpages;
  16extern void * high_memory;
  17extern int page_cluster;
  18
  19#include <asm/page.h>
  20#include <asm/pgtable.h>
  21#include <asm/atomic.h>
  22
  23/*
  24 * Linux kernel virtual memory manager primitives.
  25 * The idea being to have a "virtual" mm in the same way
  26 * we have a virtual fs - giving a cleaner interface to the
  27 * mm details, and allowing different kinds of memory mappings
  28 * (from shared memory to executable loading to arbitrary
  29 * mmap() functions).
  30 */
  31
  32/*
  33 * This struct defines a VMM memory area. There is one of these
  34 * per VM-area/task.  A VM area is any part of the process virtual memory
  35 * space that has a special rule for the page-fault handlers (ie a shared
  36 * library, the executable area etc).
  37 */
  38struct vm_area_struct {
  39        struct mm_struct * vm_mm;       /* VM area parameters */
  40        unsigned long vm_start;         /* start address; expand_stack() lowers it to grow the area */
  41        unsigned long vm_end;           /* find_vma() matches on addr < vm_end */
  42
  43        /* linked list of VM areas per task, sorted by address */
  44        struct vm_area_struct *vm_next;
  45
  46        pgprot_t vm_page_prot;
  47        unsigned long vm_flags;         /* VM_* bits, see "vm_flags.." */
  48
  49        /* AVL tree of VM areas per task, sorted by address */
  50        short vm_avl_height;
  51        struct vm_area_struct * vm_avl_left;
  52        struct vm_area_struct * vm_avl_right;
  53
  54        /* For areas with inode, the list inode->i_mmap, for shm areas,
  55         * the list of attaches, otherwise unused.
  56         */
  57        struct vm_area_struct *vm_next_share;
  58        struct vm_area_struct **vm_pprev_share;
  59
  60        struct vm_operations_struct * vm_ops;   /* the "virtual MM functions" of this area */
  61        unsigned long vm_pgoff;         /* offset in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
  62        struct file * vm_file;
  63        void * vm_private_data;         /* was vm_pte (shared mem) */
  64};
  65
  66/*
  67 * vm_flags..
  68 */
  69#define VM_READ         0x00000001      /* currently active flags */
  70#define VM_WRITE        0x00000002
  71#define VM_EXEC         0x00000004
  72#define VM_SHARED       0x00000008
  73
  74#define VM_MAYREAD      0x00000010      /* limits for mprotect() etc */
  75#define VM_MAYWRITE     0x00000020
  76#define VM_MAYEXEC      0x00000040
  77#define VM_MAYSHARE     0x00000080
  78
  79#define VM_GROWSDOWN    0x00000100      /* general info on the segment */
  80#define VM_GROWSUP      0x00000200
  81#define VM_SHM          0x00000400      /* shared memory area, don't swap out */
  82#define VM_DENYWRITE    0x00000800      /* ETXTBSY on write attempts.. */
  83
  84#define VM_EXECUTABLE   0x00001000
  85#define VM_LOCKED       0x00002000
  86#define VM_IO           0x00004000  /* Memory mapped I/O or similar */
  87
  88#define VM_STACK_FLAGS  0x00000177
  89
  90/*
  91 * Maps the currently active vm_flags protection bits (the low four
  92 * bits: VM_READ, VM_WRITE, VM_EXEC, VM_SHARED) to a page protection
  93 * mask; four bits give the 16 entries.
  94extern pgprot_t protection_map[16];
  95
  96
  97/*
  98 * These are the virtual MM functions - opening of an area, closing and
  99 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 100 * to the functions called when a no-page or a wp-page exception occurs.
 101 */
 102struct vm_operations_struct {
 103        void (*open)(struct vm_area_struct * area);
 104        void (*close)(struct vm_area_struct * area);
 105        void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);
 106        void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
 107        int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);    /* same signature as filemap_sync() */
 108        void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
 109        struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access); /* errors: NOPAGE_SIGBUS / NOPAGE_OOM */
 110        struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page);
 111        int (*swapout)(struct page *, struct file *);   /* same signature as filemap_swapout() */
 112};
 113
 114/*
 115 * A swap entry has to fit into an "unsigned long", as
 116 * the entry is hidden in the "index" field (page->index) of the
 117 * swapper address space.
 118 */
 119typedef struct {
 120        unsigned long val;      /* the whole entry; wrapped in a struct so it is a distinct type */
 121} swp_entry_t;
 122
 123/*
 124 * Try to keep the most commonly accessed fields in single cache lines
 125 * here (16 bytes or greater).  This ordering should be particularly
 126 * beneficial on 32-bit processors, so do not reorder the members.
 127 *
 128 * The first line is data used in page cache lookup, the second line
 129 * is used for linear searches (eg. clock algorithm scans).
 130 */
 131typedef struct page {
 132        struct list_head list;
 133        struct address_space *mapping;
 134        unsigned long index;            /* for the swapper address space this hides a swp_entry_t */
 135        struct page *next_hash;         /* page hash chain, used together with pprev_hash */
 136        atomic_t count;                 /* reference count; 0 means the page is free */
 137        unsigned long flags;    /* atomic flags, some possibly updated asynchronously */
 138        struct list_head lru;
 139        wait_queue_head_t wait;         /* tasks waiting for I/O on this page; woken by UnlockPage() */
 140        struct page **pprev_hash;
 141        struct buffer_head * buffers;   /* circular list of buffer heads, NULL if none */
 142        unsigned long virtual; /* nonzero if kmapped */
 143        struct zone_struct *zone;       /* tells whether the page is suited for DMA */
 144} mem_map_t;
 145
 146#define get_page(p)             atomic_inc(&(p)->count)         /* take a reference on the page */
 147#define put_page(p)             __free_page(p)                  /* drop a reference; __free_pages() frees the page on the last one */
 148#define put_page_testzero(p)    atomic_dec_and_test(&(p)->count)        /* drop a reference; __free_pages() frees when this is nonzero */
 149#define page_count(p)           atomic_read(&(p)->count)        /* current value of the reference count */
 150#define set_page_count(p,v)     atomic_set(&(p)->count, v)      /* overwrite the reference count with v */
 151
 152/* Page flag bit values: bit numbers (not masks) within page->flags */
 153#define PG_locked                0      /* set before disk I/O, reset when the I/O completes */
 154#define PG_error                 1      /* an I/O error occurred on this page */
 155#define PG_referenced            2      /* set when the page is accessed through the hash table */
 156#define PG_uptodate              3      /* the page's contents are valid */
 157#define PG__unused_00            4
 158#define PG_decr_after            5
 159#define PG_unused_01             6
 160#define PG__unused_02            7
 161#define PG_slab                  8
 162#define PG_swap_cache            9
 163#define PG_skip                 10      /* sparc/sparc64: "skip" certain parts of the address space */
 164#define PG_swap_entry           11
 165#define PG_highmem              12
 166                                /* bits 13-30 unused */
 167#define PG_reserved             31      /* page must never be accessed (may not even be present) */
 168
 169
 170/* Make it prettier to test the above... */
 171#define Page_Uptodate(page)     test_bit(PG_uptodate, &(page)->flags)
 172#define SetPageUptodate(page)   set_bit(PG_uptodate, &(page)->flags)
 173#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
 174#define PageLocked(page)        test_bit(PG_locked, &(page)->flags)
 175#define LockPage(page)          set_bit(PG_locked, &(page)->flags)
 176#define TryLockPage(page)       test_and_set_bit(PG_locked, &(page)->flags)
 177#define UnlockPage(page)        do { \
 178                                        clear_bit(PG_locked, &(page)->flags); \
 179                                        wake_up(&page->wait); \
 180                                } while (0)
 181#define PageError(page)         test_bit(PG_error, &(page)->flags)
 182#define SetPageError(page)      test_and_set_bit(PG_error, &(page)->flags)
 183#define ClearPageError(page)    clear_bit(PG_error, &(page)->flags)
 184#define PageReferenced(page)    test_bit(PG_referenced, &(page)->flags)
 185#define PageDecrAfter(page)     test_bit(PG_decr_after, &(page)->flags)
 186#define PageSlab(page)          test_bit(PG_slab, &(page)->flags)
 187#define PageSwapCache(page)     test_bit(PG_swap_cache, &(page)->flags)
 188#define PageReserved(page)      test_bit(PG_reserved, &(page)->flags)
 189
 190#define PageSetSlab(page)       set_bit(PG_slab, &(page)->flags)
 191#define PageSetSwapCache(page)  set_bit(PG_swap_cache, &(page)->flags)
 192
 193#define PageTestandSetSwapCache(page)   test_and_set_bit(PG_swap_cache, &(page)->flags)
 194
 195#define PageClearSlab(page)             clear_bit(PG_slab, &(page)->flags)
 196#define PageClearSwapCache(page)        clear_bit(PG_swap_cache, &(page)->flags)
 197
 198#define PageTestandClearSwapCache(page) test_and_clear_bit(PG_swap_cache, &(page)->flags)
 199
 200#ifdef CONFIG_HIGHMEM
 201#define PageHighMem(page)               test_bit(PG_highmem, &(page)->flags)
 202#else
 203#define PageHighMem(page)               0 /* needed to optimize away at compile time */
 204#endif
 205
 206#define SetPageReserved(page)           set_bit(PG_reserved, &(page)->flags)
 207#define ClearPageReserved(page)         clear_bit(PG_reserved, &(page)->flags)
 208
 209/*
 210 * Error return values for the *_nopage functions (the 'nopage' member
 211 * of struct vm_operations_struct returns a 'struct page *').
 212#define NOPAGE_SIGBUS   (NULL)                  /* presumably: the fault should end in SIGBUS - confirm in the fault handler */
 213#define NOPAGE_OOM      ((struct page *) (-1))  /* presumably: out of memory - confirm in the fault handler */
 214
 215
 216/*
 217 * Various page->flags bits:
 218 *
 219 * PG_reserved is set for a page which must never be accessed (which
 220 * may not even be present).
 221 *
 222 * PG_DMA has been removed, page->zone now tells exactly whether the
 223 * page is suited to do DMAing into.
 224 *
 225 * Multiple processes may "see" the same page. E.g. for untouched
 226 * mappings of /dev/null, all processes see the same page full of
 227 * zeroes, and text pages of executables and shared libraries have
 228 * only one copy in memory, at most, normally.
 229 *
 230 * For the non-reserved pages, page->count denotes a reference count.
 231 *   page->count == 0 means the page is free.
 232 *   page->count == 1 means the page is used for exactly one purpose
 233 *   (e.g. a private data page of one process).
 234 *
 235 * A page may be used for kmalloc() or anyone else who does a
 236 * __get_free_page(). In this case the page->count is at least 1, and
 237 * all other fields are unused but should be 0 or NULL. The
 238 * management of this page is the responsibility of the one who uses
 239 * it.
 240 *
 241 * The other pages (we may call them "process pages") are completely
 242 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 243 * The following discussion applies only to them.
 244 *
 245 * A page may belong to an inode's memory mapping. In this case,
 246 * page->inode is the pointer to the inode, and page->offset is the
 247 * file offset of the page (not necessarily a multiple of PAGE_SIZE).
 248 *
 249 * A page may have buffers allocated to it. In this case,
 250 * page->buffers is a circular list of these buffer heads. Else,
 251 * page->buffers == NULL.
 252 *
 253 * For pages belonging to inodes, the page->count is the number of
 254 * attaches, plus 1 if buffers are allocated to the page.
 255 *
 256 * All pages belonging to an inode make up a doubly linked list
 257 * inode->i_pages, using the fields page->next and page->prev. (These
 258 * fields are also used for freelist management when page->count==0.)
 259 * There is also a hash table mapping (inode,offset) to the page
 260 * in memory if present. The lists for this hash table use the fields
 261 * page->next_hash and page->pprev_hash.
 262 *
 263 * All process pages can do I/O:
 264 * - inode pages may need to be read from disk,
 265 * - inode pages which have been modified and are MAP_SHARED may need
 266 *   to be written to disk,
 267 * - private pages which have been modified may need to be swapped out
 268 *   to swap space and (later) to be read back into memory.
 269 * During disk I/O, PG_locked is used. This bit is set before I/O
 270 * and reset when I/O completes. page->wait is a wait queue of all
 271 * tasks waiting for the I/O on this page to complete.
 272 * PG_uptodate tells whether the page's contents is valid.
 273 * When a read completes, the page becomes uptodate, unless a disk I/O
 274 * error happened.
 275 *
 276 * For choosing which pages to swap out, inode pages carry a
 277 * PG_referenced bit, which is set any time the system accesses
 278 * that page through the (inode,offset) hash table.
 279 *
 280 * PG_skip is used on sparc/sparc64 architectures to "skip" certain
 281 * parts of the address space.
 282 *
 283 * PG_error is set to indicate that an I/O error occurred on this page.
 284 */
 285
 286extern mem_map_t * mem_map;     /* page array; free_pages() indexes it with MAP_NR(addr) */
 287
 288/*
 289 * There is only one page-allocator function, and two main namespaces to
 290 * it. The alloc_page*() variants return 'struct page *' and as such
 291 * can allocate highmem pages, the *get*page*() variants return
 292 * virtual kernel addresses to the allocated page(s).
 293 */
 294extern struct page * FASTCALL(__alloc_pages(zonelist_t *zonelist, unsigned long order));       /* the one allocator function */
 295extern struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order);
 296
 297#ifndef CONFIG_DISCONTIGMEM
 298extern inline struct page * alloc_pages(int gfp_mask, unsigned long order)
 299{
 300        /*  temporary check. */
 301        if (contig_page_data.node_zonelists[gfp_mask].gfp_mask != (gfp_mask))
 302                BUG();
 303        /*
 304         * Gets optimized away by the compiler.
 305         */
 306        if (order >= MAX_ORDER)
 307                return NULL;
 308        return __alloc_pages(contig_page_data.node_zonelists+(gfp_mask), order);
 309}
 310#else /* !CONFIG_DISCONTIGMEM */
 311extern struct page * alloc_pages(int gfp_mask, unsigned long order);
 312#endif /* !CONFIG_DISCONTIGMEM */
 313
 314#define alloc_page(gfp_mask) \
 315                alloc_pages(gfp_mask, 0)
 316
 317extern inline unsigned long __get_free_pages (int gfp_mask, unsigned long order)
 318{
 319        struct page * page;
 320
 321        page = alloc_pages(gfp_mask, order);
 322        if (!page)
 323                return 0;
 324        return page_address(page);
 325}
 326
 327#define __get_free_page(gfp_mask) \
 328                __get_free_pages((gfp_mask),0)
 329
 330#define __get_dma_pages(gfp_mask, order) \
 331                __get_free_pages((gfp_mask) | GFP_DMA,(order))
 332
 333extern inline unsigned long get_zeroed_page(int gfp_mask)
 334{
 335        unsigned long page;
 336
 337        page = __get_free_page(gfp_mask);
 338        if (page)
 339                clear_page((void *)page);
 340        return page;
 341}
 342
 343/*
 344 * The old interface name will be removed in 2.5:
 345 */
 346#define get_free_page get_zeroed_page
 347
 348/*
 349 * There is only one 'core' page-freeing function.
 350 */
 351extern void FASTCALL(__free_pages_ok(struct page * page, unsigned long order));
 352
 353extern inline void __free_pages(struct page *page, unsigned long order)
 354{
 355        if (!put_page_testzero(page))
 356                return;
 357        __free_pages_ok(page, order);
 358}
 359
 360#define __free_page(page) __free_pages(page, 0)
 361
 362extern inline void free_pages(unsigned long addr, unsigned long order)
 363{
 364        unsigned long map_nr;
 365
 366#ifdef CONFIG_DISCONTIGMEM
 367        if (addr == 0) return;
 368#endif
 369        map_nr = MAP_NR(addr);
 370        if (map_nr < max_mapnr)
 371                __free_pages(mem_map + map_nr, order);
 372}
 373
 374#define free_page(addr) free_pages((addr),0)
 375
    /* Declarations only: none of the functions below is defined in this header. */
 376extern void show_free_areas(void);
 377extern void show_free_areas_node(int nid);
 378
 379extern void clear_page_tables(struct mm_struct *, unsigned long, int);
 380
 381extern int map_zero_setup(struct vm_area_struct *);
 382
    /* Operations on a range of addresses; 'size' presumably in bytes - confirm against the definitions. */
 383extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
 384extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 385extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 386extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
 387
 388extern void vmtruncate(struct inode * inode, loff_t offset);
 389extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
 390extern int make_pages_present(unsigned long addr, unsigned long end);
    /* Access to another task's memory; note 'len' is a plain int in all three. */
 391extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 392extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
 393extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
 394
 395extern int pgt_cache_water[2];
 396extern int check_pgt_cache(void);
 397
 398extern void free_area_init(unsigned long * zones_size);
 399extern void free_area_init_node(int nid, pg_data_t *pgdat, 
 400                unsigned long * zones_size, unsigned long zone_start_paddr);
 401extern void mem_init(void);
 402extern void show_mem(void);
 403extern void si_meminfo(struct sysinfo * val);
 404extern void swapin_readahead(swp_entry_t);     /* takes the swap entry by value */
 405
 406/* mmap.c */
 407extern void vma_init(void);
 408extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
 409extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 410extern void build_mmap_avl(struct mm_struct *);
 411extern void exit_mmap(struct mm_struct *);
 412extern unsigned long get_unmapped_area(unsigned long, unsigned long);
 413
    /* Page-offset variant of mmap; the inline do_mmap() below converts a byte offset and calls this. */
 414extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 415        unsigned long len, unsigned long prot,
 416        unsigned long flag, unsigned long pgoff);
 417
 418extern inline unsigned long do_mmap(struct file *file, unsigned long addr,
 419        unsigned long len, unsigned long prot,
 420        unsigned long flag, unsigned long offset)
 421{
 422        unsigned long ret = -EINVAL;
 423        if ((offset + PAGE_ALIGN(len)) < offset)
 424                goto out;
 425        if (!(offset & ~PAGE_MASK))
 426                ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
 427out:
 428        return ret;
 429}
 430
 431extern int do_munmap(unsigned long, size_t);
 432extern unsigned long do_brk(unsigned long, unsigned long);
 433
 434struct zone_t;  /* NOTE(review): shrink_mmap() below uses the typedef zone_t, not this struct tag - looks unused, confirm */
 435/* filemap.c */
 436extern void remove_inode_page(struct page *);
 437extern unsigned long page_unuse(struct page *);
 438extern int shrink_mmap(int, int, zone_t *);
 439extern void truncate_inode_pages(struct address_space *, loff_t);
 440
 441/* generic vm_area_ops exported for stackable file systems */
 442extern int filemap_swapout(struct page * page, struct file *file);     /* matches vm_operations_struct.swapout */
 443extern pte_t filemap_swapin(struct vm_area_struct * vma,
 444                            unsigned long offset, unsigned long entry);
 445extern int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 446                        size_t size, unsigned int flags);      /* matches vm_operations_struct.sync */
 447extern struct page *filemap_nopage(struct vm_area_struct * area,
 448                                    unsigned long address, int no_share);      /* matches vm_operations_struct.nopage, which names the last argument write_access */
 449
 450/*
 451 * GFP bitmasks..
 452 */
 453#define __GFP_WAIT      0x01
 454#define __GFP_HIGH      0x02
 455#define __GFP_IO        0x04
 456#define __GFP_DMA       0x08
 457#ifdef CONFIG_HIGHMEM
 458#define __GFP_HIGHMEM   0x10
 459#else
 460#define __GFP_HIGHMEM   0x0 /* noop */
 461#endif
 462
 463
 464#define GFP_BUFFER      (__GFP_HIGH | __GFP_WAIT)
 465#define GFP_ATOMIC      (__GFP_HIGH)
 466#define GFP_USER        (__GFP_WAIT | __GFP_IO)
 467#define GFP_HIGHUSER    (GFP_USER | __GFP_HIGHMEM)
 468#define GFP_KERNEL      (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
 469#define GFP_NFS         (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
 470#define GFP_KSWAPD      (__GFP_IO)
 471
 472/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
 473   platforms, used as appropriate on others */
 474
 475#define GFP_DMA         __GFP_DMA
 476
 477/* Flag - indicates that the buffer can be taken from high memory which is not
 478   permanently mapped by the kernel */
 479
 480#define GFP_HIGHMEM     __GFP_HIGHMEM
 481
 482/* vma is the first one with  address < vma->vm_end,
 483 * and even  address < vma->vm_start. Have to extend vma. */
 484static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
 485{
 486        unsigned long grow;
 487
 488        address &= PAGE_MASK;
 489        grow = (vma->vm_start - address) >> PAGE_SHIFT;
 490        if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
 491            ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)
 492                return -ENOMEM;
 493        vma->vm_start = address;
 494        vma->vm_pgoff -= grow;
 495        vma->vm_mm->total_vm += grow;
 496        if (vma->vm_flags & VM_LOCKED)
 497                vma->vm_mm->locked_vm += grow;
 498        return 0;
 499}
 500
 501/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none.
    * The VMA found may start above addr: callers compare against vm_start themselves. */
 502extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
 503extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 504                                             struct vm_area_struct **pprev);
 505
 506/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
 507   NULL if none.  Assume start_addr < end_addr. */
 508static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
 509{
 510        struct vm_area_struct * vma = find_vma(mm,start_addr);
 511
 512        if (vma && end_addr <= vma->vm_start)
 513                vma = NULL;
 514        return vma;
 515}
 516
 517extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
 518
    /*
     * True when the number of buffer pages (resp. page-cache pages),
     * taken as a percentage of num_physpages, is below the matching
     * min_percent setting.  Written as a multiplication so that no
     * division is needed.
     */
 519#define buffer_under_min()      (atomic_read(&buffermem_pages) * 100 < \
 520                                buffer_mem.min_percent * num_physpages)
 521#define pgcache_under_min()     (atomic_read(&page_cache_size) * 100 < \
 522                                page_cache.min_percent * num_physpages)
 523
 524#define vmlist_access_lock(mm)          spin_lock(&mm->page_table_lock)
 525#define vmlist_access_unlock(mm)        spin_unlock(&mm->page_table_lock)
 526#define vmlist_modify_lock(mm)          vmlist_access_lock(mm)
 527#define vmlist_modify_unlock(mm)        vmlist_access_unlock(mm)
 528
 529#endif /* __KERNEL__ */
 530
 531#endif
 532
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.