linux/mm/swap.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/swap.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5 */
   6
/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */
  15
  16#include <linux/mm.h>
  17#include <linux/sched.h>
  18#include <linux/kernel_stat.h>
  19#include <linux/swap.h>
  20#include <linux/mman.h>
  21#include <linux/pagemap.h>
  22#include <linux/pagevec.h>
  23#include <linux/init.h>
  24#include <linux/module.h>
  25#include <linux/mm_inline.h>
  26#include <linux/buffer_head.h>  /* for try_to_release_page() */
  27#include <linux/module.h>
  28#include <linux/percpu_counter.h>
  29#include <linux/percpu.h>
  30#include <linux/cpu.h>
  31#include <linux/notifier.h>
  32#include <linux/init.h>
  33
/*
 * How many pages do we try to swap or page in/out together?
 *
 * swap_setup() initialises this to 2 on machines with fewer than 16
 * megabytes of physical pages and to 3 otherwise.
 * NOTE(review): presumably consumers treat this as a log2 exponent
 * (1 << page_cluster pages) - confirm against the users of this variable.
 */
int page_cluster;
  36
  37/*
  38 * This path almost never happens for VM activity - pages are normally
  39 * freed via pagevecs.  But it gets used by networking.
  40 */
  41static void fastcall __page_cache_release(struct page *page)
  42{
  43        if (PageLRU(page)) {
  44                unsigned long flags;
  45                struct zone *zone = page_zone(page);
  46
  47                spin_lock_irqsave(&zone->lru_lock, flags);
  48                VM_BUG_ON(!PageLRU(page));
  49                __ClearPageLRU(page);
  50                del_page_from_lru(zone, page);
  51                spin_unlock_irqrestore(&zone->lru_lock, flags);
  52        }
  53        free_hot_page(page);
  54}
  55
  56static void put_compound_page(struct page *page)
  57{
  58        page = compound_head(page);
  59        if (put_page_testzero(page)) {
  60                compound_page_dtor *dtor;
  61
  62                dtor = get_compound_page_dtor(page);
  63                (*dtor)(page);
  64        }
  65}
  66
  67void put_page(struct page *page)
  68{
  69        if (unlikely(PageCompound(page)))
  70                put_compound_page(page);
  71        else if (put_page_testzero(page))
  72                __page_cache_release(page);
  73}
  74EXPORT_SYMBOL(put_page);
  75
  76/**
  77 * put_pages_list(): release a list of pages
  78 *
  79 * Release a list of pages which are strung together on page.lru.  Currently
  80 * used by read_cache_pages() and related error recovery code.
  81 *
  82 * @pages: list of pages threaded on page->lru
  83 */
  84void put_pages_list(struct list_head *pages)
  85{
  86        while (!list_empty(pages)) {
  87                struct page *victim;
  88
  89                victim = list_entry(pages->prev, struct page, lru);
  90                list_del(&victim->lru);
  91                page_cache_release(victim);
  92        }
  93}
  94EXPORT_SYMBOL(put_pages_list);
  95
  96/*
  97 * Writeback is about to end against a page which has been marked for immediate
  98 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  99 * inactive list.  The page still has PageWriteback set, which will pin it.
 100 *
 101 * We don't expect many pages to come through here, so don't bother batching
 102 * things up.
 103 *
 104 * To avoid placing the page at the tail of the LRU while PG_writeback is still
 105 * set, this function will clear PG_writeback before performing the page
 106 * motion.  Do that inside the lru lock because once PG_writeback is cleared
 107 * we may not touch the page.
 108 *
 109 * Returns zero if it cleared PG_writeback.
 110 */
 111int rotate_reclaimable_page(struct page *page)
 112{
 113        struct zone *zone;
 114        unsigned long flags;
 115
 116        if (PageLocked(page))
 117                return 1;
 118        if (PageDirty(page))
 119                return 1;
 120        if (PageActive(page))
 121                return 1;
 122        if (!PageLRU(page))
 123                return 1;
 124
 125        zone = page_zone(page);
 126        spin_lock_irqsave(&zone->lru_lock, flags);
 127        if (PageLRU(page) && !PageActive(page)) {
 128                list_move_tail(&page->lru, &zone->inactive_list);
 129                __count_vm_event(PGROTATED);
 130        }
 131        if (!test_clear_page_writeback(page))
 132                BUG();
 133        spin_unlock_irqrestore(&zone->lru_lock, flags);
 134        return 0;
 135}
 136
 137/*
 138 * FIXME: speed this up?
 139 */
 140void fastcall activate_page(struct page *page)
 141{
 142        struct zone *zone = page_zone(page);
 143
 144        spin_lock_irq(&zone->lru_lock);
 145        if (PageLRU(page) && !PageActive(page)) {
 146                del_page_from_inactive_list(zone, page);
 147                SetPageActive(page);
 148                add_page_to_active_list(zone, page);
 149                __count_vm_event(PGACTIVATE);
 150        }
 151        spin_unlock_irq(&zone->lru_lock);
 152}
 153
 154/*
 155 * Mark a page as having seen activity.
 156 *
 157 * inactive,unreferenced        ->      inactive,referenced
 158 * inactive,referenced          ->      active,unreferenced
 159 * active,unreferenced          ->      active,referenced
 160 */
 161void fastcall mark_page_accessed(struct page *page)
 162{
 163        if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
 164                activate_page(page);
 165                ClearPageReferenced(page);
 166        } else if (!PageReferenced(page)) {
 167                SetPageReferenced(page);
 168        }
 169}
 170
 171EXPORT_SYMBOL(mark_page_accessed);
 172
 173/**
 174 * lru_cache_add: add a page to the page lists
 175 * @page: the page to add
 176 */
 177static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
 178static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
 179
 180void fastcall lru_cache_add(struct page *page)
 181{
 182        struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
 183
 184        page_cache_get(page);
 185        if (!pagevec_add(pvec, page))
 186                __pagevec_lru_add(pvec);
 187        put_cpu_var(lru_add_pvecs);
 188}
 189
 190void fastcall lru_cache_add_active(struct page *page)
 191{
 192        struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
 193
 194        page_cache_get(page);
 195        if (!pagevec_add(pvec, page))
 196                __pagevec_lru_add_active(pvec);
 197        put_cpu_var(lru_add_active_pvecs);
 198}
 199
 200static void __lru_add_drain(int cpu)
 201{
 202        struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
 203
 204        /* CPU is dead, so no locking needed. */
 205        if (pagevec_count(pvec))
 206                __pagevec_lru_add(pvec);
 207        pvec = &per_cpu(lru_add_active_pvecs, cpu);
 208        if (pagevec_count(pvec))
 209                __pagevec_lru_add_active(pvec);
 210}
 211
 212void lru_add_drain(void)
 213{
 214        __lru_add_drain(get_cpu());
 215        put_cpu();
 216}
 217
 218#ifdef CONFIG_NUMA
 219static void lru_add_drain_per_cpu(struct work_struct *dummy)
 220{
 221        lru_add_drain();
 222}
 223
 224/*
 225 * Returns 0 for success
 226 */
 227int lru_add_drain_all(void)
 228{
 229        return schedule_on_each_cpu(lru_add_drain_per_cpu);
 230}
 231
 232#else
 233
 234/*
 235 * Returns 0 for success
 236 */
 237int lru_add_drain_all(void)
 238{
 239        lru_add_drain();
 240        return 0;
 241}
 242#endif
 243
 244/*
 245 * Batched page_cache_release().  Decrement the reference count on all the
 246 * passed pages.  If it fell to zero then remove the page from the LRU and
 247 * free it.
 248 *
 249 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 250 * for the remainder of the operation.
 251 *
 252 * The locking in this function is against shrink_cache(): we recheck the
 253 * page count inside the lock to see whether shrink_cache grabbed the page
 254 * via the LRU.  If it did, give up: shrink_cache will free it.
 255 */
 256void release_pages(struct page **pages, int nr, int cold)
 257{
 258        int i;
 259        struct pagevec pages_to_free;
 260        struct zone *zone = NULL;
 261
 262        pagevec_init(&pages_to_free, cold);
 263        for (i = 0; i < nr; i++) {
 264                struct page *page = pages[i];
 265
 266                if (unlikely(PageCompound(page))) {
 267                        if (zone) {
 268                                spin_unlock_irq(&zone->lru_lock);
 269                                zone = NULL;
 270                        }
 271                        put_compound_page(page);
 272                        continue;
 273                }
 274
 275                if (!put_page_testzero(page))
 276                        continue;
 277
 278                if (PageLRU(page)) {
 279                        struct zone *pagezone = page_zone(page);
 280                        if (pagezone != zone) {
 281                                if (zone)
 282                                        spin_unlock_irq(&zone->lru_lock);
 283                                zone = pagezone;
 284                                spin_lock_irq(&zone->lru_lock);
 285                        }
 286                        VM_BUG_ON(!PageLRU(page));
 287                        __ClearPageLRU(page);
 288                        del_page_from_lru(zone, page);
 289                }
 290
 291                if (!pagevec_add(&pages_to_free, page)) {
 292                        if (zone) {
 293                                spin_unlock_irq(&zone->lru_lock);
 294                                zone = NULL;
 295                        }
 296                        __pagevec_free(&pages_to_free);
 297                        pagevec_reinit(&pages_to_free);
 298                }
 299        }
 300        if (zone)
 301                spin_unlock_irq(&zone->lru_lock);
 302
 303        pagevec_free(&pages_to_free);
 304}
 305
 306/*
 307 * The pages which we're about to release may be in the deferred lru-addition
 308 * queues.  That would prevent them from really being freed right now.  That's
 309 * OK from a correctness point of view but is inefficient - those pages may be
 310 * cache-warm and we want to give them back to the page allocator ASAP.
 311 *
 312 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 313 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 314 * mutual recursion.
 315 */
 316void __pagevec_release(struct pagevec *pvec)
 317{
 318        lru_add_drain();
 319        release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
 320        pagevec_reinit(pvec);
 321}
 322
 323EXPORT_SYMBOL(__pagevec_release);
 324
 325/*
 326 * pagevec_release() for pages which are known to not be on the LRU
 327 *
 328 * This function reinitialises the caller's pagevec.
 329 */
 330void __pagevec_release_nonlru(struct pagevec *pvec)
 331{
 332        int i;
 333        struct pagevec pages_to_free;
 334
 335        pagevec_init(&pages_to_free, pvec->cold);
 336        for (i = 0; i < pagevec_count(pvec); i++) {
 337                struct page *page = pvec->pages[i];
 338
 339                VM_BUG_ON(PageLRU(page));
 340                if (put_page_testzero(page))
 341                        pagevec_add(&pages_to_free, page);
 342        }
 343        pagevec_free(&pages_to_free);
 344        pagevec_reinit(pvec);
 345}
 346
 347/*
 348 * Add the passed pages to the LRU, then drop the caller's refcount
 349 * on them.  Reinitialises the caller's pagevec.
 350 */
 351void __pagevec_lru_add(struct pagevec *pvec)
 352{
 353        int i;
 354        struct zone *zone = NULL;
 355
 356        for (i = 0; i < pagevec_count(pvec); i++) {
 357                struct page *page = pvec->pages[i];
 358                struct zone *pagezone = page_zone(page);
 359
 360                if (pagezone != zone) {
 361                        if (zone)
 362                                spin_unlock_irq(&zone->lru_lock);
 363                        zone = pagezone;
 364                        spin_lock_irq(&zone->lru_lock);
 365                }
 366                VM_BUG_ON(PageLRU(page));
 367                SetPageLRU(page);
 368                add_page_to_inactive_list(zone, page);
 369        }
 370        if (zone)
 371                spin_unlock_irq(&zone->lru_lock);
 372        release_pages(pvec->pages, pvec->nr, pvec->cold);
 373        pagevec_reinit(pvec);
 374}
 375
 376EXPORT_SYMBOL(__pagevec_lru_add);
 377
 378void __pagevec_lru_add_active(struct pagevec *pvec)
 379{
 380        int i;
 381        struct zone *zone = NULL;
 382
 383        for (i = 0; i < pagevec_count(pvec); i++) {
 384                struct page *page = pvec->pages[i];
 385                struct zone *pagezone = page_zone(page);
 386
 387                if (pagezone != zone) {
 388                        if (zone)
 389                                spin_unlock_irq(&zone->lru_lock);
 390                        zone = pagezone;
 391                        spin_lock_irq(&zone->lru_lock);
 392                }
 393                VM_BUG_ON(PageLRU(page));
 394                SetPageLRU(page);
 395                VM_BUG_ON(PageActive(page));
 396                SetPageActive(page);
 397                add_page_to_active_list(zone, page);
 398        }
 399        if (zone)
 400                spin_unlock_irq(&zone->lru_lock);
 401        release_pages(pvec->pages, pvec->nr, pvec->cold);
 402        pagevec_reinit(pvec);
 403}
 404
 405/*
 406 * Try to drop buffers from the pages in a pagevec
 407 */
 408void pagevec_strip(struct pagevec *pvec)
 409{
 410        int i;
 411
 412        for (i = 0; i < pagevec_count(pvec); i++) {
 413                struct page *page = pvec->pages[i];
 414
 415                if (PagePrivate(page) && !TestSetPageLocked(page)) {
 416                        if (PagePrivate(page))
 417                                try_to_release_page(page, 0);
 418                        unlock_page(page);
 419                }
 420        }
 421}
 422
 423/**
 424 * pagevec_lookup - gang pagecache lookup
 425 * @pvec:       Where the resulting pages are placed
 426 * @mapping:    The address_space to search
 427 * @start:      The starting page index
 428 * @nr_pages:   The maximum number of pages
 429 *
 430 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 431 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 432 * reference against the pages in @pvec.
 433 *
 434 * The search returns a group of mapping-contiguous pages with ascending
 435 * indexes.  There may be holes in the indices due to not-present pages.
 436 *
 437 * pagevec_lookup() returns the number of pages which were found.
 438 */
 439unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 440                pgoff_t start, unsigned nr_pages)
 441{
 442        pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
 443        return pagevec_count(pvec);
 444}
 445
 446EXPORT_SYMBOL(pagevec_lookup);
 447
 448unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 449                pgoff_t *index, int tag, unsigned nr_pages)
 450{
 451        pvec->nr = find_get_pages_tag(mapping, index, tag,
 452                                        nr_pages, pvec->pages);
 453        return pagevec_count(pvec);
 454}
 455
 456EXPORT_SYMBOL(pagevec_lookup_tag);
 457
 458#ifdef CONFIG_SMP
 459/*
 460 * We tolerate a little inaccuracy to avoid ping-ponging the counter between
 461 * CPUs
 462 */
 463#define ACCT_THRESHOLD  max(16, NR_CPUS * 2)
 464
 465static DEFINE_PER_CPU(long, committed_space) = 0;
 466
 467void vm_acct_memory(long pages)
 468{
 469        long *local;
 470
 471        preempt_disable();
 472        local = &__get_cpu_var(committed_space);
 473        *local += pages;
 474        if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
 475                atomic_add(*local, &vm_committed_space);
 476                *local = 0;
 477        }
 478        preempt_enable();
 479}
 480
 481#ifdef CONFIG_HOTPLUG_CPU
 482
 483/* Drop the CPU's cached committed space back into the central pool. */
 484static int cpu_swap_callback(struct notifier_block *nfb,
 485                             unsigned long action,
 486                             void *hcpu)
 487{
 488        long *committed;
 489
 490        committed = &per_cpu(committed_space, (long)hcpu);
 491        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
 492                atomic_add(*committed, &vm_committed_space);
 493                *committed = 0;
 494                __lru_add_drain((long)hcpu);
 495        }
 496        return NOTIFY_OK;
 497}
 498#endif /* CONFIG_HOTPLUG_CPU */
 499#endif /* CONFIG_SMP */
 500
 501/*
 502 * Perform any setup for the swap system
 503 */
 504void __init swap_setup(void)
 505{
 506        unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
 507
 508        /* Use a smaller cluster for small-memory machines */
 509        if (megs < 16)
 510                page_cluster = 2;
 511        else
 512                page_cluster = 3;
 513        /*
 514         * Right now other parts of the system means that we
 515         * _really_ don't want to cluster much more
 516         */
 517#ifdef CONFIG_HOTPLUG_CPU
 518        hotcpu_notifier(cpu_swap_callback, 0);
 519#endif
 520}
 521
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.