linux/mm/vmstat.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/vmstat.c
   3 *
   4 *  Manages VM statistics
   5 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   6 *
   7 *  zoned VM statistics
   8 *  Copyright (C) 2006 Silicon Graphics, Inc.,
   9 *              Christoph Lameter <christoph@lameter.com>
  10 */
  11#include <linux/fs.h>
  12#include <linux/mm.h>
  13#include <linux/err.h>
  14#include <linux/module.h>
  15#include <linux/cpu.h>
  16#include <linux/vmstat.h>
  17#include <linux/sched.h>
  18
  19#ifdef CONFIG_VM_EVENT_COUNTERS
  20DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
  21EXPORT_PER_CPU_SYMBOL(vm_event_states);
  22
  23static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
  24{
  25        int cpu;
  26        int i;
  27
  28        memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
  29
  30        for_each_cpu_mask_nr(cpu, *cpumask) {
  31                struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
  32
  33                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
  34                        ret[i] += this->event[i];
  35        }
  36}
  37
  38/*
  39 * Accumulate the vm event counters across all CPUs.
  40 * The result is unavoidably approximate - it can change
  41 * during and after execution of this function.
  42*/
  43void all_vm_events(unsigned long *ret)
  44{
  45        get_online_cpus();
  46        sum_vm_events(ret, cpu_online_mask);
  47        put_online_cpus();
  48}
  49EXPORT_SYMBOL_GPL(all_vm_events);
  50
  51#ifdef CONFIG_HOTPLUG
  52/*
  53 * Fold the foreign cpu events into our own.
  54 *
  55 * This is adding to the events on one processor
  56 * but keeps the global counts constant.
  57 */
  58void vm_events_fold_cpu(int cpu)
  59{
  60        struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
  61        int i;
  62
  63        for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
  64                count_vm_events(i, fold_state->event[i]);
  65                fold_state->event[i] = 0;
  66        }
  67}
  68#endif /* CONFIG_HOTPLUG */
  69
  70#endif /* CONFIG_VM_EVENT_COUNTERS */
  71
  72/*
  73 * Manage combined zone based / global counters
  74 *
  75 * vm_stat contains the global counters
  76 */
  77atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
  78EXPORT_SYMBOL(vm_stat);
  79
  80#ifdef CONFIG_SMP
  81
  82static int calculate_threshold(struct zone *zone)
  83{
  84        int threshold;
  85        int mem;        /* memory in 128 MB units */
  86
  87        /*
  88         * The threshold scales with the number of processors and the amount
  89         * of memory per zone. More memory means that we can defer updates for
  90         * longer, more processors could lead to more contention.
  91         * fls() is used to have a cheap way of logarithmic scaling.
  92         *
  93         * Some sample thresholds:
  94         *
  95         * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
  96         * ------------------------------------------------------------------
  97         * 8            1               1       0.9-1 GB        4
  98         * 16           2               2       0.9-1 GB        4
  99         * 20           2               2       1-2 GB          5
 100         * 24           2               2       2-4 GB          6
 101         * 28           2               2       4-8 GB          7
 102         * 32           2               2       8-16 GB         8
 103         * 4            2               2       <128M           1
 104         * 30           4               3       2-4 GB          5
 105         * 48           4               3       8-16 GB         8
 106         * 32           8               4       1-2 GB          4
 107         * 32           8               4       0.9-1GB         4
 108         * 10           16              5       <128M           1
 109         * 40           16              5       900M            4
 110         * 70           64              7       2-4 GB          5
 111         * 84           64              7       4-8 GB          6
 112         * 108          512             9       4-8 GB          6
 113         * 125          1024            10      8-16 GB         8
 114         * 125          1024            10      16-32 GB        9
 115         */
 116
 117        mem = zone->present_pages >> (27 - PAGE_SHIFT);
 118
 119        threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
 120
 121        /*
 122         * Maximum threshold is 125
 123         */
 124        threshold = min(125, threshold);
 125
 126        return threshold;
 127}
 128
 129/*
 130 * Refresh the thresholds for each zone.
 131 */
 132static void refresh_zone_stat_thresholds(void)
 133{
 134        struct zone *zone;
 135        int cpu;
 136        int threshold;
 137
 138        for_each_zone(zone) {
 139
 140                if (!zone->present_pages)
 141                        continue;
 142
 143                threshold = calculate_threshold(zone);
 144
 145                for_each_online_cpu(cpu)
 146                        zone_pcp(zone, cpu)->stat_threshold = threshold;
 147        }
 148}
 149
 150/*
 151 * For use when we know that interrupts are disabled.
 152 */
 153void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 154                                int delta)
 155{
 156        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 157        s8 *p = pcp->vm_stat_diff + item;
 158        long x;
 159
 160        x = delta + *p;
 161
 162        if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
 163                zone_page_state_add(x, zone, item);
 164                x = 0;
 165        }
 166        *p = x;
 167}
 168EXPORT_SYMBOL(__mod_zone_page_state);
 169
 170/*
 171 * For an unknown interrupt state
 172 */
 173void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 174                                        int delta)
 175{
 176        unsigned long flags;
 177
 178        local_irq_save(flags);
 179        __mod_zone_page_state(zone, item, delta);
 180        local_irq_restore(flags);
 181}
 182EXPORT_SYMBOL(mod_zone_page_state);
 183
 184/*
 185 * Optimized increment and decrement functions.
 186 *
 187 * These are only for a single page and therefore can take a struct page *
 188 * argument instead of struct zone *. This allows the inclusion of the code
 189 * generated for page_zone(page) into the optimized functions.
 190 *
 191 * No overflow check is necessary and therefore the differential can be
 192 * incremented or decremented in place which may allow the compilers to
 193 * generate better code.
 194 * The increment or decrement is known and therefore one boundary check can
 195 * be omitted.
 196 *
 197 * NOTE: These functions are very performance sensitive. Change only
 198 * with care.
 199 *
 200 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 201 * However, the code must first determine the differential location in a zone
 202 * based on the processor number and then inc/dec the counter. There is no
 203 * guarantee without disabling preemption that the processor will not change
 204 * in between and therefore the atomicity vs. interrupt cannot be exploited
 205 * in a useful way here.
 206 */
 207void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 208{
 209        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 210        s8 *p = pcp->vm_stat_diff + item;
 211
 212        (*p)++;
 213
 214        if (unlikely(*p > pcp->stat_threshold)) {
 215                int overstep = pcp->stat_threshold / 2;
 216
 217                zone_page_state_add(*p + overstep, zone, item);
 218                *p = -overstep;
 219        }
 220}
 221
 222void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
 223{
 224        __inc_zone_state(page_zone(page), item);
 225}
 226EXPORT_SYMBOL(__inc_zone_page_state);
 227
 228void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 229{
 230        struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 231        s8 *p = pcp->vm_stat_diff + item;
 232
 233        (*p)--;
 234
 235        if (unlikely(*p < - pcp->stat_threshold)) {
 236                int overstep = pcp->stat_threshold / 2;
 237
 238                zone_page_state_add(*p - overstep, zone, item);
 239                *p = overstep;
 240        }
 241}
 242
 243void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 244{
 245        __dec_zone_state(page_zone(page), item);
 246}
 247EXPORT_SYMBOL(__dec_zone_page_state);
 248
 249void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 250{
 251        unsigned long flags;
 252
 253        local_irq_save(flags);
 254        __inc_zone_state(zone, item);
 255        local_irq_restore(flags);
 256}
 257
 258void inc_zone_page_state(struct page *page, enum zone_stat_item item)
 259{
 260        unsigned long flags;
 261        struct zone *zone;
 262
 263        zone = page_zone(page);
 264        local_irq_save(flags);
 265        __inc_zone_state(zone, item);
 266        local_irq_restore(flags);
 267}
 268EXPORT_SYMBOL(inc_zone_page_state);
 269
 270void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 271{
 272        unsigned long flags;
 273
 274        local_irq_save(flags);
 275        __dec_zone_page_state(page, item);
 276        local_irq_restore(flags);
 277}
 278EXPORT_SYMBOL(dec_zone_page_state);
 279
 280/*
 281 * Update the zone counters for one cpu.
 282 *
 283 * The cpu specified must be either the current cpu or a processor that
 284 * is not online. If it is the current cpu then the execution thread must
 285 * be pinned to the current cpu.
 286 *
 287 * Note that refresh_cpu_vm_stats strives to only access
 288 * node local memory. The per cpu pagesets on remote zones are placed
 289 * in the memory local to the processor using that pageset. So the
 290 * loop over all zones will access a series of cachelines local to
 291 * the processor.
 292 *
 293 * The call to zone_page_state_add updates the cachelines with the
 294 * statistics in the remote zone struct as well as the global cachelines
 295 * with the global counters. These could cause remote node cache line
 296 * bouncing and will have to be only done when necessary.
 297 */
 298void refresh_cpu_vm_stats(int cpu)
 299{
 300        struct zone *zone;
 301        int i;
 302        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 303
 304        for_each_zone(zone) {
 305                struct per_cpu_pageset *p;
 306
 307                if (!populated_zone(zone))
 308                        continue;
 309
 310                p = zone_pcp(zone, cpu);
 311
 312                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 313                        if (p->vm_stat_diff[i]) {
 314                                unsigned long flags;
 315                                int v;
 316
 317                                local_irq_save(flags);
 318                                v = p->vm_stat_diff[i];
 319                                p->vm_stat_diff[i] = 0;
 320                                local_irq_restore(flags);
 321                                atomic_long_add(v, &zone->vm_stat[i]);
 322                                global_diff[i] += v;
 323#ifdef CONFIG_NUMA
 324                                /* 3 seconds idle till flush */
 325                                p->expire = 3;
 326#endif
 327                        }
 328                cond_resched();
 329#ifdef CONFIG_NUMA
 330                /*
 331                 * Deal with draining the remote pageset of this
 332                 * processor
 333                 *
 334                 * Check if there are pages remaining in this pageset
 335                 * if not then there is nothing to expire.
 336                 */
 337                if (!p->expire || !p->pcp.count)
 338                        continue;
 339
 340                /*
 341                 * We never drain zones local to this processor.
 342                 */
 343                if (zone_to_nid(zone) == numa_node_id()) {
 344                        p->expire = 0;
 345                        continue;
 346                }
 347
 348                p->expire--;
 349                if (p->expire)
 350                        continue;
 351
 352                if (p->pcp.count)
 353                        drain_zone_pages(zone, &p->pcp);
 354#endif
 355        }
 356
 357        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 358                if (global_diff[i])
 359                        atomic_long_add(global_diff[i], &vm_stat[i]);
 360}
 361
 362#endif
 363
 364#ifdef CONFIG_NUMA
 365/*
 366 * zonelist = the list of zones passed to the allocator
 367 * z        = the zone from which the allocation occurred.
 368 *
 369 * Must be called with interrupts disabled.
 370 */
 371void zone_statistics(struct zone *preferred_zone, struct zone *z)
 372{
 373        if (z->zone_pgdat == preferred_zone->zone_pgdat) {
 374                __inc_zone_state(z, NUMA_HIT);
 375        } else {
 376                __inc_zone_state(z, NUMA_MISS);
 377                __inc_zone_state(preferred_zone, NUMA_FOREIGN);
 378        }
 379        if (z->node == numa_node_id())
 380                __inc_zone_state(z, NUMA_LOCAL);
 381        else
 382                __inc_zone_state(z, NUMA_OTHER);
 383}
 384#endif
 385
 386#ifdef CONFIG_PROC_FS
 387#include <linux/proc_fs.h>
 388#include <linux/seq_file.h>
 389
 390static char * const migratetype_names[MIGRATE_TYPES] = {
 391        "Unmovable",
 392        "Reclaimable",
 393        "Movable",
 394        "Reserve",
 395        "Isolate",
 396};
 397
 398static void *frag_start(struct seq_file *m, loff_t *pos)
 399{
 400        pg_data_t *pgdat;
 401        loff_t node = *pos;
 402        for (pgdat = first_online_pgdat();
 403             pgdat && node;
 404             pgdat = next_online_pgdat(pgdat))
 405                --node;
 406
 407        return pgdat;
 408}
 409
 410static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
 411{
 412        pg_data_t *pgdat = (pg_data_t *)arg;
 413
 414        (*pos)++;
 415        return next_online_pgdat(pgdat);
 416}
 417
 418static void frag_stop(struct seq_file *m, void *arg)
 419{
 420}
 421
 422/* Walk all the zones in a node and print using a callback */
 423static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 424                void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
 425{
 426        struct zone *zone;
 427        struct zone *node_zones = pgdat->node_zones;
 428        unsigned long flags;
 429
 430        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
 431                if (!populated_zone(zone))
 432                        continue;
 433
 434                spin_lock_irqsave(&zone->lock, flags);
 435                print(m, pgdat, zone);
 436                spin_unlock_irqrestore(&zone->lock, flags);
 437        }
 438}
 439
 440static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 441                                                struct zone *zone)
 442{
 443        int order;
 444
 445        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
 446        for (order = 0; order < MAX_ORDER; ++order)
 447                seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
 448        seq_putc(m, '\n');
 449}
 450
 451/*
 452 * This walks the free areas for each zone.
 453 */
 454static int frag_show(struct seq_file *m, void *arg)
 455{
 456        pg_data_t *pgdat = (pg_data_t *)arg;
 457        walk_zones_in_node(m, pgdat, frag_show_print);
 458        return 0;
 459}
 460
 461static void pagetypeinfo_showfree_print(struct seq_file *m,
 462                                        pg_data_t *pgdat, struct zone *zone)
 463{
 464        int order, mtype;
 465
 466        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
 467                seq_printf(m, "Node %4d, zone %8s, type %12s ",
 468                                        pgdat->node_id,
 469                                        zone->name,
 470                                        migratetype_names[mtype]);
 471                for (order = 0; order < MAX_ORDER; ++order) {
 472                        unsigned long freecount = 0;
 473                        struct free_area *area;
 474                        struct list_head *curr;
 475
 476                        area = &(zone->free_area[order]);
 477
 478                        list_for_each(curr, &area->free_list[mtype])
 479                                freecount++;
 480                        seq_printf(m, "%6lu ", freecount);
 481                }
 482                seq_putc(m, '\n');
 483        }
 484}
 485
 486/* Print out the free pages at each order for each migatetype */
 487static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
 488{
 489        int order;
 490        pg_data_t *pgdat = (pg_data_t *)arg;
 491
 492        /* Print header */
 493        seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
 494        for (order = 0; order < MAX_ORDER; ++order)
 495                seq_printf(m, "%6d ", order);
 496        seq_putc(m, '\n');
 497
 498        walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
 499
 500        return 0;
 501}
 502
 503static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 504                                        pg_data_t *pgdat, struct zone *zone)
 505{
 506        int mtype;
 507        unsigned long pfn;
 508        unsigned long start_pfn = zone->zone_start_pfn;
 509        unsigned long end_pfn = start_pfn + zone->spanned_pages;
 510        unsigned long count[MIGRATE_TYPES] = { 0, };
 511
 512        for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 513                struct page *page;
 514
 515                if (!pfn_valid(pfn))
 516                        continue;
 517
 518                page = pfn_to_page(pfn);
 519#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
 520                /*
 521                 * Ordinarily, memory holes in flatmem still have a valid
 522                 * memmap for the PFN range. However, an architecture for
 523                 * embedded systems (e.g. ARM) can free up the memmap backing
 524                 * holes to save memory on the assumption the memmap is
 525                 * never used. The page_zone linkages are then broken even
 526                 * though pfn_valid() returns true. Skip the page if the
 527                 * linkages are broken. Even if this test passed, the impact
 528                 * is that the counters for the movable type are off but
 529                 * fragmentation monitoring is likely meaningless on small
 530                 * systems.
 531                 */
 532                if (page_zone(page) != zone)
 533                        continue;
 534#endif
 535                mtype = get_pageblock_migratetype(page);
 536
 537                if (mtype < MIGRATE_TYPES)
 538                        count[mtype]++;
 539        }
 540
 541        /* Print counts */
 542        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
 543        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
 544                seq_printf(m, "%12lu ", count[mtype]);
 545        seq_putc(m, '\n');
 546}
 547
 548/* Print out the free pages at each order for each migratetype */
 549static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
 550{
 551        int mtype;
 552        pg_data_t *pgdat = (pg_data_t *)arg;
 553
 554        seq_printf(m, "\n%-23s", "Number of blocks type ");
 555        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
 556                seq_printf(m, "%12s ", migratetype_names[mtype]);
 557        seq_putc(m, '\n');
 558        walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
 559
 560        return 0;
 561}
 562
 563/*
 564 * This prints out statistics in relation to grouping pages by mobility.
 565 * It is expensive to collect so do not constantly read the file.
 566 */
 567static int pagetypeinfo_show(struct seq_file *m, void *arg)
 568{
 569        pg_data_t *pgdat = (pg_data_t *)arg;
 570
 571        /* check memoryless node */
 572        if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
 573                return 0;
 574
 575        seq_printf(m, "Page block order: %d\n", pageblock_order);
 576        seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
 577        seq_putc(m, '\n');
 578        pagetypeinfo_showfree(m, pgdat);
 579        pagetypeinfo_showblockcount(m, pgdat);
 580
 581        return 0;
 582}
 583
 584static const struct seq_operations fragmentation_op = {
 585        .start  = frag_start,
 586        .next   = frag_next,
 587        .stop   = frag_stop,
 588        .show   = frag_show,
 589};
 590
 591static int fragmentation_open(struct inode *inode, struct file *file)
 592{
 593        return seq_open(file, &fragmentation_op);
 594}
 595
 596static const struct file_operations fragmentation_file_operations = {
 597        .open           = fragmentation_open,
 598        .read           = seq_read,
 599        .llseek         = seq_lseek,
 600        .release        = seq_release,
 601};
 602
 603static const struct seq_operations pagetypeinfo_op = {
 604        .start  = frag_start,
 605        .next   = frag_next,
 606        .stop   = frag_stop,
 607        .show   = pagetypeinfo_show,
 608};
 609
 610static int pagetypeinfo_open(struct inode *inode, struct file *file)
 611{
 612        return seq_open(file, &pagetypeinfo_op);
 613}
 614
 615static const struct file_operations pagetypeinfo_file_ops = {
 616        .open           = pagetypeinfo_open,
 617        .read           = seq_read,
 618        .llseek         = seq_lseek,
 619        .release        = seq_release,
 620};
 621
 622#ifdef CONFIG_ZONE_DMA
 623#define TEXT_FOR_DMA(xx) xx "_dma",
 624#else
 625#define TEXT_FOR_DMA(xx)
 626#endif
 627
 628#ifdef CONFIG_ZONE_DMA32
 629#define TEXT_FOR_DMA32(xx) xx "_dma32",
 630#else
 631#define TEXT_FOR_DMA32(xx)
 632#endif
 633
 634#ifdef CONFIG_HIGHMEM
 635#define TEXT_FOR_HIGHMEM(xx) xx "_high",
 636#else
 637#define TEXT_FOR_HIGHMEM(xx)
 638#endif
 639
 640#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
 641                                        TEXT_FOR_HIGHMEM(xx) xx "_movable",
 642
 643static const char * const vmstat_text[] = {
 644        /* Zoned VM counters */
 645        "nr_free_pages",
 646        "nr_inactive_anon",
 647        "nr_active_anon",
 648        "nr_inactive_file",
 649        "nr_active_file",
 650#ifdef CONFIG_UNEVICTABLE_LRU
 651        "nr_unevictable",
 652        "nr_mlock",
 653#endif
 654        "nr_anon_pages",
 655        "nr_mapped",
 656        "nr_file_pages",
 657        "nr_dirty",
 658        "nr_writeback",
 659        "nr_slab_reclaimable",
 660        "nr_slab_unreclaimable",
 661        "nr_page_table_pages",
 662        "nr_unstable",
 663        "nr_bounce",
 664        "nr_vmscan_write",
 665        "nr_writeback_temp",
 666
 667#ifdef CONFIG_NUMA
 668        "numa_hit",
 669        "numa_miss",
 670        "numa_foreign",
 671        "numa_interleave",
 672        "numa_local",
 673        "numa_other",
 674#endif
 675
 676#ifdef CONFIG_VM_EVENT_COUNTERS
 677        "pgpgin",
 678        "pgpgout",
 679        "pswpin",
 680        "pswpout",
 681
 682        TEXTS_FOR_ZONES("pgalloc")
 683
 684        "pgfree",
 685        "pgactivate",
 686        "pgdeactivate",
 687
 688        "pgfault",
 689        "pgmajfault",
 690
 691        TEXTS_FOR_ZONES("pgrefill")
 692        TEXTS_FOR_ZONES("pgsteal")
 693        TEXTS_FOR_ZONES("pgscan_kswapd")
 694        TEXTS_FOR_ZONES("pgscan_direct")
 695
 696        "pginodesteal",
 697        "slabs_scanned",
 698        "kswapd_steal",
 699        "kswapd_inodesteal",
 700        "pageoutrun",
 701        "allocstall",
 702
 703        "pgrotated",
 704#ifdef CONFIG_HUGETLB_PAGE
 705        "htlb_buddy_alloc_success",
 706        "htlb_buddy_alloc_fail",
 707#endif
 708#ifdef CONFIG_UNEVICTABLE_LRU
 709        "unevictable_pgs_culled",
 710        "unevictable_pgs_scanned",
 711        "unevictable_pgs_rescued",
 712        "unevictable_pgs_mlocked",
 713        "unevictable_pgs_munlocked",
 714        "unevictable_pgs_cleared",
 715        "unevictable_pgs_stranded",
 716        "unevictable_pgs_mlockfreed",
 717#endif
 718#endif
 719};
 720
 721static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 722                                                        struct zone *zone)
 723{
 724        int i;
 725        seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
 726        seq_printf(m,
 727                   "\n  pages free     %lu"
 728                   "\n        min      %lu"
 729                   "\n        low      %lu"
 730                   "\n        high     %lu"
 731                   "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
 732                   "\n        spanned  %lu"
 733                   "\n        present  %lu",
 734                   zone_page_state(zone, NR_FREE_PAGES),
 735                   zone->pages_min,
 736                   zone->pages_low,
 737                   zone->pages_high,
 738                   zone->pages_scanned,
 739                   zone->lru[LRU_ACTIVE_ANON].nr_scan,
 740                   zone->lru[LRU_INACTIVE_ANON].nr_scan,
 741                   zone->lru[LRU_ACTIVE_FILE].nr_scan,
 742                   zone->lru[LRU_INACTIVE_FILE].nr_scan,
 743                   zone->spanned_pages,
 744                   zone->present_pages);
 745
 746        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 747                seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
 748                                zone_page_state(zone, i));
 749
 750        seq_printf(m,
 751                   "\n        protection: (%lu",
 752                   zone->lowmem_reserve[0]);
 753        for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
 754                seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
 755        seq_printf(m,
 756                   ")"
 757                   "\n  pagesets");
 758        for_each_online_cpu(i) {
 759                struct per_cpu_pageset *pageset;
 760
 761                pageset = zone_pcp(zone, i);
 762                seq_printf(m,
 763                           "\n    cpu: %i"
 764                           "\n              count: %i"
 765                           "\n              high:  %i"
 766                           "\n              batch: %i",
 767                           i,
 768                           pageset->pcp.count,
 769                           pageset->pcp.high,
 770                           pageset->pcp.batch);
 771#ifdef CONFIG_SMP
 772                seq_printf(m, "\n  vm stats threshold: %d",
 773                                pageset->stat_threshold);
 774#endif
 775        }
 776        seq_printf(m,
 777                   "\n  all_unreclaimable: %u"
 778                   "\n  prev_priority:     %i"
 779                   "\n  start_pfn:         %lu"
 780                   "\n  inactive_ratio:    %u",
 781                           zone_is_all_unreclaimable(zone),
 782                   zone->prev_priority,
 783                   zone->zone_start_pfn,
 784                   zone->inactive_ratio);
 785        seq_putc(m, '\n');
 786}
 787
 788/*
 789 * Output information about zones in @pgdat.
 790 */
 791static int zoneinfo_show(struct seq_file *m, void *arg)
 792{
 793        pg_data_t *pgdat = (pg_data_t *)arg;
 794        walk_zones_in_node(m, pgdat, zoneinfo_show_print);
 795        return 0;
 796}
 797
 798static const struct seq_operations zoneinfo_op = {
 799        .start  = frag_start, /* iterate over all zones. The same as in
 800                               * fragmentation. */
 801        .next   = frag_next,
 802        .stop   = frag_stop,
 803        .show   = zoneinfo_show,
 804};
 805
 806static int zoneinfo_open(struct inode *inode, struct file *file)
 807{
 808        return seq_open(file, &zoneinfo_op);
 809}
 810
 811static const struct file_operations proc_zoneinfo_file_operations = {
 812        .open           = zoneinfo_open,
 813        .read           = seq_read,
 814        .llseek         = seq_lseek,
 815        .release        = seq_release,
 816};
 817
 818static void *vmstat_start(struct seq_file *m, loff_t *pos)
 819{
 820        unsigned long *v;
 821#ifdef CONFIG_VM_EVENT_COUNTERS
 822        unsigned long *e;
 823#endif
 824        int i;
 825
 826        if (*pos >= ARRAY_SIZE(vmstat_text))
 827                return NULL;
 828
 829#ifdef CONFIG_VM_EVENT_COUNTERS
 830        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
 831                        + sizeof(struct vm_event_state), GFP_KERNEL);
 832#else
 833        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
 834                        GFP_KERNEL);
 835#endif
 836        m->private = v;
 837        if (!v)
 838                return ERR_PTR(-ENOMEM);
 839        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 840                v[i] = global_page_state(i);
 841#ifdef CONFIG_VM_EVENT_COUNTERS
 842        e = v + NR_VM_ZONE_STAT_ITEMS;
 843        all_vm_events(e);
 844        e[PGPGIN] /= 2;         /* sectors -> kbytes */
 845        e[PGPGOUT] /= 2;
 846#endif
 847        return v + *pos;
 848}
 849
 850static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
 851{
 852        (*pos)++;
 853        if (*pos >= ARRAY_SIZE(vmstat_text))
 854                return NULL;
 855        return (unsigned long *)m->private + *pos;
 856}
 857
 858static int vmstat_show(struct seq_file *m, void *arg)
 859{
 860        unsigned long *l = arg;
 861        unsigned long off = l - (unsigned long *)m->private;
 862
 863        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
 864        return 0;
 865}
 866
 867static void vmstat_stop(struct seq_file *m, void *arg)
 868{
 869        kfree(m->private);
 870        m->private = NULL;
 871}
 872
 873static const struct seq_operations vmstat_op = {
 874        .start  = vmstat_start,
 875        .next   = vmstat_next,
 876        .stop   = vmstat_stop,
 877        .show   = vmstat_show,
 878};
 879
 880static int vmstat_open(struct inode *inode, struct file *file)
 881{
 882        return seq_open(file, &vmstat_op);
 883}
 884
 885static const struct file_operations proc_vmstat_file_operations = {
 886        .open           = vmstat_open,
 887        .read           = seq_read,
 888        .llseek         = seq_lseek,
 889        .release        = seq_release,
 890};
 891#endif /* CONFIG_PROC_FS */
 892
 893#ifdef CONFIG_SMP
 894static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 895int sysctl_stat_interval __read_mostly = HZ;
 896
 897static void vmstat_update(struct work_struct *w)
 898{
 899        refresh_cpu_vm_stats(smp_processor_id());
 900        schedule_delayed_work(&__get_cpu_var(vmstat_work),
 901                sysctl_stat_interval);
 902}
 903
 904static void __cpuinit start_cpu_timer(int cpu)
 905{
 906        struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
 907
 908        INIT_DELAYED_WORK_DEFERRABLE(vmstat_work, vmstat_update);
 909        schedule_delayed_work_on(cpu, vmstat_work, HZ + cpu);
 910}
 911
 912/*
 913 * Use the cpu notifier to insure that the thresholds are recalculated
 914 * when necessary.
 915 */
 916static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 917                unsigned long action,
 918                void *hcpu)
 919{
 920        long cpu = (long)hcpu;
 921
 922        switch (action) {
 923        case CPU_ONLINE:
 924        case CPU_ONLINE_FROZEN:
 925                start_cpu_timer(cpu);
 926                break;
 927        case CPU_DOWN_PREPARE:
 928        case CPU_DOWN_PREPARE_FROZEN:
 929                cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
 930                per_cpu(vmstat_work, cpu).work.func = NULL;
 931                break;
 932        case CPU_DOWN_FAILED:
 933        case CPU_DOWN_FAILED_FROZEN:
 934                start_cpu_timer(cpu);
 935                break;
 936        case CPU_DEAD:
 937        case CPU_DEAD_FROZEN:
 938                refresh_zone_stat_thresholds();
 939                break;
 940        default:
 941                break;
 942        }
 943        return NOTIFY_OK;
 944}
 945
 946static struct notifier_block __cpuinitdata vmstat_notifier =
 947        { &vmstat_cpuup_callback, NULL, 0 };
 948#endif
 949
 950static int __init setup_vmstat(void)
 951{
 952#ifdef CONFIG_SMP
 953        int cpu;
 954
 955        refresh_zone_stat_thresholds();
 956        register_cpu_notifier(&vmstat_notifier);
 957
 958        for_each_online_cpu(cpu)
 959                start_cpu_timer(cpu);
 960#endif
 961#ifdef CONFIG_PROC_FS
 962        proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
 963        proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
 964        proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
 965        proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
 966#endif
 967        return 0;
 968}
 969module_init(setup_vmstat)
 970
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.