linux/mm/page_counter.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
                                      unsigned long usage)
{
        unsigned long protected, old_protected;
        unsigned long low, min;
        long delta;

        if (!c->parent)
                return;

        min = READ_ONCE(c->min);
        if (min || atomic_long_read(&c->min_usage)) {
                protected = min(usage, min);
                old_protected = atomic_long_xchg(&c->min_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_min_usage);
        }

        low = READ_ONCE(c->low);
        if (low || atomic_long_read(&c->low_usage)) {
                protected = min(usage, low);
                old_protected = atomic_long_xchg(&c->low_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_low_usage);
        }
}

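/*
 * Worked example (hypothetical numbers, for illustration only): with
 * c->min == 100 pages and usage climbing from 40 to 120, the protected
 * value goes from min(40, 100) == 40 to min(120, 100) == 100, so a
 * delta of 60 is added to the parent's children_min_usage.  If usage
 * later drops to 30, protected becomes 30 and the delta of
 * 30 - 100 == -70 shrinks children_min_usage again, keeping the
 * parent's view of protected child memory in step with actual usage.
 */
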
/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        /* More uncharges than charges? */
        if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n",
                      new, nr_pages)) {
                new = 0;
                atomic_long_set(&counter->usage, new);
        }
        propagate_protected_usage(counter, new);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_protected_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > READ_ONCE(c->watermark))
                        WRITE_ONCE(c->watermark, new);
        }
}

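/*
 * Usage sketch (hypothetical, for illustration): page_counter_charge()
 * ignores the configured limit, so it suits charges that must not
 * fail; page_counter_uncharge() walks the same ancestor chain back
 * down.  The example_*() helpers are made-up names, not part of the
 * kernel API.
 */
static void __maybe_unused example_force_charge(struct page_counter *counter,
                                                unsigned long nr_pages)
{
        /* Charges @counter and every ancestor, even past their limits. */
        page_counter_charge(counter, nr_pages);
}

static void __maybe_unused example_undo_charge(struct page_counter *counter,
                                               unsigned long nr_pages)
{
        /* Removes the charge from @counter and every ancestor again. */
        page_counter_uncharge(counter, nr_pages);
}
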
/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS.  If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit.  When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_protected_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt which is only used
                         * to report stats.
                         */
                        data_race(c->failcnt++);
                        *fail = c;
                        goto failed;
                }
                propagate_protected_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > READ_ONCE(c->watermark))
                        WRITE_ONCE(c->watermark, new);
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}

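/*
 * Usage sketch (hypothetical, for illustration): the typical caller
 * pattern around page_counter_try_charge().  On failure, @fail names
 * the first counter in the hierarchy that hit its limit, which a real
 * caller would use to pick a reclaim target before retrying.
 * example_try_charge() is a made-up name, not part of the kernel API.
 */
static bool __maybe_unused example_try_charge(struct page_counter *counter,
                                              unsigned long nr_pages)
{
        struct page_counter *over_limit;
        int retries = 3;

        while (!page_counter_try_charge(counter, nr_pages, &over_limit)) {
                /*
                 * A real caller would reclaim pages from the group
                 * owning @over_limit here; without that, retrying only
                 * helps if someone else uncharges concurrently.
                 */
                if (!retries--)
                        return false;
        }
        return true;
}
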
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = page_counter_read(counter);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (page_counter_read(counter) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}

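/*
 * Usage sketch (hypothetical, for illustration): lowering a limit the
 * way a write handler for a limit file might.  page_counter_set_max()
 * refuses with -EBUSY while usage is above the requested limit, so the
 * caller reclaims (elided here) and retries.  example_shrink_limit()
 * is a made-up name, not part of the kernel API.
 */
static int __maybe_unused example_shrink_limit(struct page_counter *counter,
                                               unsigned long limit)
{
        int retries = 5;
        int err;

        do {
                err = page_counter_set_max(counter, limit);
                if (err != -EBUSY)
                        return err;
                /* Usage still exceeds @limit; reclaim would go here. */
        } while (retries--);

        return err;
}
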
/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        WRITE_ONCE(counter->min, nr_pages);

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        WRITE_ONCE(counter->low, nr_pages);

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

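/*
 * Usage sketch (hypothetical, for illustration): configuring both
 * protection knobs on one counter.  The page counter only records the
 * values and propagates the protected usage upwards; how "min" and
 * "low" influence reclaim is up to the consumer, e.g. the memory
 * controller.  example_set_protection() is a made-up name, not part
 * of the kernel API.
 */
static void __maybe_unused example_set_protection(struct page_counter *counter,
                                                  unsigned long min_pages,
                                                  unsigned long low_pages)
{
        page_counter_set_min(counter, min_pages);
        page_counter_set_low(counter, low_pages);
}
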
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}

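/*
 * Usage sketch (hypothetical, for illustration): parsing a limit
 * string such as "512M" or "max" and applying it, as a cgroup-style
 * write handler might after stripping trailing whitespace from the
 * buffer.  example_write_limit() is a made-up name, not part of the
 * kernel API.
 */
static int __maybe_unused example_write_limit(struct page_counter *counter,
                                              const char *buf)
{
        unsigned long nr_pages;
        int err;

        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;

        return page_counter_set_max(counter, nr_pages);
}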