linux/net/ipv4/tcp_memcontrol.c
<<
>>
Prefs
   1#include <net/tcp.h>
   2#include <net/tcp_memcontrol.h>
   3#include <net/sock.h>
   4#include <net/ip.h>
   5#include <linux/nsproxy.h>
   6#include <linux/memcontrol.h>
   7#include <linux/module.h>
   8
   9static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
  10{
  11        return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
  12}
  13
  14static void memcg_tcp_enter_memory_pressure(struct sock *sk)
  15{
  16        if (sk->sk_cgrp->memory_pressure)
  17                *sk->sk_cgrp->memory_pressure = 1;
  18}
  19EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
  20
  21int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
  22{
  23        /*
  24         * The root cgroup does not use res_counters, but rather,
  25         * rely on the data already collected by the network
  26         * subsystem
  27         */
  28        struct res_counter *res_parent = NULL;
  29        struct cg_proto *cg_proto, *parent_cg;
  30        struct tcp_memcontrol *tcp;
  31        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
  32        struct net *net = current->nsproxy->net_ns;
  33
  34        cg_proto = tcp_prot.proto_cgroup(memcg);
  35        if (!cg_proto)
  36                return 0;
  37
  38        tcp = tcp_from_cgproto(cg_proto);
  39
  40        tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
  41        tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
  42        tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
  43        tcp->tcp_memory_pressure = 0;
  44
  45        parent_cg = tcp_prot.proto_cgroup(parent);
  46        if (parent_cg)
  47                res_parent = parent_cg->memory_allocated;
  48
  49        res_counter_init(&tcp->tcp_memory_allocated, res_parent);
  50        percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
  51
  52        cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
  53        cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
  54        cg_proto->sysctl_mem = tcp->tcp_prot_mem;
  55        cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
  56        cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
  57        cg_proto->memcg = memcg;
  58
  59        return 0;
  60}
  61EXPORT_SYMBOL(tcp_init_cgroup);
  62
  63void tcp_destroy_cgroup(struct mem_cgroup *memcg)
  64{
  65        struct cg_proto *cg_proto;
  66        struct tcp_memcontrol *tcp;
  67        u64 val;
  68
  69        cg_proto = tcp_prot.proto_cgroup(memcg);
  70        if (!cg_proto)
  71                return;
  72
  73        tcp = tcp_from_cgproto(cg_proto);
  74        percpu_counter_destroy(&tcp->tcp_sockets_allocated);
  75
  76        val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
  77}
  78EXPORT_SYMBOL(tcp_destroy_cgroup);
  79
  80static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
  81{
  82        struct net *net = current->nsproxy->net_ns;
  83        struct tcp_memcontrol *tcp;
  84        struct cg_proto *cg_proto;
  85        u64 old_lim;
  86        int i;
  87        int ret;
  88
  89        cg_proto = tcp_prot.proto_cgroup(memcg);
  90        if (!cg_proto)
  91                return -EINVAL;
  92
  93        if (val > RESOURCE_MAX)
  94                val = RESOURCE_MAX;
  95
  96        tcp = tcp_from_cgproto(cg_proto);
  97
  98        old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
  99        ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
 100        if (ret)
 101                return ret;
 102
 103        for (i = 0; i < 3; i++)
 104                tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
 105                                             net->ipv4.sysctl_tcp_mem[i]);
 106
 107        if (val == RESOURCE_MAX)
 108                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
 109        else if (val != RESOURCE_MAX) {
 110                /*
 111                 * The active bit needs to be written after the static_key
 112                 * update. This is what guarantees that the socket activation
 113                 * function is the last one to run. See sock_update_memcg() for
 114                 * details, and note that we don't mark any socket as belonging
 115                 * to this memcg until that flag is up.
 116                 *
 117                 * We need to do this, because static_keys will span multiple
 118                 * sites, but we can't control their order. If we mark a socket
 119                 * as accounted, but the accounting functions are not patched in
 120                 * yet, we'll lose accounting.
 121                 *
 122                 * We never race with the readers in sock_update_memcg(),
 123                 * because when this value change, the code to process it is not
 124                 * patched in yet.
 125                 *
 126                 * The activated bit is used to guarantee that no two writers
 127                 * will do the update in the same memcg. Without that, we can't
 128                 * properly shutdown the static key.
 129                 */
 130                if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
 131                        static_key_slow_inc(&memcg_socket_limit_enabled);
 132                set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
 133        }
 134
 135        return 0;
 136}
 137
 138static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
 139                            const char *buffer)
 140{
 141        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 142        unsigned long long val;
 143        int ret = 0;
 144
 145        switch (cft->private) {
 146        case RES_LIMIT:
 147                /* see memcontrol.c */
 148                ret = res_counter_memparse_write_strategy(buffer, &val);
 149                if (ret)
 150                        break;
 151                ret = tcp_update_limit(memcg, val);
 152                break;
 153        default:
 154                ret = -EINVAL;
 155                break;
 156        }
 157        return ret;
 158}
 159
 160static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
 161{
 162        struct tcp_memcontrol *tcp;
 163        struct cg_proto *cg_proto;
 164
 165        cg_proto = tcp_prot.proto_cgroup(memcg);
 166        if (!cg_proto)
 167                return default_val;
 168
 169        tcp = tcp_from_cgproto(cg_proto);
 170        return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
 171}
 172
 173static u64 tcp_read_usage(struct mem_cgroup *memcg)
 174{
 175        struct tcp_memcontrol *tcp;
 176        struct cg_proto *cg_proto;
 177
 178        cg_proto = tcp_prot.proto_cgroup(memcg);
 179        if (!cg_proto)
 180                return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
 181
 182        tcp = tcp_from_cgproto(cg_proto);
 183        return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
 184}
 185
 186static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
 187{
 188        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 189        u64 val;
 190
 191        switch (cft->private) {
 192        case RES_LIMIT:
 193                val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
 194                break;
 195        case RES_USAGE:
 196                val = tcp_read_usage(memcg);
 197                break;
 198        case RES_FAILCNT:
 199        case RES_MAX_USAGE:
 200                val = tcp_read_stat(memcg, cft->private, 0);
 201                break;
 202        default:
 203                BUG();
 204        }
 205        return val;
 206}
 207
 208static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
 209{
 210        struct mem_cgroup *memcg;
 211        struct tcp_memcontrol *tcp;
 212        struct cg_proto *cg_proto;
 213
 214        memcg = mem_cgroup_from_cont(cont);
 215        cg_proto = tcp_prot.proto_cgroup(memcg);
 216        if (!cg_proto)
 217                return 0;
 218        tcp = tcp_from_cgproto(cg_proto);
 219
 220        switch (event) {
 221        case RES_MAX_USAGE:
 222                res_counter_reset_max(&tcp->tcp_memory_allocated);
 223                break;
 224        case RES_FAILCNT:
 225                res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
 226                break;
 227        }
 228
 229        return 0;
 230}
 231
 232unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
 233{
 234        struct tcp_memcontrol *tcp;
 235        struct cg_proto *cg_proto;
 236
 237        cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
 238        if (!cg_proto)
 239                return 0;
 240
 241        tcp = tcp_from_cgproto(cg_proto);
 242        return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
 243}
 244
 245void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
 246{
 247        struct tcp_memcontrol *tcp;
 248        struct cg_proto *cg_proto;
 249
 250        cg_proto = tcp_prot.proto_cgroup(memcg);
 251        if (!cg_proto)
 252                return;
 253
 254        tcp = tcp_from_cgproto(cg_proto);
 255
 256        tcp->tcp_prot_mem[idx] = val;
 257}
 258
 259static struct cftype tcp_files[] = {
 260        {
 261                .name = "kmem.tcp.limit_in_bytes",
 262                .write_string = tcp_cgroup_write,
 263                .read_u64 = tcp_cgroup_read,
 264                .private = RES_LIMIT,
 265        },
 266        {
 267                .name = "kmem.tcp.usage_in_bytes",
 268                .read_u64 = tcp_cgroup_read,
 269                .private = RES_USAGE,
 270        },
 271        {
 272                .name = "kmem.tcp.failcnt",
 273                .private = RES_FAILCNT,
 274                .trigger = tcp_cgroup_reset,
 275                .read_u64 = tcp_cgroup_read,
 276        },
 277        {
 278                .name = "kmem.tcp.max_usage_in_bytes",
 279                .private = RES_MAX_USAGE,
 280                .trigger = tcp_cgroup_reset,
 281                .read_u64 = tcp_cgroup_read,
 282        },
 283        { }     /* terminate */
 284};
 285
 286static int __init tcp_memcontrol_init(void)
 287{
 288        WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
 289        return 0;
 290}
 291__initcall(tcp_memcontrol_init);
 292
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.