linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/mm.h>
  23#include <linux/swap.h>
  24#include <linux/slab.h>
  25#include <linux/sysctl.h>
  26#include <linux/proc_fs.h>
  27#include <linux/capability.h>
  28#include <linux/ctype.h>
  29#include <linux/utsname.h>
  30#include <linux/capability.h>
  31#include <linux/smp_lock.h>
  32#include <linux/init.h>
  33#include <linux/kernel.h>
  34#include <linux/kobject.h>
  35#include <linux/net.h>
  36#include <linux/sysrq.h>
  37#include <linux/highuid.h>
  38#include <linux/writeback.h>
  39#include <linux/hugetlb.h>
  40#include <linux/security.h>
  41#include <linux/initrd.h>
  42#include <linux/times.h>
  43#include <linux/limits.h>
  44#include <linux/dcache.h>
  45#include <linux/syscalls.h>
  46#include <linux/nfs_fs.h>
  47#include <linux/acpi.h>
  48
  49#include <asm/uaccess.h>
  50#include <asm/processor.h>
  51
  52extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
  53                     void __user *buffer, size_t *lenp, loff_t *ppos);
    /*
     * proc_nr_files is defined outside this file.  Its parameter list is the
     * same as that of the proc_do_*() handlers declared further down.  It is
     * declared ahead of the CONFIG_SYSCTL guard and is not referenced by any
     * table in the part of the file reviewed here.
     */
  54
  55#ifdef CONFIG_X86
  56#include <asm/nmi.h>
  57#endif
  58
  59#if defined(CONFIG_SYSCTL)
  60
  61/* External variables not in a header file.
     * The trailing notes name the sysctl entry (procname) that points at each
     * variable, where that entry is visible in this part of the file. */
  62extern int C_A_D;  /* kern_table "ctrl-alt-del" */
  63extern int sysctl_overcommit_memory;  /* vm_table "overcommit_memory" */
  64extern int sysctl_overcommit_ratio;  /* vm_table "overcommit_ratio" */
  65extern int sysctl_panic_on_oom;  /* vm_table "panic_on_oom" */
  66extern int max_threads;  /* kern_table "threads-max" */
  67extern int sysrq_enabled;  /* kern_table "sysrq" (CONFIG_MAGIC_SYSRQ only) */
  68extern int core_uses_pid;  /* kern_table "core_uses_pid" */
  69extern int suid_dumpable;  /* not referenced in the part of the file reviewed here */
  70extern char core_pattern[];  /* kern_table "core_pattern", maxlen 128 */
  71extern int pid_max;  /* kern_table "pid_max" */
  72extern int min_free_kbytes;  /* not referenced in the part of the file reviewed here */
  73extern int printk_ratelimit_jiffies;  /* kern_table "printk_ratelimit" */
  74extern int printk_ratelimit_burst;  /* kern_table "printk_ratelimit_burst" */
  75extern int pid_max_min, pid_max_max;  /* extra1/extra2 bounds of "pid_max" */
  76extern int sysctl_drop_caches;  /* not referenced in the part of the file reviewed here */
  77extern int percpu_pagelist_fraction;  /* not referenced in the part of the file reviewed here */
  78extern int compat_log;  /* kern_table "compat-log" (CONFIG_COMPAT only) */
  79
  80/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
  81static int maxolduid = 65535;  /* extra2 (upper bound) of "overflowuid" and "overflowgid" */
  82static int minolduid;  /* extra1 (lower bound) of the same entries; static, so starts at 0 */
  83static int min_percpu_pagelist_fract = 8;  /* not referenced in the part of the file reviewed here */
  84
  85static int ngroups_max = NGROUPS_MAX;  /* backs the mode-0444 "ngroups_max" entry in kern_table */
  86
  87#ifdef CONFIG_KMOD
  88extern char modprobe_path[];  /* kern_table "modprobe", maxlen KMOD_PATH_LEN */
  89#endif
  90#ifdef CONFIG_CHR_DEV_SG
  91extern int sg_big_buff;  /* kern_table "sg-big-buff" (mode 0444) */
  92#endif
  93#ifdef CONFIG_SYSVIPC
    /* Forward declaration: .proc_handler of the shmmax, shmall, shmmni, msgmax,
     * msgmni, msgmnb and sem entries in kern_table.  The definition is not in
     * the part of the file reviewed here. */
  94static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
  95                void __user *buffer, size_t *lenp, loff_t *ppos);
  96#endif
  97
  98#ifdef __sparc__
    /* Variables behind the sparc-only kern_table entries. */
  99extern char reboot_command [];  /* "reboot-cmd", maxlen 256 */
 100extern int stop_a_enabled;  /* "stop-a" */
 101extern int scons_pwroff;  /* "scons-poweroff" */
 102#endif
 103
 104#ifdef __hppa__
    /* Variables behind the hppa-only kern_table entries. */
 105extern int pwrsw_enabled;  /* "soft-power" */
 106extern int unaligned_enabled;  /* "unaligned-trap" */
 107#endif
 108
 109#ifdef CONFIG_S390
    /* Variables behind the s390-only kern_table entries. */
 110#ifdef CONFIG_MATHEMU
 111extern int sysctl_ieee_emulation_warnings;  /* "ieee_emulation_warnings" */
 112#endif
 113extern int sysctl_userprocess_debug;  /* "userprocess_debug" */
 114extern int spin_retry;  /* "spin_retry"; that entry additionally requires CONFIG_SMP */
 115#endif
 116
 117extern int sysctl_hz_timer;  /* declared unconditionally; only used by "hz_timer" (CONFIG_S390 && CONFIG_NO_IDLE_HZ) */
 118
 119#ifdef CONFIG_BSD_PROCESS_ACCT
 120extern int acct_parm[];  /* kern_table "acct", exposed with maxlen 3*sizeof(int) */
 121#endif
 122
 123#ifdef CONFIG_IA64
 124extern int no_unaligned_warning;  /* kern_table "ignore-unaligned-usertrap" */
 125#endif
 126
 127#ifdef CONFIG_RT_MUTEXES
 128extern int max_lock_depth;  /* kern_table "max_lock_depth" */
 129#endif
 130
 131#ifdef CONFIG_SYSCTL_SYSCALL
    /* Forward declaration, only needed when CONFIG_SYSCTL_SYSCALL is set.  The
     * definition and callers are not in the part of the file reviewed here. */
 132static int parse_table(int __user *, int, void __user *, size_t __user *,
 133                void __user *, size_t, ctl_table *, void **);
 134#endif
 135
    /* Forward declaration: .proc_handler of the ostype, osrelease, version,
     * hostname and domainname entries in kern_table. */
 136static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 137                  void __user *buffer, size_t *lenp, loff_t *ppos);
 138
 139#ifdef CONFIG_PROC_SYSCTL
    /* Forward declaration: .proc_handler of the "cad_pid" entry in kern_table. */
 140static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
 141                  void __user *buffer, size_t *lenp, loff_t *ppos);
 142#endif
 143
 144static ctl_table root_table[];  /* forward declaration so the header below can point at it */
    /* Header for the built-in table tree: the first member is set to
     * root_table, the second is LIST_HEAD_INIT applied to the header's own
     * ctl_entry member. */
 145static struct ctl_table_header root_table_header =
 146        { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
 147
 148static ctl_table kern_table[];  /* child of root_table "kernel" */
 149static ctl_table vm_table[];  /* child of root_table "vm" */
 150static ctl_table fs_table[];  /* child of root_table "fs"; initializer not in the part reviewed here */
 151static ctl_table debug_table[];  /* child of root_table "debug"; initializer not in the part reviewed here */
 152static ctl_table dev_table[];  /* child of root_table "dev"; initializer not in the part reviewed here */
 153extern ctl_table random_table[];  /* defined elsewhere; child of kern_table "random" */
 154#ifdef CONFIG_UNIX98_PTYS
 155extern ctl_table pty_table[];  /* defined elsewhere; child of kern_table "pty" */
 156#endif
 157#ifdef CONFIG_INOTIFY_USER
 158extern ctl_table inotify_table[];  /* defined elsewhere; not referenced in the part reviewed here */
 159#endif
 160
 161#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
    /* Defined here (external linkage, no initializer, so it starts at 0) rather
     * than imported.  No table entry in the part of the file reviewed here
     * points at it. */
 162int sysctl_legacy_va_layout;
 163#endif
 164
 165/* /proc declarations: */
 166
 167#ifdef CONFIG_PROC_SYSCTL
 168
    /* Forward declarations of the three callbacks wired into
     * proc_sys_file_operations below; their bodies are not in the part of the
     * file reviewed here. */
 169static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
 170static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
 171static int proc_opensys(struct inode *, struct file *);
 172
    /* File operations with open/read/write set and every other operation left
     * at its zero default.  Not static, so it is visible to other translation
     * units. */
 173struct file_operations proc_sys_file_operations = {
 174        .open           = proc_opensys,
 175        .read           = proc_readsys,
 176        .write          = proc_writesys,
 177};
 178
 179extern struct proc_dir_entry *proc_sys_root;  /* defined elsewhere */
 180
    /* Forward declarations; definitions and callers are not in the part of the
     * file reviewed here. */
 181static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
 182static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 183#endif
 184
 185/* The default sysctl tables: */
 186
 187static ctl_table root_table[] = {
            /*
             * Top-level table referenced by root_table_header.  Every entry is
             * a directory: mode 0555, a .child table, and no .data or handler.
             */
 188        {
 189                .ctl_name       = CTL_KERN,
 190                .procname       = "kernel",
 191                .mode           = 0555,
 192                .child          = kern_table,
 193        },
 194        {
 195                .ctl_name       = CTL_VM,
 196                .procname       = "vm",
 197                .mode           = 0555,
 198                .child          = vm_table,
 199        },
 200#ifdef CONFIG_NET
            /* NOTE(review): net_table has no declaration in the part of this
             * file reviewed here; presumably one of the included headers
             * provides it - confirm. */
 201        {
 202                .ctl_name       = CTL_NET,
 203                .procname       = "net",
 204                .mode           = 0555,
 205                .child          = net_table,
 206        },
 207#endif
 208        {
 209                .ctl_name       = CTL_FS,
 210                .procname       = "fs",
 211                .mode           = 0555,
 212                .child          = fs_table,
 213        },
 214        {
 215                .ctl_name       = CTL_DEBUG,
 216                .procname       = "debug",
 217                .mode           = 0555,
 218                .child          = debug_table,
 219        },
 220        {
 221                .ctl_name       = CTL_DEV,
 222                .procname       = "dev",
 223                .mode           = 0555,
 224                .child          = dev_table,
 225        },
 226
            /* Final entry: ctl_name 0 and nothing else set.  Code walking the
             * table (not shown here) is expected to stop on it. */
 227        { .ctl_name = 0 }
 228};
 229
 230static ctl_table kern_table[] = {
            /*
             * Entries published under the "kernel" directory of root_table.
             * A data entry points .data at a kernel variable, gives its size
             * in .maxlen and names a handler in .proc_handler; some entries
             * additionally name one in .strategy.  Entries that set .child
             * instead are sub-directories.  Many entries are compiled in only
             * for one architecture or config option, so the contents of the
             * table differ from build to build.
             */
 231#ifndef CONFIG_UTS_NS
            /* Without UTS namespaces the five uname strings are exported
             * directly from init_uts_ns.name, with maxlen taken from the size
             * of each field. */
 232        {
 233                .ctl_name       = KERN_OSTYPE,
 234                .procname       = "ostype",
 235                .data           = init_uts_ns.name.sysname,
 236                .maxlen         = sizeof(init_uts_ns.name.sysname),
 237                .mode           = 0444,
 238                .proc_handler   = &proc_do_uts_string,
 239                .strategy       = &sysctl_string,
 240        },
 241        {
 242                .ctl_name       = KERN_OSRELEASE,
 243                .procname       = "osrelease",
 244                .data           = init_uts_ns.name.release,
 245                .maxlen         = sizeof(init_uts_ns.name.release),
 246                .mode           = 0444,
 247                .proc_handler   = &proc_do_uts_string,
 248                .strategy       = &sysctl_string,
 249        },
 250        {
 251                .ctl_name       = KERN_VERSION,
 252                .procname       = "version",
 253                .data           = init_uts_ns.name.version,
 254                .maxlen         = sizeof(init_uts_ns.name.version),
 255                .mode           = 0444,
 256                .proc_handler   = &proc_do_uts_string,
 257                .strategy       = &sysctl_string,
 258        },
 259        {
 260                .ctl_name       = KERN_NODENAME,
 261                .procname       = "hostname",
 262                .data           = init_uts_ns.name.nodename,
 263                .maxlen         = sizeof(init_uts_ns.name.nodename),
 264                .mode           = 0644,
 265                .proc_handler   = &proc_do_uts_string,
 266                .strategy       = &sysctl_string,
 267        },
 268        {
 269                .ctl_name       = KERN_DOMAINNAME,
 270                .procname       = "domainname",
 271                .data           = init_uts_ns.name.domainname,
 272                .maxlen         = sizeof(init_uts_ns.name.domainname),
 273                .mode           = 0644,
 274                .proc_handler   = &proc_do_uts_string,
 275                .strategy       = &sysctl_string,
 276        },
 277#else  /* CONFIG_UTS_NS is set (else-branch of #ifndef CONFIG_UTS_NS) */
            /* With UTS namespaces .data is left NULL and only the field size
             * is recorded; proc_do_uts_string presumably locates the string
             * itself at run time (its body is not shown here - confirm). */
 278        {
 279                .ctl_name       = KERN_OSTYPE,
 280                .procname       = "ostype",
 281                .data           = NULL,
 282                /* could maybe use __NEW_UTS_LEN here? */
 283                .maxlen         = FIELD_SIZEOF(struct new_utsname, sysname),
 284                .mode           = 0444,
 285                .proc_handler   = &proc_do_uts_string,
 286                .strategy       = &sysctl_string,
 287        },
 288        {
 289                .ctl_name       = KERN_OSRELEASE,
 290                .procname       = "osrelease",
 291                .data           = NULL,
 292                .maxlen         = FIELD_SIZEOF(struct new_utsname, release),
 293                .mode           = 0444,
 294                .proc_handler   = &proc_do_uts_string,
 295                .strategy       = &sysctl_string,
 296        },
 297        {
 298                .ctl_name       = KERN_VERSION,
 299                .procname       = "version",
 300                .data           = NULL,
 301                .maxlen         = FIELD_SIZEOF(struct new_utsname, version),
 302                .mode           = 0444,
 303                .proc_handler   = &proc_do_uts_string,
 304                .strategy       = &sysctl_string,
 305        },
 306        {
 307                .ctl_name       = KERN_NODENAME,
 308                .procname       = "hostname",
 309                .data           = NULL,
 310                .maxlen         = FIELD_SIZEOF(struct new_utsname, nodename),
 311                .mode           = 0644,
 312                .proc_handler   = &proc_do_uts_string,
 313                .strategy       = &sysctl_string,
 314        },
 315        {
 316                .ctl_name       = KERN_DOMAINNAME,
 317                .procname       = "domainname",
 318                .data           = NULL,
 319                .maxlen         = FIELD_SIZEOF(struct new_utsname, domainname),
 320                .mode           = 0644,
 321                .proc_handler   = &proc_do_uts_string,
 322                .strategy       = &sysctl_string,
 323        },
 324#endif /* !CONFIG_UTS_NS */
 325        {
 326                .ctl_name       = KERN_PANIC,
 327                .procname       = "panic",
 328                .data           = &panic_timeout,
 329                .maxlen         = sizeof(int),
 330                .mode           = 0644,
 331                .proc_handler   = &proc_dointvec,
 332        },
 333        {
 334                .ctl_name       = KERN_CORE_USES_PID,
 335                .procname       = "core_uses_pid",
 336                .data           = &core_uses_pid,
 337                .maxlen         = sizeof(int),
 338                .mode           = 0644,
 339                .proc_handler   = &proc_dointvec,
 340        },
 341        {
 342                .ctl_name       = KERN_CORE_PATTERN,
 343                .procname       = "core_pattern",
 344                .data           = core_pattern,
 345                .maxlen         = 128,
 346                .mode           = 0644,
 347                .proc_handler   = &proc_dostring,
 348                .strategy       = &sysctl_string,
 349        },
 350        {
 351                .ctl_name       = KERN_TAINTED,
 352                .procname       = "tainted",
 353                .data           = &tainted,
 354                .maxlen         = sizeof(int),
 355                .mode           = 0444,
 356                .proc_handler   = &proc_dointvec,
 357        },
 358        {
 359                .ctl_name       = KERN_CAP_BSET,
 360                .procname       = "cap-bound",
 361                .data           = &cap_bset,
 362                .maxlen         = sizeof(kernel_cap_t),
 363                .mode           = 0600,
 364                .proc_handler   = &proc_dointvec_bset,
 365        },
 366#ifdef CONFIG_BLK_DEV_INITRD
 367        {
 368                .ctl_name       = KERN_REALROOTDEV,
 369                .procname       = "real-root-dev",
 370                .data           = &real_root_dev,
 371                .maxlen         = sizeof(int),
 372                .mode           = 0644,
 373                .proc_handler   = &proc_dointvec,
 374        },
 375#endif
 376#ifdef __sparc__
 377        {
 378                .ctl_name       = KERN_SPARC_REBOOT,
 379                .procname       = "reboot-cmd",
 380                .data           = reboot_command,
 381                .maxlen         = 256,
 382                .mode           = 0644,
 383                .proc_handler   = &proc_dostring,
 384                .strategy       = &sysctl_string,
 385        },
 386        {
 387                .ctl_name       = KERN_SPARC_STOP_A,
 388                .procname       = "stop-a",
 389                .data           = &stop_a_enabled,
 390                .maxlen         = sizeof (int),
 391                .mode           = 0644,
 392                .proc_handler   = &proc_dointvec,
 393        },
 394        {
 395                .ctl_name       = KERN_SPARC_SCONS_PWROFF,
 396                .procname       = "scons-poweroff",
 397                .data           = &scons_pwroff,
 398                .maxlen         = sizeof (int),
 399                .mode           = 0644,
 400                .proc_handler   = &proc_dointvec,
 401        },
 402#endif
 403#ifdef __hppa__
 404        {
 405                .ctl_name       = KERN_HPPA_PWRSW,
 406                .procname       = "soft-power",
 407                .data           = &pwrsw_enabled,
 408                .maxlen         = sizeof (int),
 409                .mode           = 0644,
 410                .proc_handler   = &proc_dointvec,
 411        },
 412        {
 413                .ctl_name       = KERN_HPPA_UNALIGNED,
 414                .procname       = "unaligned-trap",
 415                .data           = &unaligned_enabled,
 416                .maxlen         = sizeof (int),
 417                .mode           = 0644,
 418                .proc_handler   = &proc_dointvec,
 419        },
 420#endif
 421        {
 422                .ctl_name       = KERN_CTLALTDEL,
 423                .procname       = "ctrl-alt-del",
 424                .data           = &C_A_D,
 425                .maxlen         = sizeof(int),
 426                .mode           = 0644,
 427                .proc_handler   = &proc_dointvec,
 428        },
 429        {
 430                .ctl_name       = KERN_PRINTK,
 431                .procname       = "printk",
 432                .data           = &console_loglevel,
 433                .maxlen         = 4*sizeof(int),  /* four ints, starting at console_loglevel */
 434                .mode           = 0644,
 435                .proc_handler   = &proc_dointvec,
 436        },
 437#ifdef CONFIG_KMOD
 438        {
 439                .ctl_name       = KERN_MODPROBE,
 440                .procname       = "modprobe",
 441                .data           = &modprobe_path,
 442                .maxlen         = KMOD_PATH_LEN,
 443                .mode           = 0644,
 444                .proc_handler   = &proc_dostring,
 445                .strategy       = &sysctl_string,
 446        },
 447#endif
 448#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 449        {
 450                .ctl_name       = KERN_HOTPLUG,
 451                .procname       = "hotplug",
 452                .data           = &uevent_helper,
 453                .maxlen         = UEVENT_HELPER_PATH_LEN,
 454                .mode           = 0644,
 455                .proc_handler   = &proc_dostring,
 456                .strategy       = &sysctl_string,
 457        },
 458#endif
 459#ifdef CONFIG_CHR_DEV_SG
 460        {
 461                .ctl_name       = KERN_SG_BIG_BUFF,
 462                .procname       = "sg-big-buff",
 463                .data           = &sg_big_buff,
 464                .maxlen         = sizeof (int),
 465                .mode           = 0444,
 466                .proc_handler   = &proc_dointvec,
 467        },
 468#endif
 469#ifdef CONFIG_BSD_PROCESS_ACCT
 470        {
 471                .ctl_name       = KERN_ACCT,
 472                .procname       = "acct",
 473                .data           = &acct_parm,
 474                .maxlen         = 3*sizeof(int),  /* acct_parm[] exposed as three ints */
 475                .mode           = 0644,
 476                .proc_handler   = &proc_dointvec,
 477        },
 478#endif
 479#ifdef CONFIG_SYSVIPC
            /* SysV IPC limits: every entry leaves .data NULL and gives only the
             * element size in .maxlen; proc_do_ipc_string presumably finds the
             * storage itself (its body is not shown here - confirm). */
 480        {
 481                .ctl_name       = KERN_SHMMAX,
 482                .procname       = "shmmax",
 483                .data           = NULL,
 484                .maxlen         = sizeof (size_t),
 485                .mode           = 0644,
 486                .proc_handler   = &proc_do_ipc_string,
 487        },
 488        {
 489                .ctl_name       = KERN_SHMALL,
 490                .procname       = "shmall",
 491                .data           = NULL,
 492                .maxlen         = sizeof (size_t),
 493                .mode           = 0644,
 494                .proc_handler   = &proc_do_ipc_string,
 495        },
 496        {
 497                .ctl_name       = KERN_SHMMNI,
 498                .procname       = "shmmni",
 499                .data           = NULL,
 500                .maxlen         = sizeof (int),
 501                .mode           = 0644,
 502                .proc_handler   = &proc_do_ipc_string,
 503        },
 504        {
 505                .ctl_name       = KERN_MSGMAX,
 506                .procname       = "msgmax",
 507                .data           = NULL,
 508                .maxlen         = sizeof (int),
 509                .mode           = 0644,
 510                .proc_handler   = &proc_do_ipc_string,
 511        },
 512        {
 513                .ctl_name       = KERN_MSGMNI,
 514                .procname       = "msgmni",
 515                .data           = NULL,
 516                .maxlen         = sizeof (int),
 517                .mode           = 0644,
 518                .proc_handler   = &proc_do_ipc_string,
 519        },
 520        {
 521                .ctl_name       = KERN_MSGMNB,
 522                .procname       =  "msgmnb",
 523                .data           = NULL,
 524                .maxlen         = sizeof (int),
 525                .mode           = 0644,
 526                .proc_handler   = &proc_do_ipc_string,
 527        },
 528        {
 529                .ctl_name       = KERN_SEM,
 530                .procname       = "sem",
 531                .data           = NULL,
 532                .maxlen         = 4*sizeof (int),
 533                .mode           = 0644,
 534                .proc_handler   = &proc_do_ipc_string,
 535        },
 536#endif
 537#ifdef CONFIG_MAGIC_SYSRQ
 538        {
 539                .ctl_name       = KERN_SYSRQ,
 540                .procname       = "sysrq",
 541                .data           = &sysrq_enabled,
 542                .maxlen         = sizeof (int),
 543                .mode           = 0644,
 544                .proc_handler   = &proc_dointvec,
 545        },
 546#endif
 547#ifdef CONFIG_PROC_SYSCTL
            /* No .data: the dedicated handler proc_do_cad_pid does the work
             * (its body is not shown here).  Mode 0600, unlike most entries. */
 548        {
 549                .ctl_name       = KERN_CADPID,
 550                .procname       = "cad_pid",
 551                .data           = NULL,
 552                .maxlen         = sizeof (int),
 553                .mode           = 0600,
 554                .proc_handler   = &proc_do_cad_pid,
 555        },
 556#endif
 557        {
 558                .ctl_name       = KERN_MAX_THREADS,
 559                .procname       = "threads-max",
 560                .data           = &max_threads,
 561                .maxlen         = sizeof(int),
 562                .mode           = 0644,
 563                .proc_handler   = &proc_dointvec,
 564        },
 565        {
 566                .ctl_name       = KERN_RANDOM,
 567                .procname       = "random",
 568                .mode           = 0555,
 569                .child          = random_table,
 570        },
 571#ifdef CONFIG_UNIX98_PTYS
 572        {
 573                .ctl_name       = KERN_PTY,
 574                .procname       = "pty",
 575                .mode           = 0555,
 576                .child          = pty_table,
 577        },
 578#endif
            /* overflowuid/overflowgid share the bounds minolduid..maxolduid
             * through extra1/extra2. */
 579        {
 580                .ctl_name       = KERN_OVERFLOWUID,
 581                .procname       = "overflowuid",
 582                .data           = &overflowuid,
 583                .maxlen         = sizeof(int),
 584                .mode           = 0644,
 585                .proc_handler   = &proc_dointvec_minmax,
 586                .strategy       = &sysctl_intvec,
 587                .extra1         = &minolduid,
 588                .extra2         = &maxolduid,
 589        },
 590        {
 591                .ctl_name       = KERN_OVERFLOWGID,
 592                .procname       = "overflowgid",
 593                .data           = &overflowgid,
 594                .maxlen         = sizeof(int),
 595                .mode           = 0644,
 596                .proc_handler   = &proc_dointvec_minmax,
 597                .strategy       = &sysctl_intvec,
 598                .extra1         = &minolduid,
 599                .extra2         = &maxolduid,
 600        },
 601#ifdef CONFIG_S390
 602#ifdef CONFIG_MATHEMU
 603        {
 604                .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
 605                .procname       = "ieee_emulation_warnings",
 606                .data           = &sysctl_ieee_emulation_warnings,
 607                .maxlen         = sizeof(int),
 608                .mode           = 0644,
 609                .proc_handler   = &proc_dointvec,
 610        },
 611#endif
 612#ifdef CONFIG_NO_IDLE_HZ
 613        {
 614                .ctl_name       = KERN_HZ_TIMER,
 615                .procname       = "hz_timer",
 616                .data           = &sysctl_hz_timer,
 617                .maxlen         = sizeof(int),
 618                .mode           = 0644,
 619                .proc_handler   = &proc_dointvec,
 620        },
 621#endif
 622        {
 623                .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
 624                .procname       = "userprocess_debug",
 625                .data           = &sysctl_userprocess_debug,
 626                .maxlen         = sizeof(int),
 627                .mode           = 0644,
 628                .proc_handler   = &proc_dointvec,
 629        },
 630#endif
 631        {
 632                .ctl_name       = KERN_PIDMAX,
 633                .procname       = "pid_max",
 634                .data           = &pid_max,
 635                .maxlen         = sizeof (int),
 636                .mode           = 0644,
 637                .proc_handler   = &proc_dointvec_minmax,
 638                .strategy       = sysctl_intvec,
 639                .extra1         = &pid_max_min,
 640                .extra2         = &pid_max_max,
 641        },
 642        {
 643                .ctl_name       = KERN_PANIC_ON_OOPS,
 644                .procname       = "panic_on_oops",
 645                .data           = &panic_on_oops,
 646                .maxlen         = sizeof(int),
 647                .mode           = 0644,
 648                .proc_handler   = &proc_dointvec,
 649        },
 650        {
 651                .ctl_name       = KERN_PRINTK_RATELIMIT,
 652                .procname       = "printk_ratelimit",
 653                .data           = &printk_ratelimit_jiffies,
 654                .maxlen         = sizeof(int),
 655                .mode           = 0644,
 656                .proc_handler   = &proc_dointvec_jiffies,
 657                .strategy       = &sysctl_jiffies,
 658        },
 659        {
 660                .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
 661                .procname       = "printk_ratelimit_burst",
 662                .data           = &printk_ratelimit_burst,
 663                .maxlen         = sizeof(int),
 664                .mode           = 0644,
 665                .proc_handler   = &proc_dointvec,
 666        },
 667        {
 668                .ctl_name       = KERN_NGROUPS_MAX,
 669                .procname       = "ngroups_max",
 670                .data           = &ngroups_max,
 671                .maxlen         = sizeof (int),
 672                .mode           = 0444,
 673                .proc_handler   = &proc_dointvec,
 674        },
 675#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 676        {
 677                .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
 678                .procname       = "unknown_nmi_panic",
 679                .data           = &unknown_nmi_panic,
 680                .maxlen         = sizeof (int),
 681                .mode           = 0644,
 682                .proc_handler   = &proc_dointvec,
 683        },
 684        {
 685                .ctl_name       = KERN_NMI_WATCHDOG,
 686                .procname       = "nmi_watchdog",
 687                .data           = &nmi_watchdog_enabled,
 688                .maxlen         = sizeof (int),
 689                .mode           = 0644,
 690                .proc_handler   = &proc_nmi_enabled,
 691        },
 692#endif
 693#if defined(CONFIG_X86)
 694        {
 695                .ctl_name       = KERN_PANIC_ON_NMI,
 696                .procname       = "panic_on_unrecovered_nmi",
 697                .data           = &panic_on_unrecovered_nmi,
 698                .maxlen         = sizeof(int),
 699                .mode           = 0644,
 700                .proc_handler   = &proc_dointvec,
 701        },
 702        {
 703                .ctl_name       = KERN_BOOTLOADER_TYPE,
 704                .procname       = "bootloader_type",
 705                .data           = &bootloader_type,
 706                .maxlen         = sizeof (int),
 707                .mode           = 0444,
 708                .proc_handler   = &proc_dointvec,
 709        },
 710#endif
 711#if defined(CONFIG_MMU)
 712        {
 713                .ctl_name       = KERN_RANDOMIZE,
 714                .procname       = "randomize_va_space",
 715                .data           = &randomize_va_space,
 716                .maxlen         = sizeof(int),
 717                .mode           = 0644,
 718                .proc_handler   = &proc_dointvec,
 719        },
 720#endif
 721#if defined(CONFIG_S390) && defined(CONFIG_SMP)
 722        {
 723                .ctl_name       = KERN_SPIN_RETRY,
 724                .procname       = "spin_retry",
 725                .data           = &spin_retry,
 726                .maxlen         = sizeof (int),
 727                .mode           = 0644,
 728                .proc_handler   = &proc_dointvec,
 729        },
 730#endif
 731#ifdef CONFIG_ACPI_SLEEP
            /* The only entry in this table sized as unsigned long and handled
             * by proc_doulongvec_minmax; no extra1/extra2 bounds are set. */
 732        {
 733                .ctl_name       = KERN_ACPI_VIDEO_FLAGS,
 734                .procname       = "acpi_video_flags",
 735                .data           = &acpi_video_flags,
 736                .maxlen         = sizeof (unsigned long),
 737                .mode           = 0644,
 738                .proc_handler   = &proc_doulongvec_minmax,
 739        },
 740#endif
 741#ifdef CONFIG_IA64
 742        {
 743                .ctl_name       = KERN_IA64_UNALIGNED,
 744                .procname       = "ignore-unaligned-usertrap",
 745                .data           = &no_unaligned_warning,
 746                .maxlen         = sizeof (int),
 747                .mode           = 0644,
 748                .proc_handler   = &proc_dointvec,
 749        },
 750#endif
 751#ifdef CONFIG_COMPAT
 752        {
 753                .ctl_name       = KERN_COMPAT_LOG,
 754                .procname       = "compat-log",
 755                .data           = &compat_log,
 756                .maxlen         = sizeof (int),
 757                .mode           = 0644,
 758                .proc_handler   = &proc_dointvec,
 759        },
 760#endif
 761#ifdef CONFIG_RT_MUTEXES
 762        {
 763                .ctl_name       = KERN_MAX_LOCK_DEPTH,
 764                .procname       = "max_lock_depth",
 765                .data           = &max_lock_depth,
 766                .maxlen         = sizeof(int),
 767                .mode           = 0644,
 768                .proc_handler   = &proc_dointvec,
 769        },
 770#endif
 771
            /* Final entry: ctl_name 0 and nothing else set.  Code walking the
             * table (not shown here) is expected to stop on it. */
 772        { .ctl_name = 0 }
 773};
 774
 775/* Constants for minimum and maximum testing in vm_table.
 776   We use these as one-element integer vectors. */
 777static int zero;  /* extra1 (lower bound) of "dirty_background_ratio" and "dirty_ratio"; static, so 0 */
 778static int one_hundred = 100;  /* extra2 (upper bound) of the same two entries */
 779
 780
 781static ctl_table vm_table[] = {
 782        {
 783                .ctl_name       = VM_OVERCOMMIT_MEMORY,
 784                .procname       = "overcommit_memory",
 785                .data           = &sysctl_overcommit_memory,
 786                .maxlen         = sizeof(sysctl_overcommit_memory),
 787                .mode           = 0644,
 788                .proc_handler   = &proc_dointvec,
 789        },
 790        {
 791                .ctl_name       = VM_PANIC_ON_OOM,
 792                .procname       = "panic_on_oom",
 793                .data           = &sysctl_panic_on_oom,
 794                .maxlen         = sizeof(sysctl_panic_on_oom),
 795                .mode           = 0644,
 796                .proc_handler   = &proc_dointvec,
 797        },
 798        {
 799                .ctl_name       = VM_OVERCOMMIT_RATIO,
 800                .procname       = "overcommit_ratio",
 801                .data           = &sysctl_overcommit_ratio,
 802                .maxlen         = sizeof(sysctl_overcommit_ratio),
 803                .mode           = 0644,
 804                .proc_handler   = &proc_dointvec,
 805        },
 806        {
 807                .ctl_name       = VM_PAGE_CLUSTER,
 808                .procname       = "page-cluster", 
 809                .data           = &page_cluster,
 810                .maxlen         = sizeof(int),
 811                .mode           = 0644,
 812                .proc_handler   = &proc_dointvec,
 813        },
 814        {
 815                .ctl_name       = VM_DIRTY_BACKGROUND,
 816                .procname       = "dirty_background_ratio",
 817                .data           = &dirty_background_ratio,
 818                .maxlen         = sizeof(dirty_background_ratio),
 819                .mode           = 0644,
 820                .proc_handler   = &proc_dointvec_minmax,
 821                .strategy       = &sysctl_intvec,
 822                .extra1         = &zero,
 823                .extra2         = &one_hundred,
 824        },
 825        {
 826                .ctl_name       = VM_DIRTY_RATIO,
 827                .procname       = "dirty_ratio",
 828                .data           = &vm_dirty_ratio,
 829                .maxlen         = sizeof(vm_dirty_ratio),
 830                .mode           = 0644,
 831                .proc_handler   = &proc_dointvec_minmax,
 832                .strategy       = &sysctl_intvec,
 833                .extra1         = &zero,
 834                .extra2         = &one_hundred,
 835        },
 836        {
 837                .ctl_name       = VM_DIRTY_WB_CS,
 838                .procname       = "dirty_writeback_centisecs",
 839                .data           = &dirty_writeback_interval,
 840                .maxlen         = sizeof(dirty_writeback_interval),
 841                .mode           = 0644,
 842                .proc_handler   = &dirty_writeback_centisecs_handler,
 843        },
 844        {
 845                .ctl_name       = VM_DIRTY_EXPIRE_CS,
 846                .procname       = "dirty_expire_centisecs",
 847                .data           = &dirty_expire_interval,
 848                .maxlen         = sizeof(dirty_expire_interval),
 849                .mode           = 0644,
 850                .proc_handler   = &proc_dointvec_userhz_jiffies,
 851        },
 852        {
 853                .ctl_name       = VM_NR_PDFLUSH_THREADS,
 854                .procname       = "nr_pdflush_threads",
 855                .data           = &nr_pdflush_threads,
 856                .maxlen         = sizeof nr_pdflush_threads,
 857                .mode           = 0444 /* read-only*/,
 858                .proc_handler   = &proc_dointvec,
 859        },
 860        {
 861                .ctl_name       = VM_SWAPPINESS,
 862                .procname       = "swappiness",
 863                .data           = &vm_swappiness,
 864                .maxlen         = sizeof(vm_swappiness),
 865                .mode           = 0644,
 866                .proc_handler   = &proc_dointvec_minmax,
 867                .strategy       = &sysctl_intvec,
 868                .extra1         = &zero,
 869                .extra2         = &one_hundred,
 870        },
 871#ifdef CONFIG_HUGETLB_PAGE
 872         {
 873                .ctl_name       = VM_HUGETLB_PAGES,
 874                .procname       = "nr_hugepages",
 875                .data           = &max_huge_pages,
 876                .maxlen         = sizeof(unsigned long),
 877                .mode           = 0644,
 878                .proc_handler   = &hugetlb_sysctl_handler,
 879                .extra1         = (void *)&hugetlb_zero,
 880                .extra2         = (void *)&hugetlb_infinity,
 881         },
 882         {
 883                .ctl_name       = VM_HUGETLB_GROUP,
 884                .procname       = "hugetlb_shm_group",
 885                .data           = &sysctl_hugetlb_shm_group,
 886                .maxlen         = sizeof(gid_t),
 887                .mode           = 0644,
 888                .proc_handler   = &proc_dointvec,
 889         },
 890#endif
 891        {
 892                .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
 893                .procname       = "lowmem_reserve_ratio",
 894                .data           = &sysctl_lowmem_reserve_ratio,
 895                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
 896                .mode           = 0644,
 897                .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
 898                .strategy       = &sysctl_intvec,
 899        },
 900        {
 901                .ctl_name       = VM_DROP_PAGECACHE,
 902                .procname       = "drop_caches",
 903                .data           = &sysctl_drop_caches,
 904                .maxlen         = sizeof(int),
 905                .mode           = 0644,
 906                .proc_handler   = drop_caches_sysctl_handler,
 907                .strategy       = &sysctl_intvec,
 908        },
 909        {
 910                .ctl_name       = VM_MIN_FREE_KBYTES,
 911                .procname       = "min_free_kbytes",
 912                .data           = &min_free_kbytes,
 913                .maxlen         = sizeof(min_free_kbytes),
 914                .mode           = 0644,
 915                .proc_handler   = &min_free_kbytes_sysctl_handler,
 916                .strategy       = &sysctl_intvec,
 917                .extra1         = &zero,
 918        },
 919        {
 920                .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
 921                .procname       = "percpu_pagelist_fraction",
 922                .data           = &percpu_pagelist_fraction,
 923                .maxlen         = sizeof(percpu_pagelist_fraction),
 924                .mode           = 0644,
 925                .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
 926                .strategy       = &sysctl_intvec,
 927                .extra1         = &min_percpu_pagelist_fract,
 928        },
 929#ifdef CONFIG_MMU
 930        {
 931                .ctl_name       = VM_MAX_MAP_COUNT,
 932                .procname       = "max_map_count",
 933                .data           = &sysctl_max_map_count,
 934                .maxlen         = sizeof(sysctl_max_map_count),
 935                .mode           = 0644,
 936                .proc_handler   = &proc_dointvec
 937        },
 938#endif
 939        {
 940                .ctl_name       = VM_LAPTOP_MODE,
 941                .procname       = "laptop_mode",
 942                .data           = &laptop_mode,
 943                .maxlen         = sizeof(laptop_mode),
 944                .mode           = 0644,
 945                .proc_handler   = &proc_dointvec_jiffies,
 946                .strategy       = &sysctl_jiffies,
 947        },
 948        {
 949                .ctl_name       = VM_BLOCK_DUMP,
 950                .procname       = "block_dump",
 951                .data           = &block_dump,
 952                .maxlen         = sizeof(block_dump),
 953                .mode           = 0644,
 954                .proc_handler   = &proc_dointvec,
 955                .strategy       = &sysctl_intvec,
 956                .extra1         = &zero,
 957        },
 958        {
 959                .ctl_name       = VM_VFS_CACHE_PRESSURE,
 960                .procname       = "vfs_cache_pressure",
 961                .data           = &sysctl_vfs_cache_pressure,
 962                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
 963                .mode           = 0644,
 964                .proc_handler   = &proc_dointvec,
 965                .strategy       = &sysctl_intvec,
 966                .extra1         = &zero,
 967        },
 968#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 969        {
 970                .ctl_name       = VM_LEGACY_VA_LAYOUT,
 971                .procname       = "legacy_va_layout",
 972                .data           = &sysctl_legacy_va_layout,
 973                .maxlen         = sizeof(sysctl_legacy_va_layout),
 974                .mode           = 0644,
 975                .proc_handler   = &proc_dointvec,
 976                .strategy       = &sysctl_intvec,
 977                .extra1         = &zero,
 978        },
 979#endif
 980#ifdef CONFIG_SWAP
 981        {
 982                .ctl_name       = VM_SWAP_TOKEN_TIMEOUT,
 983                .procname       = "swap_token_timeout",
 984                .data           = &swap_token_default_timeout,
 985                .maxlen         = sizeof(swap_token_default_timeout),
 986                .mode           = 0644,
 987                .proc_handler   = &proc_dointvec_jiffies,
 988                .strategy       = &sysctl_jiffies,
 989        },
 990#endif
 991#ifdef CONFIG_NUMA
 992        {
 993                .ctl_name       = VM_ZONE_RECLAIM_MODE,
 994                .procname       = "zone_reclaim_mode",
 995                .data           = &zone_reclaim_mode,
 996                .maxlen         = sizeof(zone_reclaim_mode),
 997                .mode           = 0644,
 998                .proc_handler   = &proc_dointvec,
 999                .strategy       = &sysctl_intvec,
1000                .extra1         = &zero,
1001        },
1002        {
1003                .ctl_name       = VM_MIN_UNMAPPED,
1004                .procname       = "min_unmapped_ratio",
1005                .data           = &sysctl_min_unmapped_ratio,
1006                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1007                .mode           = 0644,
1008                .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1009                .strategy       = &sysctl_intvec,
1010                .extra1         = &zero,
1011                .extra2         = &one_hundred,
1012        },
1013        {
1014                .ctl_name       = VM_MIN_SLAB,
1015                .procname       = "min_slab_ratio",
1016                .data           = &sysctl_min_slab_ratio,
1017                .maxlen         = sizeof(sysctl_min_slab_ratio),
1018                .mode           = 0644,
1019                .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1020                .strategy       = &sysctl_intvec,
1021                .extra1         = &zero,
1022                .extra2         = &one_hundred,
1023        },
1024#endif
1025#ifdef CONFIG_X86_32
1026        {
1027                .ctl_name       = VM_VDSO_ENABLED,
1028                .procname       = "vdso_enabled",
1029                .data           = &vdso_enabled,
1030                .maxlen         = sizeof(vdso_enabled),
1031                .mode           = 0644,
1032                .proc_handler   = &proc_dointvec,
1033                .strategy       = &sysctl_intvec,
1034                .extra1         = &zero,
1035        },
1036#endif
1037        { .ctl_name = 0 }
1038};
1039
1040static ctl_table fs_table[] = {
1041        {
1042                .ctl_name       = FS_NRINODE,
1043                .procname       = "inode-nr",
1044                .data           = &inodes_stat,
1045                .maxlen         = 2*sizeof(int),
1046                .mode           = 0444,
1047                .proc_handler   = &proc_dointvec,
1048        },
1049        {
1050                .ctl_name       = FS_STATINODE,
1051                .procname       = "inode-state",
1052                .data           = &inodes_stat,
1053                .maxlen         = 7*sizeof(int),
1054                .mode           = 0444,
1055                .proc_handler   = &proc_dointvec,
1056        },
1057        {
1058                .ctl_name       = FS_NRFILE,
1059                .procname       = "file-nr",
1060                .data           = &files_stat,
1061                .maxlen         = 3*sizeof(int),
1062                .mode           = 0444,
1063                .proc_handler   = &proc_nr_files,
1064        },
1065        {
1066                .ctl_name       = FS_MAXFILE,
1067                .procname       = "file-max",
1068                .data           = &files_stat.max_files,
1069                .maxlen         = sizeof(int),
1070                .mode           = 0644,
1071                .proc_handler   = &proc_dointvec,
1072        },
1073        {
1074                .ctl_name       = FS_DENTRY,
1075                .procname       = "dentry-state",
1076                .data           = &dentry_stat,
1077                .maxlen         = 6*sizeof(int),
1078                .mode           = 0444,
1079                .proc_handler   = &proc_dointvec,
1080        },
1081        {
1082                .ctl_name       = FS_OVERFLOWUID,
1083                .procname       = "overflowuid",
1084                .data           = &fs_overflowuid,
1085                .maxlen         = sizeof(int),
1086                .mode           = 0644,
1087                .proc_handler   = &proc_dointvec_minmax,
1088                .strategy       = &sysctl_intvec,
1089                .extra1         = &minolduid,
1090                .extra2         = &maxolduid,
1091        },
1092        {
1093                .ctl_name       = FS_OVERFLOWGID,
1094                .procname       = "overflowgid",
1095                .data           = &fs_overflowgid,
1096                .maxlen         = sizeof(int),
1097                .mode           = 0644,
1098                .proc_handler   = &proc_dointvec_minmax,
1099                .strategy       = &sysctl_intvec,
1100                .extra1         = &minolduid,
1101                .extra2         = &maxolduid,
1102        },
1103        {
1104                .ctl_name       = FS_LEASES,
1105                .procname       = "leases-enable",
1106                .data           = &leases_enable,
1107                .maxlen         = sizeof(int),
1108                .mode           = 0644,
1109                .proc_handler   = &proc_dointvec,
1110        },
1111#ifdef CONFIG_DNOTIFY
1112        {
1113                .ctl_name       = FS_DIR_NOTIFY,
1114                .procname       = "dir-notify-enable",
1115                .data           = &dir_notify_enable,
1116                .maxlen         = sizeof(int),
1117                .mode           = 0644,
1118                .proc_handler   = &proc_dointvec,
1119        },
1120#endif
1121#ifdef CONFIG_MMU
1122        {
1123                .ctl_name       = FS_LEASE_TIME,
1124                .procname       = "lease-break-time",
1125                .data           = &lease_break_time,
1126                .maxlen         = sizeof(int),
1127                .mode           = 0644,
1128                .proc_handler   = &proc_dointvec,
1129        },
1130        {
1131                .ctl_name       = FS_AIO_NR,
1132                .procname       = "aio-nr",
1133                .data           = &aio_nr,
1134                .maxlen         = sizeof(aio_nr),
1135                .mode           = 0444,
1136                .proc_handler   = &proc_doulongvec_minmax,
1137        },
1138        {
1139                .ctl_name       = FS_AIO_MAX_NR,
1140                .procname       = "aio-max-nr",
1141                .data           = &aio_max_nr,
1142                .maxlen         = sizeof(aio_max_nr),
1143                .mode           = 0644,
1144                .proc_handler   = &proc_doulongvec_minmax,
1145        },
1146#ifdef CONFIG_INOTIFY_USER
1147        {
1148                .ctl_name       = FS_INOTIFY,
1149                .procname       = "inotify",
1150                .mode           = 0555,
1151                .child          = inotify_table,
1152        },
1153#endif  
1154#endif
1155        {
1156                .ctl_name       = KERN_SETUID_DUMPABLE,
1157                .procname       = "suid_dumpable",
1158                .data           = &suid_dumpable,
1159                .maxlen         = sizeof(int),
1160                .mode           = 0644,
1161                .proc_handler   = &proc_dointvec,
1162        },
1163        { .ctl_name = 0 }
1164};
1165
1166static ctl_table debug_table[] = {
1167        { .ctl_name = 0 }
1168};
1169
1170static ctl_table dev_table[] = {
1171        { .ctl_name = 0 }
1172};
1173
1174extern void init_irq_proc (void);
1175
1176static DEFINE_SPINLOCK(sysctl_lock);
1177
1178/* called under sysctl_lock */
1179static int use_table(struct ctl_table_header *p)
1180{
1181        if (unlikely(p->unregistering))
1182                return 0;
1183        p->used++;
1184        return 1;
1185}
1186
1187/* called under sysctl_lock */
1188static void unuse_table(struct ctl_table_header *p)
1189{
1190        if (!--p->used)
1191                if (unlikely(p->unregistering))
1192                        complete(p->unregistering);
1193}
1194
1195/* called under sysctl_lock, will reacquire if has to wait */
1196static void start_unregistering(struct ctl_table_header *p)
1197{
1198        /*
1199         * if p->used is 0, nobody will ever touch that entry again;
1200         * we'll eliminate all paths to it before dropping sysctl_lock
1201         */
1202        if (unlikely(p->used)) {
1203                struct completion wait;
1204                init_completion(&wait);
1205                p->unregistering = &wait;
1206                spin_unlock(&sysctl_lock);
1207                wait_for_completion(&wait);
1208                spin_lock(&sysctl_lock);
1209        }
1210        /*
1211         * do not remove from the list until nobody holds it; walking the
1212         * list in do_sysctl() relies on that.
1213         */
1214        list_del_init(&p->ctl_entry);
1215}
1216
1217void __init sysctl_init(void)
1218{
1219#ifdef CONFIG_PROC_SYSCTL
1220        register_proc_table(root_table, proc_sys_root, &root_table_header);
1221        init_irq_proc();
1222#endif
1223}
1224
1225#ifdef CONFIG_SYSCTL_SYSCALL
1226int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1227               void __user *newval, size_t newlen)
1228{
1229        struct list_head *tmp;
1230        int error = -ENOTDIR;
1231
1232        if (nlen <= 0 || nlen >= CTL_MAXNAME)
1233                return -ENOTDIR;
1234        if (oldval) {
1235                int old_len;
1236                if (!oldlenp || get_user(old_len, oldlenp))
1237                        return -EFAULT;
1238        }
1239        spin_lock(&sysctl_lock);
1240        tmp = &root_table_header.ctl_entry;
1241        do {
1242                struct ctl_table_header *head =
1243                        list_entry(tmp, struct ctl_table_header, ctl_entry);
1244                void *context = NULL;
1245
1246                if (!use_table(head))
1247                        continue;
1248
1249                spin_unlock(&sysctl_lock);
1250
1251                error = parse_table(name, nlen, oldval, oldlenp, 
1252                                        newval, newlen, head->ctl_table,
1253                                        &context);
1254                kfree(context);
1255
1256                spin_lock(&sysctl_lock);
1257                unuse_table(head);
1258                if (error != -ENOTDIR)
1259                        break;
1260        } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1261        spin_unlock(&sysctl_lock);
1262        return error;
1263}
1264
1265asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1266{
1267        struct __sysctl_args tmp;
1268        int error;
1269
1270        if (copy_from_user(&tmp, args, sizeof(tmp)))
1271                return -EFAULT;
1272
1273        lock_kernel();
1274        error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1275                          tmp.newval, tmp.newlen);
1276        unlock_kernel();
1277        return error;
1278}
1279#endif /* CONFIG_SYSCTL_SYSCALL */
1280
1281/*
1282 * ctl_perm does NOT grant the superuser all rights automatically, because
1283 * some sysctl variables are readonly even to root.
1284 */
1285
1286static int test_perm(int mode, int op)
1287{
1288        if (!current->euid)
1289                mode >>= 6;
1290        else if (in_egroup_p(0))
1291                mode >>= 3;
1292        if ((mode & op & 0007) == op)
1293                return 0;
1294        return -EACCES;
1295}
1296
1297static inline int ctl_perm(ctl_table *table, int op)
1298{
1299        int error;
1300        error = security_sysctl(table, op);
1301        if (error)
1302                return error;
1303        return test_perm(table->mode, op);
1304}
1305
1306#ifdef CONFIG_SYSCTL_SYSCALL
1307static int parse_table(int __user *name, int nlen,
1308                       void __user *oldval, size_t __user *oldlenp,
1309                       void __user *newval, size_t newlen,
1310                       ctl_table *table, void **context)
1311{
1312        int n;
1313repeat:
1314        if (!nlen)
1315                return -ENOTDIR;
1316        if (get_user(n, name))
1317                return -EFAULT;
1318        for ( ; table->ctl_name || table->procname; table++) {
1319                if (!table->ctl_name)
1320                        continue;
1321                if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1322                        int error;
1323                        if (table->child) {
1324                                if (ctl_perm(table, 001))
1325                                        return -EPERM;
1326                                if (table->strategy) {
1327                                        error = table->strategy(
1328                                                table, name, nlen,
1329                                                oldval, oldlenp,
1330                                                newval, newlen, context);
1331                                        if (error)
1332                                                return error;
1333                                }
1334                                name++;
1335                                nlen--;
1336                                table = table->child;
1337                                goto repeat;
1338                        }
1339                        error = do_sysctl_strategy(table, name, nlen,
1340                                                   oldval, oldlenp,
1341                                                   newval, newlen, context);
1342                        return error;
1343                }
1344        }
1345        return -ENOTDIR;
1346}
1347
1348/* Perform the actual read/write of a sysctl table entry. */
1349int do_sysctl_strategy (ctl_table *table, 
1350                        int __user *name, int nlen,
1351                        void __user *oldval, size_t __user *oldlenp,
1352                        void __user *newval, size_t newlen, void **context)
1353{
1354        int op = 0, rc;
1355        size_t len;
1356
1357        if (oldval)
1358                op |= 004;
1359        if (newval) 
1360                op |= 002;
1361        if (ctl_perm(table, op))
1362                return -EPERM;
1363
1364        if (table->strategy) {
1365                rc = table->strategy(table, name, nlen, oldval, oldlenp,
1366                                     newval, newlen, context);
1367                if (rc < 0)
1368                        return rc;
1369                if (rc > 0)
1370                        return 0;
1371        }
1372
1373        /* If there is no strategy routine, or if the strategy returns
1374         * zero, proceed with automatic r/w */
1375        if (table->data && table->maxlen) {
1376                if (oldval && oldlenp) {
1377                        if (get_user(len, oldlenp))
1378                                return -EFAULT;
1379                        if (len) {
1380                                if (len > table->maxlen)
1381                                        len = table->maxlen;
1382                                if(copy_to_user(oldval, table->data, len))
1383                                        return -EFAULT;
1384                                if(put_user(len, oldlenp))
1385                                        return -EFAULT;
1386                        }
1387                }
1388                if (newval && newlen) {
1389                        len = newlen;
1390                        if (len > table->maxlen)
1391                                len = table->maxlen;
1392                        if(copy_from_user(table->data, newval, len))
1393                                return -EFAULT;
1394                }
1395        }
1396        return 0;
1397}
1398#endif /* CONFIG_SYSCTL_SYSCALL */
1399
1400/**
1401 * register_sysctl_table - register a sysctl hierarchy
1402 * @table: the top-level table structure
1403 * @insert_at_head: whether the entry should be inserted in front or at the end
1404 *
 1405 * Register a sysctl table hierarchy. @table should be a filled-in ctl_table
 1406 * array, terminated by an entry whose ctl_name is 0 and procname is %NULL.
1407 *
1408 * The members of the &ctl_table structure are used as follows:
1409 *
1410 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1411 *            must be unique within that level of sysctl
1412 *
1413 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1414 *            enter a sysctl file
1415 *
1416 * data - a pointer to data for use by proc_handler
1417 *
1418 * maxlen - the maximum size in bytes of the data
1419 *
1420 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1421 *
1422 * child - a pointer to the child sysctl table if this entry is a directory, or
1423 *         %NULL.
1424 *
1425 * proc_handler - the text handler routine (described below)
1426 *
1427 * strategy - the strategy routine (described below)
1428 *
1429 * de - for internal use by the sysctl routines
1430 *
1431 * extra1, extra2 - extra pointers usable by the proc handler routines
1432 *
1433 * Leaf nodes in the sysctl tree will be represented by a single file
1434 * under /proc; non-leaf nodes will be represented by directories.
1435 *
1436 * sysctl(2) can automatically manage read and write requests through
1437 * the sysctl table.  The data and maxlen fields of the ctl_table
1438 * struct enable minimal validation of the values being written to be
1439 * performed, and the mode field allows minimal authentication.
1440 *
1441 * More sophisticated management can be enabled by the provision of a
1442 * strategy routine with the table entry.  This will be called before
1443 * any automatic read or write of the data is performed.
1444 *
1445 * The strategy routine may return
1446 *
1447 * < 0 - Error occurred (error is passed to user process)
1448 *
1449 * 0   - OK - proceed with automatic read or write.
1450 *
1451 * > 0 - OK - read or write has been done by the strategy routine, so
1452 *       return immediately.
1453 *
1454 * There must be a proc_handler routine for any terminal nodes
1455 * mirrored under /proc/sys (non-terminals are handled by a built-in
1456 * directory handler).  Several default handlers are available to
1457 * cover common cases -
1458 *
1459 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1460 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1461 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1462 *
1463 * It is the handler's job to read the input buffer from user memory
1464 * and process it. The handler should return 0 on success.
1465 *
1466 * This routine returns %NULL on a failure to register, and a pointer
1467 * to the table header on success.
1468 */
1469struct ctl_table_header *register_sysctl_table(ctl_table * table, 
1470                                               int insert_at_head)
1471{
1472        struct ctl_table_header *tmp;
1473        tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1474        if (!tmp)
1475                return NULL;
1476        tmp->ctl_table = table;
1477        INIT_LIST_HEAD(&tmp->ctl_entry);
1478        tmp->used = 0;
1479        tmp->unregistering = NULL;
1480        spin_lock(&sysctl_lock);
1481        if (insert_at_head)
1482                list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1483        else
1484                list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1485        spin_unlock(&sysctl_lock);
1486#ifdef CONFIG_PROC_SYSCTL
1487        register_proc_table(table, proc_sys_root, tmp);
1488#endif
1489        return tmp;
1490}
1491
1492/**
1493 * unregister_sysctl_table - unregister a sysctl table hierarchy
1494 * @header: the header returned from register_sysctl_table
1495 *
1496 * Unregisters the sysctl table and all children. proc entries may not
1497 * actually be removed until they are no longer used by anyone.
1498 */
1499void unregister_sysctl_table(struct ctl_table_header * header)
1500{
1501        might_sleep();
1502        spin_lock(&sysctl_lock);
1503        start_unregistering(header);
1504#ifdef CONFIG_PROC_SYSCTL
1505        unregister_proc_table(header->ctl_table, proc_sys_root);
1506#endif
1507        spin_unlock(&sysctl_lock);
1508        kfree(header);
1509}
1510
1511#else /* !CONFIG_SYSCTL */
1512struct ctl_table_header * register_sysctl_table(ctl_table * table,
1513                                                int insert_at_head)
1514{
1515        return NULL;
1516}
1517
/* Stub used when CONFIG_SYSCTL is off: there is nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1521
1522#endif /* CONFIG_SYSCTL */
1523
1524/*
1525 * /proc/sys support
1526 */
1527
1528#ifdef CONFIG_PROC_SYSCTL
1529
1530/* Scan the sysctl entries in table and add them all into /proc */
1531static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1532{
1533        struct proc_dir_entry *de;
1534        int len;
1535        mode_t mode;
1536        
1537        for (; table->ctl_name || table->procname; table++) {
1538                /* Can't do anything without a proc name. */
1539                if (!table->procname)
1540                        continue;
1541                /* Maybe we can't do anything with it... */
1542                if (!table->proc_handler && !table->child) {
1543                        printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1544                                table->procname);
1545                        continue;
1546                }
1547
1548                len = strlen(table->procname);
1549                mode = table->mode;
1550
1551                de = NULL;
1552                if (table->proc_handler)
1553                        mode |= S_IFREG;
1554                else {
1555                        mode |= S_IFDIR;
1556                        for (de = root->subdir; de; de = de->next) {
1557                                if (proc_match(len, table->procname, de))
1558                                        break;
1559                        }
1560                        /* If the subdir exists already, de is non-NULL */
1561                }
1562
1563                if (!de) {
1564                        de = create_proc_entry(table->procname, mode, root);
1565                        if (!de)
1566                                continue;
1567                        de->set = set;
1568                        de->data = (void *) table;
1569                        if (table->proc_handler)
1570                                de->proc_fops = &proc_sys_file_operations;
1571                }
1572                table->de = de;
1573                if (de->mode & S_IFDIR)
1574                        register_proc_table(table->child, de, set);
1575        }
1576}
1577
1578/*
1579 * Unregister a /proc sysctl table and any subdirectories.
1580 */
1581static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1582{
1583        struct proc_dir_entry *de;
1584        for (; table->ctl_name || table->procname; table++) {
1585                if (!(de = table->de))
1586                        continue;
1587                if (de->mode & S_IFDIR) {
1588                        if (!table->child) {
1589                                printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1590                                continue;
1591                        }
1592                        unregister_proc_table(table->child, de);
1593
1594                        /* Don't unregister directories which still have entries.. */
1595                        if (de->subdir)
1596                                continue;
1597                }
1598
1599                /*
1600                 * In any case, mark the entry as goner; we'll keep it
1601                 * around if it's busy, but we'll know to do nothing with
1602                 * its fields.  We are under sysctl_lock here.
1603                 */
1604                de->data = NULL;
1605
1606                /* Don't unregister proc entries that are still being used.. */
1607                if (atomic_read(&de->count))
1608                        continue;
1609
1610                table->de = NULL;
1611                remove_proc_entry(table->procname, root);
1612        }
1613}
1614
1615static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1616                          size_t count, loff_t *ppos)
1617{
1618        int op;
1619        struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
1620        struct ctl_table *table;
1621        size_t res;
1622        ssize_t error = -ENOTDIR;
1623        
1624        spin_lock(&sysctl_lock);
1625        if (de && de->data && use_table(de->set)) {
1626                /*
1627                 * at that point we know that sysctl was not unregistered
1628                 * and won't be until we finish
1629                 */
1630                spin_unlock(&sysctl_lock);
1631                table = (struct ctl_table *) de->data;
1632                if (!table || !table->proc_handler)
1633                        goto out;
1634                error = -EPERM;
1635                op = (write ? 002 : 004);
1636                if (ctl_perm(table, op))
1637                        goto out;
1638                
1639                /* careful: calling conventions are nasty here */
1640                res = count;
1641                error = (*table->proc_handler)(table, write, file,
1642                                                buf, &res, ppos);
1643                if (!error)
1644                        error = res;
1645        out:
1646                spin_lock(&sysctl_lock);
1647                unuse_table(de->set);
1648        }
1649        spin_unlock(&sysctl_lock);
1650        return error;
1651}
1652
1653static int proc_opensys(struct inode *inode, struct file *file)
1654{
1655        if (file->f_mode & FMODE_WRITE) {
1656                /*
1657                 * sysctl entries that are not writable,
1658                 * are _NOT_ writable, capabilities or not.
1659                 */
1660                if (!(inode->i_mode & S_IWUSR))
1661                        return -EPERM;
1662        }
1663
1664        return 0;
1665}
1666
1667static ssize_t proc_readsys(struct file * file, char __user * buf,
1668                            size_t count, loff_t *ppos)
1669{
1670        return do_rw_proc(0, file, buf, count, ppos);
1671}
1672
1673static ssize_t proc_writesys(struct file * file, const char __user * buf,
1674                             size_t count, loff_t *ppos)
1675{
1676        return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1677}
1678
1679static int _proc_do_string(void* data, int maxlen, int write,
1680                           struct file *filp, void __user *buffer,
1681                           size_t *lenp, loff_t *ppos)
1682{
1683        size_t len;
1684        char __user *p;
1685        char c;
1686        
1687        if (!data || !maxlen || !*lenp ||
1688            (*ppos && !write)) {
1689                *lenp = 0;
1690                return 0;
1691        }
1692        
1693        if (write) {
1694                len = 0;
1695                p = buffer;
1696                while (len < *lenp) {
1697                        if (get_user(c, p++))
1698                                return -EFAULT;
1699                        if (c == 0 || c == '\n')
1700                                break;
1701                        len++;
1702                }
1703                if (len >= maxlen)
1704                        len = maxlen-1;
1705                if(copy_from_user(data, buffer, len))
1706                        return -EFAULT;
1707                ((char *) data)[len] = 0;
1708                *ppos += *lenp;
1709        } else {
1710                len = strlen(data);
1711                if (len > maxlen)
1712                        len = maxlen;
1713                if (len > *lenp)
1714                        len = *lenp;
1715                if (len)
1716                        if(copy_to_user(buffer, data, len))
1717                                return -EFAULT;
1718                if (len < *lenp) {
1719                        if(put_user('\n', ((char __user *) buffer) + len))
1720                                return -EFAULT;
1721                        len++;
1722                }
1723                *lenp = len;
1724                *ppos += len;
1725        }
1726        return 0;
1727}
1728
1729/**
1730 * proc_dostring - read a string sysctl
1731 * @table: the sysctl table
1732 * @write: %TRUE if this is a write to the sysctl file
1733 * @filp: the file structure
1734 * @buffer: the user buffer
1735 * @lenp: the size of the user buffer
1736 * @ppos: file position
1737 *
1738 * Reads/writes a string from/to the user buffer. If the kernel
1739 * buffer provided is not large enough to hold the string, the
1740 * string is truncated. The copied string is %NULL-terminated.
1741 * If the string is being read by the user process, it is copied
1742 * and a newline '\n' is added. It is truncated if the buffer is
1743 * not large enough.
1744 *
1745 * Returns 0 on success.
1746 */
1747int proc_dostring(ctl_table *table, int write, struct file *filp,
1748                  void __user *buffer, size_t *lenp, loff_t *ppos)
1749{
1750        return _proc_do_string(table->data, table->maxlen, write, filp,
1751                               buffer, lenp, ppos);
1752}
1753
1754/*
1755 *      Special case of dostring for the UTS structure. This has locks
1756 *      to observe. Should this be in kernel/sys.c ????
1757 */
1758 
1759#ifndef CONFIG_UTS_NS
1760static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1761                  void __user *buffer, size_t *lenp, loff_t *ppos)
1762{
1763        int r;
1764
1765        if (!write) {
1766                down_read(&uts_sem);
1767                r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1768                up_read(&uts_sem);
1769        } else {
1770                down_write(&uts_sem);
1771                r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1772                up_write(&uts_sem);
1773        }
1774        return r;
1775}
1776#else /* !CONFIG_UTS_NS */
1777static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1778                  void __user *buffer, size_t *lenp, loff_t *ppos)
1779{
1780        int r;
1781        struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
1782        char* which;
1783
1784        switch (table->ctl_name) {
1785        case KERN_OSTYPE:
1786                which = uts_ns->name.sysname;
1787                break;
1788        case KERN_NODENAME:
1789                which = uts_ns->name.nodename;
1790                break;
1791        case KERN_OSRELEASE:
1792                which = uts_ns->name.release;
1793                break;
1794        case KERN_VERSION:
1795                which = uts_ns->name.version;
1796                break;
1797        case KERN_DOMAINNAME:
1798                which = uts_ns->name.domainname;
1799                break;
1800        default:
1801                r = -EINVAL;
1802                goto out;
1803        }
1804
1805        if (!write) {
1806                down_read(&uts_sem);
1807                r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
1808                up_read(&uts_sem);
1809        } else {
1810                down_write(&uts_sem);
1811                r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
1812                up_write(&uts_sem);
1813        }
1814 out:
1815        return r;
1816}
1817#endif /* !CONFIG_UTS_NS */
1818
/*
 * Default conversion between the textual (sign, magnitude) form used by
 * the parser/formatter and a stored int.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to store (*negp, *lvalp) into *valp, zero to split
 *         *valp into *negp and *lvalp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                *valp = *negp ? -*lvalp : *lvalp;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" in
                         * int overflows for INT_MIN and would then
                         * sign-extend into a huge magnitude.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1837
1838static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1839                  int write, struct file *filp, void __user *buffer,
1840                  size_t *lenp, loff_t *ppos,
1841                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1842                              int write, void *data),
1843                  void *data)
1844{
1845#define TMPBUFLEN 21
1846        int *i, vleft, first=1, neg, val;
1847        unsigned long lval;
1848        size_t left, len;
1849        
1850        char buf[TMPBUFLEN], *p;
1851        char __user *s = buffer;
1852        
1853        if (!tbl_data || !table->maxlen || !*lenp ||
1854            (*ppos && !write)) {
1855                *lenp = 0;
1856                return 0;
1857        }
1858        
1859        i = (int *) tbl_data;
1860        vleft = table->maxlen / sizeof(*i);
1861        left = *lenp;
1862
1863        if (!conv)
1864                conv = do_proc_dointvec_conv;
1865
1866        for (; left && vleft--; i++, first=0) {
1867                if (write) {
1868                        while (left) {
1869                                char c;
1870                                if (get_user(c, s))
1871                                        return -EFAULT;
1872                                if (!isspace(c))
1873                                        break;
1874                                left--;
1875                                s++;
1876                        }
1877                        if (!left)
1878                                break;
1879                        neg = 0;
1880                        len = left;
1881                        if (len > sizeof(buf) - 1)
1882                                len = sizeof(buf) - 1;
1883                        if (copy_from_user(buf, s, len))
1884                                return -EFAULT;
1885                        buf[len] = 0;
1886                        p = buf;
1887                        if (*p == '-' && left > 1) {
1888                                neg = 1;
1889                                left--, p++;
1890                        }
1891                        if (*p < '0' || *p > '9')
1892                                break;
1893
1894                        lval = simple_strtoul(p, &p, 0);
1895
1896                        len = p-buf;
1897                        if ((len < left) && *p && !isspace(*p))
1898                                break;
1899                        if (neg)
1900                                val = -val;
1901                        s += len;
1902                        left -= len;
1903
1904                        if (conv(&neg, &lval, i, 1, data))
1905                                break;
1906                } else {
1907                        p = buf;
1908                        if (!first)
1909                                *p++ = '\t';
1910        
1911                        if (conv(&neg, &lval, i, 0, data))
1912                                break;
1913
1914                        sprintf(p, "%s%lu", neg ? "-" : "", lval);
1915                        len = strlen(buf);
1916                        if (len > left)
1917                                len = left;
1918                        if(copy_to_user(s, buf, len))
1919                                return -EFAULT;
1920                        left -= len;
1921                        s += len;
1922                }
1923        }
1924
1925        if (!write && !first && left) {
1926                if(put_user('\n', s))
1927                        return -EFAULT;
1928                left--, s++;
1929        }
1930        if (write) {
1931                while (left) {
1932                        char c;
1933                        if (get_user(c, s++))
1934                                return -EFAULT;
1935                        if (!isspace(c))
1936                                break;
1937                        left--;
1938                }
1939        }
1940        if (write && first)
1941                return -EINVAL;
1942        *lenp -= left;
1943        *ppos += *lenp;
1944        return 0;
1945#undef TMPBUFLEN
1946}
1947
1948static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1949                  void __user *buffer, size_t *lenp, loff_t *ppos,
1950                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1951                              int write, void *data),
1952                  void *data)
1953{
1954        return __do_proc_dointvec(table->data, table, write, filp,
1955                        buffer, lenp, ppos, conv, data);
1956}
1957
1958/**
1959 * proc_dointvec - read a vector of integers
1960 * @table: the sysctl table
1961 * @write: %TRUE if this is a write to the sysctl file
1962 * @filp: the file structure
1963 * @buffer: the user buffer
1964 * @lenp: the size of the user buffer
1965 * @ppos: file position
1966 *
1967 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1968 * values from/to the user buffer, treated as an ASCII string. 
1969 *
1970 * Returns 0 on success.
1971 */
1972int proc_dointvec(ctl_table *table, int write, struct file *filp,
1973                     void __user *buffer, size_t *lenp, loff_t *ppos)
1974{
1975    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1976                            NULL,NULL);
1977}
1978
/* How do_proc_dointvec_bset_conv() combines a written value with *valp. */
#define OP_SET  0
#define OP_AND  1
#define OP_OR   2
#define OP_MAX  3
#define OP_MIN  4

/*
 * Conversion callback that applies a written value to the stored int
 * according to an operation code.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to combine the written value into *valp
 * @data:  points to an int holding one of the OP_* codes
 *
 * An unknown operation code leaves *valp untouched.  Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
                                      int *valp,
                                      int write, void *data)
{
        int op = *(int *)data;
        if (write) {
                int val = *negp ? -*lvalp : *lvalp;
                switch(op) {
                case OP_SET:    *valp = val; break;
                case OP_AND:    *valp &= val; break;
                case OP_OR:     *valp |= val; break;
                case OP_MAX:    if(*valp < val)
                                        *valp = val;
                                break;
                case OP_MIN:    if(*valp > val)
                                *valp = val;
                                break;
                }
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" in
                         * int overflows for INT_MIN and would then
                         * sign-extend into a huge magnitude.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2015
2016/*
2017 *      init may raise the set.
2018 */
2019 
2020int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2021                        void __user *buffer, size_t *lenp, loff_t *ppos)
2022{
2023        int op;
2024
2025        if (!capable(CAP_SYS_MODULE)) {
2026                return -EPERM;
2027        }
2028
2029        op = is_init(current) ? OP_SET : OP_AND;
2030        return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2031                                do_proc_dointvec_bset_conv,&op);
2032}
2033
/*
 * Bounds handed to do_proc_dointvec_minmax_conv().  Either pointer may
 * be NULL, in which case that side is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback that range-checks written values.
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to validate and store, zero to split *valp
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving *valp unchanged) when a
 * written value lies below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, 
                                        int *valp, 
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;
        if (write) {
                int val = *negp ? -*lvalp : *lvalp;
                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" in
                         * int overflows for INT_MIN and would then
                         * sign-extend into a huge magnitude.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2062
2063/**
2064 * proc_dointvec_minmax - read a vector of integers with min/max values
2065 * @table: the sysctl table
2066 * @write: %TRUE if this is a write to the sysctl file
2067 * @filp: the file structure
2068 * @buffer: the user buffer
2069 * @lenp: the size of the user buffer
2070 * @ppos: file position
2071 *
2072 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2073 * values from/to the user buffer, treated as an ASCII string.
2074 *
2075 * This routine will ensure the values are within the range specified by
2076 * table->extra1 (min) and table->extra2 (max).
2077 *
2078 * Returns 0 on success.
2079 */
2080int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2081                  void __user *buffer, size_t *lenp, loff_t *ppos)
2082{
2083        struct do_proc_dointvec_minmax_conv_param param = {
2084                .min = (int *) table->extra1,
2085                .max = (int *) table->extra2,
2086        };
2087        return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2088                                do_proc_dointvec_minmax_conv, &param);
2089}
2090
2091static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
2092                                     struct file *filp,
2093                                     void __user *buffer,
2094                                     size_t *lenp, loff_t *ppos,
2095                                     unsigned long convmul,
2096                                     unsigned long convdiv)
2097{
2098#define TMPBUFLEN 21
2099        unsigned long *i, *min, *max, val;
2100        int vleft, first=1, neg;
2101        size_t len, left;
2102        char buf[TMPBUFLEN], *p;
2103        char __user *s = buffer;
2104        
2105        if (!data || !table->maxlen || !*lenp ||
2106            (*ppos && !write)) {
2107                *lenp = 0;
2108                return 0;
2109        }
2110        
2111        i = (unsigned long *) data;
2112        min = (unsigned long *) table->extra1;
2113        max = (unsigned long *) table->extra2;
2114        vleft = table->maxlen / sizeof(unsigned long);
2115        left = *lenp;
2116        
2117        for (; left && vleft--; i++, min++, max++, first=0) {
2118                if (write) {
2119                        while (left) {
2120                                char c;
2121                                if (get_user(c, s))
2122                                        return -EFAULT;
2123                                if (!isspace(c))
2124                                        break;
2125                                left--;
2126                                s++;
2127                        }
2128                        if (!left)
2129                                break;
2130                        neg = 0;
2131                        len = left;
2132                        if (len > TMPBUFLEN-1)
2133                                len = TMPBUFLEN-1;
2134                        if (copy_from_user(buf, s, len))
2135                                return -EFAULT;
2136                        buf[len] = 0;
2137                        p = buf;
2138                        if (*p == '-' && left > 1) {
2139                                neg = 1;
2140                                left--, p++;
2141                        }
2142                        if (*p < '0' || *p > '9')
2143                                break;
2144                        val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2145                        len = p-buf;
2146                        if ((len < left) && *p && !isspace(*p))
2147                                break;
2148                        if (neg)
2149                                val = -val;
2150                        s += len;
2151                        left -= len;
2152
2153                        if(neg)
2154                                continue;
2155                        if ((min && val < *min) || (max && val > *max))
2156                                continue;
2157                        *i = val;
2158                } else {
2159                        p = buf;
2160                        if (!first)
2161                                *p++ = '\t';
2162                        sprintf(p, "%lu", convdiv * (*i) / convmul);
2163                        len = strlen(buf);
2164                        if (len > left)
2165                                len = left;
2166                        if(copy_to_user(s, buf, len))
2167                                return -EFAULT;
2168                        left -= len;
2169                        s += len;
2170                }
2171        }
2172
2173        if (!write && !first && left) {
2174                if(put_user('\n', s))
2175                        return -EFAULT;
2176                left--, s++;
2177        }
2178        if (write) {
2179                while (left) {
2180                        char c;
2181                        if (get_user(c, s++))
2182                                return -EFAULT;
2183                        if (!isspace(c))
2184                                break;
2185                        left--;
2186                }
2187        }
2188        if (write && first)
2189                return -EINVAL;
2190        *lenp -= left;
2191        *ppos += *lenp;
2192        return 0;
2193#undef TMPBUFLEN
2194}
2195
2196static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2197                                     struct file *filp,
2198                                     void __user *buffer,
2199                                     size_t *lenp, loff_t *ppos,
2200                                     unsigned long convmul,
2201                                     unsigned long convdiv)
2202{
2203        return __do_proc_doulongvec_minmax(table->data, table, write,
2204                        filp, buffer, lenp, ppos, convmul, convdiv);
2205}
2206
2207/**
2208 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2209 * @table: the sysctl table
2210 * @write: %TRUE if this is a write to the sysctl file
2211 * @filp: the file structure
2212 * @buffer: the user buffer
2213 * @lenp: the size of the user buffer
2214 * @ppos: file position
2215 *
2216 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2217 * values from/to the user buffer, treated as an ASCII string.
2218 *
2219 * This routine will ensure the values are within the range specified by
2220 * table->extra1 (min) and table->extra2 (max).
2221 *
2222 * Returns 0 on success.
2223 */
2224int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2225                           void __user *buffer, size_t *lenp, loff_t *ppos)
2226{
2227    return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2228}
2229
2230/**
2231 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2232 * @table: the sysctl table
2233 * @write: %TRUE if this is a write to the sysctl file
2234 * @filp: the file structure
2235 * @buffer: the user buffer
2236 * @lenp: the size of the user buffer
2237 * @ppos: file position
2238 *
2239 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2240 * values from/to the user buffer, treated as an ASCII string. The values
2241 * are treated as milliseconds, and converted to jiffies when they are stored.
2242 *
2243 * This routine will ensure the values are within the range specified by
2244 * table->extra1 (min) and table->extra2 (max).
2245 *
2246 * Returns 0 on success.
2247 */
2248int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2249                                      struct file *filp,
2250                                      void __user *buffer,
2251                                      size_t *lenp, loff_t *ppos)
2252{
2253    return do_proc_doulongvec_minmax(table, write, filp, buffer,
2254                                     lenp, ppos, HZ, 1000l);
2255}
2256
2257
2258static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2259                                         int *valp,
2260                                         int write, void *data)
2261{
2262        if (write) {
2263                if (*lvalp > LONG_MAX / HZ)
2264                        return 1;
2265                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2266        } else {
2267                int val = *valp;
2268                unsigned long lval;
2269                if (val < 0) {
2270                        *negp = -1;
2271                        lval = (unsigned long)-val;
2272                } else {
2273                        *negp = 0;
2274                        lval = (unsigned long)val;
2275                }
2276                *lvalp = lval / HZ;
2277        }
2278        return 0;
2279}
2280
2281static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2282                                                int *valp,
2283                                                int write, void *data)
2284{
2285        if (write) {
2286                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2287                        return 1;
2288                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2289        } else {
2290                int val = *valp;
2291                unsigned long lval;
2292                if (val < 0) {
2293                        *negp = -1;
2294                        lval = (unsigned long)-val;
2295                } else {
2296                        *negp = 0;
2297                        lval = (unsigned long)val;
2298                }
2299                *lvalp = jiffies_to_clock_t(lval);
2300        }
2301        return 0;
2302}
2303
/*
 * Conversion callback between millisecond text and a stored value:
 * written values go through msecs_to_jiffies(), stored values are
 * printed through jiffies_to_msecs().
 *
 * @negp:  sign flag; read on write, set on read (non-zero = negative)
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to store, zero to read back
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
        } else {
                int val = *valp;
                unsigned long lval;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate after widening to unsigned: "-val" in
                         * int overflows for INT_MIN and would then
                         * sign-extend into a huge magnitude.
                         */
                        lval = -(unsigned long)val;
                } else {
                        *negp = 0;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2324
2325/**
2326 * proc_dointvec_jiffies - read a vector of integers as seconds
2327 * @table: the sysctl table
2328 * @write: %TRUE if this is a write to the sysctl file
2329 * @filp: the file structure
2330 * @buffer: the user buffer
2331 * @lenp: the size of the user buffer
2332 * @ppos: file position
2333 *
2334 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2335 * values from/to the user buffer, treated as an ASCII string. 
2336 * The values read are assumed to be in seconds, and are converted into
2337 * jiffies.
2338 *
2339 * Returns 0 on success.
2340 */
2341int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2342                          void __user *buffer, size_t *lenp, loff_t *ppos)
2343{
2344    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2345                            do_proc_dointvec_jiffies_conv,NULL);
2346}
2347
2348/**
2349 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2350 * @table: the sysctl table
2351 * @write: %TRUE if this is a write to the sysctl file
2352 * @filp: the file structure
2353 * @buffer: the user buffer
2354 * @lenp: the size of the user buffer
2355 * @ppos: pointer to the file position
2356 *
2357 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2358 * values from/to the user buffer, treated as an ASCII string. 
2359 * The values read are assumed to be in 1/USER_HZ seconds, and 
2360 * are converted into jiffies.
2361 *
2362 * Returns 0 on success.
2363 */
2364int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2365                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2366{
2367    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2368                            do_proc_dointvec_userhz_jiffies_conv,NULL);
2369}
2370
2371/**
2372 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2373 * @table: the sysctl table
2374 * @write: %TRUE if this is a write to the sysctl file
2375 * @filp: the file structure
2376 * @buffer: the user buffer
2377 * @lenp: the size of the user buffer
2378 * @ppos: file position
2379 * @ppos: the current position in the file
2380 *
2381 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2382 * values from/to the user buffer, treated as an ASCII string. 
2383 * The values read are assumed to be in 1/1000 seconds, and 
2384 * are converted into jiffies.
2385 *
2386 * Returns 0 on success.
2387 */
2388int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2389                             void __user *buffer, size_t *lenp, loff_t *ppos)
2390{
2391        return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2392                                do_proc_dointvec_ms_jiffies_conv, NULL);
2393}
2394
#ifdef CONFIG_SYSVIPC
/*
 * Handler for the SysV IPC limits.  The value is looked up in the
 * calling task's ipc namespace according to table->ctl_name.
 * KERN_SHMMAX and KERN_SHMALL are handled as unsigned long vectors,
 * every other supported name as an int vector.  Unknown names yield
 * -EINVAL.
 */
static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        struct ipc_namespace *ipc = current->nsproxy->ipc_ns;
        int is_ulong = 0;
        void *target;

        switch (table->ctl_name) {
        case KERN_SHMMAX:
                target = &ipc->shm_ctlmax;
                is_ulong = 1;
                break;
        case KERN_SHMALL:
                target = &ipc->shm_ctlall;
                is_ulong = 1;
                break;
        case KERN_SHMMNI:
                target = &ipc->shm_ctlmni;
                break;
        case KERN_MSGMAX:
                target = &ipc->msg_ctlmax;
                break;
        case KERN_MSGMNI:
                target = &ipc->msg_ctlmni;
                break;
        case KERN_MSGMNB:
                target = &ipc->msg_ctlmnb;
                break;
        case KERN_SEM:
                target = &ipc->sem_ctls;
                break;
        default:
                return -EINVAL;
        }

        if (is_ulong)
                return __do_proc_doulongvec_minmax(target, table, write,
                                filp, buffer, lenp, ppos, 1l, 1l);

        return __do_proc_dointvec(target, table, write, filp, buffer,
                        lenp, ppos, NULL, NULL);
}
#endif
2437
2438static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2439                           void __user *buffer, size_t *lenp, loff_t *ppos)
2440{
2441        struct pid *new_pid;
2442        pid_t tmp;
2443        int r;
2444
2445        tmp = pid_nr(cad_pid);
2446
2447        r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2448                               lenp, ppos, NULL, NULL);
2449        if (r || !write)
2450                return r;
2451
2452        new_pid = find_get_pid(tmp);
2453        if (!new_pid)
2454                return -ESRCH;
2455
2456        put_pid(xchg(&cad_pid, new_pid));
2457        return 0;
2458}
2459
2460#else /* CONFIG_PROC_FS */
2461
2462int proc_dostring(ctl_table *table, int write, struct file *filp,
2463                  void __user *buffer, size_t *lenp, loff_t *ppos)
2464{
2465        return -ENOSYS;
2466}
2467
2468static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
2469                void __user *buffer, size_t *lenp, loff_t *ppos)
2470{
2471        return -ENOSYS;
2472}
2473
2474#ifdef CONFIG_SYSVIPC
2475static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
2476                void __user *buffer, size_t *lenp, loff_t *ppos)
2477{
2478        return -ENOSYS;
2479}
2480#endif
2481
2482int proc_dointvec(ctl_table *table, int write, struct file *filp,
2483                  void __user *buffer, size_t *lenp, loff_t *ppos)
2484{
2485        return -ENOSYS;
2486}
2487
2488int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2489                        void __user *buffer, size_t *lenp, loff_t *ppos)
2490{
2491        return -ENOSYS;
2492}
2493
2494int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2495                    void __user *buffer, size_t *lenp, loff_t *ppos)
2496{
2497        return -ENOSYS;
2498}
2499
2500int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2501                    void __user *buffer, size_t *lenp, loff_t *ppos)
2502{
2503        return -ENOSYS;
2504}
2505
2506int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2507                    void __user *buffer, size_t *lenp, loff_t *ppos)
2508{
2509        return -ENOSYS;
2510}
2511
2512int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2513                             void __user *buffer, size_t *lenp, loff_t *ppos)
2514{
2515        return -ENOSYS;
2516}
2517
2518int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2519                    void __user *buffer, size_t *lenp, loff_t *ppos)
2520{
2521        return -ENOSYS;
2522}
2523
2524int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2525                                      struct file *filp,
2526                                      void __user *buffer,
2527                                      size_t *lenp, loff_t *ppos)
2528{
2529    return -ENOSYS;
2530}
2531
2532
2533#endif /* CONFIG_PROC_FS */
2534
2535
2536#ifdef CONFIG_SYSCTL_SYSCALL
2537/*
2538 * General sysctl support routines 
2539 */
2540
2541/* The generic string strategy routine: */
2542int sysctl_string(ctl_table *table, int __user *name, int nlen,
2543                  void __user *oldval, size_t __user *oldlenp,
2544                  void __user *newval, size_t newlen, void **context)
2545{
2546        if (!table->data || !table->maxlen) 
2547                return -ENOTDIR;
2548        
2549        if (oldval && oldlenp) {
2550                size_t bufsize;
2551                if (get_user(bufsize, oldlenp))
2552                        return -EFAULT;
2553                if (bufsize) {
2554                        size_t len = strlen(table->data), copied;
2555
2556                        /* This shouldn't trigger for a well-formed sysctl */
2557                        if (len > table->maxlen)
2558                                len = table->maxlen;
2559
2560                        /* Copy up to a max of bufsize-1 bytes of the string */
2561                        copied = (len >= bufsize) ? bufsize - 1 : len;
2562
2563                        if (copy_to_user(oldval, table->data, copied) ||
2564                            put_user(0, (char __user *)(oldval + copied)))
2565                                return -EFAULT;
2566                        if (put_user(len, oldlenp))
2567                                return -EFAULT;
2568                }
2569        }
2570        if (newval && newlen) {
2571                size_t len = newlen;
2572                if (len > table->maxlen)
2573                        len = table->maxlen;
2574                if(copy_from_user(table->data, newval, len))
2575                        return -EFAULT;
2576                if (len == table->maxlen)
2577                        len--;
2578                ((char *) table->data)[len] = 0;
2579        }
2580        return 1;
2581}
2582
2583/*
2584 * This function makes sure that all of the integers in the vector
2585 * are between the minimum and maximum values given in the arrays
2586 * table->extra1 and table->extra2, respectively.
2587 */
2588int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2589                void __user *oldval, size_t __user *oldlenp,
2590                void __user *newval, size_t newlen, void **context)
2591{
2592
2593        if (newval && newlen) {
2594                int __user *vec = (int __user *) newval;
2595                int *min = (int *) table->extra1;
2596                int *max = (int *) table->extra2;
2597                size_t length;
2598                int i;
2599
2600                if (newlen % sizeof(int) != 0)
2601                        return -EINVAL;
2602
2603                if (!table->extra1 && !table->extra2)
2604                        return 0;
2605
2606                if (newlen > table->maxlen)
2607                        newlen = table->maxlen;
2608                length = newlen / sizeof(int);
2609
2610                for (i = 0; i < length; i++) {
2611                        int value;
2612                        if (get_user(value, vec + i))
2613                                return -EFAULT;
2614                        if (min && value < min[i])
2615                                return -EINVAL;
2616                        if (max && value > max[i])
2617                                return -EINVAL;
2618                }
2619        }
2620        return 0;
2621}
2622
2623/* Strategy function to convert jiffies to seconds */ 
2624int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2625                void __user *oldval, size_t __user *oldlenp,
2626                void __user *newval, size_t newlen, void **context)
2627{
2628        if (oldval) {
2629                size_t olen;
2630                if (oldlenp) { 
2631                        if (get_user(olen, oldlenp))
2632                                return -EFAULT;
2633                        if (olen!=sizeof(int))
2634                                return -EINVAL; 
2635                }
2636                if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2637                    (oldlenp && put_user(sizeof(int),oldlenp)))
2638                        return -EFAULT;
2639        }
2640        if (newval && newlen) { 
2641                int new;
2642                if (newlen != sizeof(int))
2643                        return -EINVAL; 
2644                if (get_user(new, (int __user *)newval))
2645                        return -EFAULT;
2646                *(int *)(table->data) = new*HZ; 
2647        }
2648        return 1;
2649}
2650
2651/* Strategy function to convert jiffies to seconds */ 
2652int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2653                void __user *oldval, size_t __user *oldlenp,
2654                void __user *newval, size_t newlen, void **context)
2655{
2656        if (oldval) {
2657                size_t olen;
2658                if (oldlenp) { 
2659                        if (get_user(olen, oldlenp))
2660                                return -EFAULT;
2661                        if (olen!=sizeof(int))
2662                                return -EINVAL; 
2663                }
2664                if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2665                    (oldlenp && put_user(sizeof(int),oldlenp)))
2666                        return -EFAULT;
2667        }
2668        if (newval && newlen) { 
2669                int new;
2670                if (newlen != sizeof(int))
2671                        return -EINVAL; 
2672                if (get_user(new, (int __user *)newval))
2673                        return -EFAULT;
2674                *(int *)(table->data) = msecs_to_jiffies(new);
2675        }
2676        return 1;
2677}
2678
2679#else /* CONFIG_SYSCTL_SYSCALL */
2680
2681
2682asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2683{
2684        static int msg_count;
2685        struct __sysctl_args tmp;
2686        int name[CTL_MAXNAME];
2687        int i;
2688
2689        /* Read in the sysctl name for better debug message logging */
2690        if (copy_from_user(&tmp, args, sizeof(tmp)))
2691                return -EFAULT;
2692        if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2693                return -ENOTDIR;
2694        for (i = 0; i < tmp.nlen; i++)
2695                if (get_user(name[i], tmp.name + i))
2696                        return -EFAULT;
2697
2698        /* Ignore accesses to kernel.version */
2699        if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2700                goto out;
2701
2702        if (msg_count < 5) {
2703                msg_count++;
2704                printk(KERN_INFO
2705                        "warning: process `%s' used the removed sysctl "
2706                        "system call with ", current->comm);
2707                for (i = 0; i < tmp.nlen; i++)
2708                        printk("%d.", name[i]);
2709                printk("\n");
2710        }
2711out:
2712        return -ENOSYS;
2713}
2714
2715int sysctl_string(ctl_table *table, int __user *name, int nlen,
2716                  void __user *oldval, size_t __user *oldlenp,
2717                  void __user *newval, size_t newlen, void **context)
2718{
2719        return -ENOSYS;
2720}
2721
2722int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2723                void __user *oldval, size_t __user *oldlenp,
2724                void __user *newval, size_t newlen, void **context)
2725{
2726        return -ENOSYS;
2727}
2728
2729int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2730                void __user *oldval, size_t __user *oldlenp,
2731                void __user *newval, size_t newlen, void **context)
2732{
2733        return -ENOSYS;
2734}
2735
2736int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2737                void __user *oldval, size_t __user *oldlenp,
2738                void __user *newval, size_t newlen, void **context)
2739{
2740        return -ENOSYS;
2741}
2742
2743#endif /* CONFIG_SYSCTL_SYSCALL */
2744
/*
 * No sense putting this after each symbol definition, twice,
 * exception granted :-)
 *
 * Functions such as the sysctl_* strategy routines are defined twice in
 * this file -- a real implementation and an -ENOSYS stub chosen by the
 * preprocessor -- so their exports are collected here once instead of
 * following each definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);
2763