linux/fs/proc/base.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/proc/base.c
   3 *
   4 *  Copyright (C) 1991, 1992 Linus Torvalds
   5 *
   6 *  proc base directory handling functions
   7 *
   8 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
   9 *  Instead of using magical inumbers to determine the kind of object
  10 *  we allocate and fill in-core inodes upon lookup. They don't even
  11 *  go into icache. We cache the reference to task_struct upon lookup too.
   12 *  Eventually it should become a filesystem on its own. We don't use the
  13 *  rest of procfs anymore.
  14 *
  15 *
  16 *  Changelog:
  17 *  17-Jan-2005
  18 *  Allan Bezerra
  19 *  Bruna Moreira <bruna.moreira@indt.org.br>
  20 *  Edjard Mota <edjard.mota@indt.org.br>
  21 *  Ilias Biris <ilias.biris@indt.org.br>
  22 *  Mauricio Lin <mauricio.lin@indt.org.br>
  23 *
  24 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  25 *
  26 *  A new process specific entry (smaps) included in /proc. It shows the
  27 *  size of rss for each memory area. The maps entry lacks information
  28 *  about physical memory size (rss) for each mapped file, i.e.,
  29 *  rss information for executables and library files.
  30 *  This additional information is useful for any tools that need to know
  31 *  about physical memory consumption for a process specific library.
  32 *
  33 *  Changelog:
  34 *  21-Feb-2005
  35 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  36 *  Pud inclusion in the page table walking.
  37 *
  38 *  ChangeLog:
  39 *  10-Mar-2005
  40 *  10LE Instituto Nokia de Tecnologia - INdT:
   41 *  A better way to walk through the page table, as suggested by Hugh Dickins.
  42 *
  43 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
  44 *  Smaps information related to shared, private, clean and dirty pages.
  45 *
  46 *  Paul Mundt <paul.mundt@nokia.com>:
  47 *  Overall revision about smaps.
  48 */
  49
  50#include <asm/uaccess.h>
  51
  52#include <linux/errno.h>
  53#include <linux/time.h>
  54#include <linux/proc_fs.h>
  55#include <linux/stat.h>
  56#include <linux/task_io_accounting_ops.h>
  57#include <linux/init.h>
  58#include <linux/capability.h>
  59#include <linux/file.h>
  60#include <linux/fdtable.h>
  61#include <linux/string.h>
  62#include <linux/seq_file.h>
  63#include <linux/namei.h>
  64#include <linux/mnt_namespace.h>
  65#include <linux/mm.h>
  66#include <linux/swap.h>
  67#include <linux/rcupdate.h>
  68#include <linux/kallsyms.h>
  69#include <linux/stacktrace.h>
  70#include <linux/resource.h>
  71#include <linux/module.h>
  72#include <linux/mount.h>
  73#include <linux/security.h>
  74#include <linux/ptrace.h>
  75#include <linux/tracehook.h>
  76#include <linux/cgroup.h>
  77#include <linux/cpuset.h>
  78#include <linux/audit.h>
  79#include <linux/poll.h>
  80#include <linux/nsproxy.h>
  81#include <linux/oom.h>
  82#include <linux/elf.h>
  83#include <linux/pid_namespace.h>
  84#include <linux/fs_struct.h>
  85#include <linux/slab.h>
  86#include "internal.h"
  87
  88/* NOTE:
  89 *      Implementing inode permission operations in /proc is almost
  90 *      certainly an error.  Permission checks need to happen during
  91 *      each system call not at open time.  The reason is that most of
  92 *      what we wish to check for permissions in /proc varies at runtime.
  93 *
  94 *      The classic example of a problem is opening file descriptors
  95 *      in /proc for a task before it execs a suid executable.
  96 */
  97
/*
 * One row of an entry table: the name of the entry plus the mode and
 * operations used for it.  Rows are built with the NOD() family of macros.
 */
struct pid_entry {
        char *name;     /* entry name */
        int len;        /* length of name; NOD() sets it to sizeof(NAME) - 1 */
        mode_t mode;    /* file type and permission bits */
        const struct inode_operations *iop;     /* inode operations; NULL for REG/INF/ONE rows */
        const struct file_operations *fop;      /* file operations; NULL for LNK rows */
        union proc_op op;       /* callback: proc_get_link, proc_read or proc_show */
};
 106
/*
 * NOD() expands to a struct pid_entry initializer.  NAME must be a string
 * literal because .len is computed as sizeof(NAME) - 1 (the literal without
 * its terminating NUL).
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {                 \
        .name = (NAME),                                 \
        .len  = sizeof(NAME) - 1,                       \
        .mode = MODE,                                   \
        .iop  = IOP,                                    \
        .fop  = FOP,                                    \
        .op   = OP,                                     \
}

/* DIR: directory entry with its own inode and file operations, no callback. */
#define DIR(NAME, MODE, iops, fops)     \
        NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* LNK: symlink entry (S_IFLNK|S_IRWXUGO) resolved through the get_link callback. */
#define LNK(NAME, get_link)                                     \
        NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
                &proc_pid_link_inode_operations, NULL,          \
                { .proc_get_link = get_link } )
/* REG: regular file served entirely by the given file operations. */
#define REG(NAME, MODE, fops)                           \
        NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* INF: regular file using proc_info_file_operations with a proc_read callback. */
#define INF(NAME, MODE, read)                           \
        NOD(NAME, (S_IFREG|(MODE)),                     \
                NULL, &proc_info_file_operations,       \
                { .proc_read = read } )
/* ONE: regular file using proc_single_file_operations with a proc_show callback. */
#define ONE(NAME, MODE, show)                           \
        NOD(NAME, (S_IFREG|(MODE)),                     \
                NULL, &proc_single_file_operations,     \
                { .proc_show = show } )
 132
 133/*
 134 * Count the number of hardlinks for the pid_entry table, excluding the .
 135 * and .. links.
 136 */
 137static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
 138        unsigned int n)
 139{
 140        unsigned int i;
 141        unsigned int count;
 142
 143        count = 0;
 144        for (i = 0; i < n; ++i) {
 145                if (S_ISDIR(entries[i].mode))
 146                        ++count;
 147        }
 148
 149        return count;
 150}
 151
 152static int get_task_root(struct task_struct *task, struct path *root)
 153{
 154        int result = -ENOENT;
 155
 156        task_lock(task);
 157        if (task->fs) {
 158                get_fs_root(task->fs, root);
 159                result = 0;
 160        }
 161        task_unlock(task);
 162        return result;
 163}
 164
 165static int proc_cwd_link(struct inode *inode, struct path *path)
 166{
 167        struct task_struct *task = get_proc_task(inode);
 168        int result = -ENOENT;
 169
 170        if (task) {
 171                task_lock(task);
 172                if (task->fs) {
 173                        get_fs_pwd(task->fs, path);
 174                        result = 0;
 175                }
 176                task_unlock(task);
 177                put_task_struct(task);
 178        }
 179        return result;
 180}
 181
 182static int proc_root_link(struct inode *inode, struct path *path)
 183{
 184        struct task_struct *task = get_proc_task(inode);
 185        int result = -ENOENT;
 186
 187        if (task) {
 188                result = get_task_root(task, path);
 189                put_task_struct(task);
 190        }
 191        return result;
 192}
 193
 194/*
 195 * Return zero if current may access user memory in @task, -error if not.
 196 */
 197static int check_mem_permission(struct task_struct *task)
 198{
 199        /*
 200         * A task can always look at itself, in case it chooses
 201         * to use system calls instead of load instructions.
 202         */
 203        if (task == current)
 204                return 0;
 205
 206        /*
 207         * If current is actively ptrace'ing, and would also be
 208         * permitted to freshly attach with ptrace now, permit it.
 209         */
 210        if (task_is_stopped_or_traced(task)) {
 211                int match;
 212                rcu_read_lock();
 213                match = (tracehook_tracer_task(task) == current);
 214                rcu_read_unlock();
 215                if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
 216                        return 0;
 217        }
 218
 219        /*
 220         * Noone else is allowed.
 221         */
 222        return -EPERM;
 223}
 224
 225struct mm_struct *mm_for_maps(struct task_struct *task)
 226{
 227        struct mm_struct *mm;
 228
 229        if (mutex_lock_killable(&task->cred_guard_mutex))
 230                return NULL;
 231
 232        mm = get_task_mm(task);
 233        if (mm && mm != current->mm &&
 234                        !ptrace_may_access(task, PTRACE_MODE_READ)) {
 235                mmput(mm);
 236                mm = NULL;
 237        }
 238        mutex_unlock(&task->cred_guard_mutex);
 239
 240        return mm;
 241}
 242
 243static int proc_pid_cmdline(struct task_struct *task, char * buffer)
 244{
 245        int res = 0;
 246        unsigned int len;
 247        struct mm_struct *mm = get_task_mm(task);
 248        if (!mm)
 249                goto out;
 250        if (!mm->arg_end)
 251                goto out_mm;    /* Shh! No looking before we're done */
 252
 253        len = mm->arg_end - mm->arg_start;
 254 
 255        if (len > PAGE_SIZE)
 256                len = PAGE_SIZE;
 257 
 258        res = access_process_vm(task, mm->arg_start, buffer, len, 0);
 259
 260        // If the nul at the end of args has been overwritten, then
 261        // assume application is using setproctitle(3).
 262        if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
 263                len = strnlen(buffer, res);
 264                if (len < res) {
 265                    res = len;
 266                } else {
 267                        len = mm->env_end - mm->env_start;
 268                        if (len > PAGE_SIZE - res)
 269                                len = PAGE_SIZE - res;
 270                        res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
 271                        res = strnlen(buffer, res);
 272                }
 273        }
 274out_mm:
 275        mmput(mm);
 276out:
 277        return res;
 278}
 279
 280static int proc_pid_auxv(struct task_struct *task, char *buffer)
 281{
 282        int res = 0;
 283        struct mm_struct *mm = get_task_mm(task);
 284        if (mm) {
 285                unsigned int nwords = 0;
 286                do {
 287                        nwords += 2;
 288                } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
 289                res = nwords * sizeof(mm->saved_auxv[0]);
 290                if (res > PAGE_SIZE)
 291                        res = PAGE_SIZE;
 292                memcpy(buffer, mm->saved_auxv, res);
 293                mmput(mm);
 294        }
 295        return res;
 296}
 297
 298
 299#ifdef CONFIG_KALLSYMS
 300/*
 301 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 302 * Returns the resolved symbol.  If that fails, simply return the address.
 303 */
 304static int proc_pid_wchan(struct task_struct *task, char *buffer)
 305{
 306        unsigned long wchan;
 307        char symname[KSYM_NAME_LEN];
 308
 309        wchan = get_wchan(task);
 310
 311        if (lookup_symbol_name(wchan, symname) < 0)
 312                if (!ptrace_may_access(task, PTRACE_MODE_READ))
 313                        return 0;
 314                else
 315                        return sprintf(buffer, "%lu", wchan);
 316        else
 317                return sprintf(buffer, "%s", symname);
 318}
 319#endif /* CONFIG_KALLSYMS */
 320
 321#ifdef CONFIG_STACKTRACE
 322
 323#define MAX_STACK_TRACE_DEPTH   64
 324
 325static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 326                          struct pid *pid, struct task_struct *task)
 327{
 328        struct stack_trace trace;
 329        unsigned long *entries;
 330        int i;
 331
 332        entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
 333        if (!entries)
 334                return -ENOMEM;
 335
 336        trace.nr_entries        = 0;
 337        trace.max_entries       = MAX_STACK_TRACE_DEPTH;
 338        trace.entries           = entries;
 339        trace.skip              = 0;
 340        save_stack_trace_tsk(task, &trace);
 341
 342        for (i = 0; i < trace.nr_entries; i++) {
 343                seq_printf(m, "[<%p>] %pS\n",
 344                           (void *)entries[i], (void *)entries[i]);
 345        }
 346        kfree(entries);
 347
 348        return 0;
 349}
 350#endif
 351
 352#ifdef CONFIG_SCHEDSTATS
 353/*
 354 * Provides /proc/PID/schedstat
 355 */
 356static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 357{
 358        return sprintf(buffer, "%llu %llu %lu\n",
 359                        (unsigned long long)task->se.sum_exec_runtime,
 360                        (unsigned long long)task->sched_info.run_delay,
 361                        task->sched_info.pcount);
 362}
 363#endif
 364
 365#ifdef CONFIG_LATENCYTOP
 366static int lstats_show_proc(struct seq_file *m, void *v)
 367{
 368        int i;
 369        struct inode *inode = m->private;
 370        struct task_struct *task = get_proc_task(inode);
 371
 372        if (!task)
 373                return -ESRCH;
 374        seq_puts(m, "Latency Top version : v0.1\n");
 375        for (i = 0; i < 32; i++) {
 376                if (task->latency_record[i].backtrace[0]) {
 377                        int q;
 378                        seq_printf(m, "%i %li %li ",
 379                                task->latency_record[i].count,
 380                                task->latency_record[i].time,
 381                                task->latency_record[i].max);
 382                        for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 383                                char sym[KSYM_SYMBOL_LEN];
 384                                char *c;
 385                                if (!task->latency_record[i].backtrace[q])
 386                                        break;
 387                                if (task->latency_record[i].backtrace[q] == ULONG_MAX)
 388                                        break;
 389                                sprint_symbol(sym, task->latency_record[i].backtrace[q]);
 390                                c = strchr(sym, '+');
 391                                if (c)
 392                                        *c = 0;
 393                                seq_printf(m, "%s ", sym);
 394                        }
 395                        seq_printf(m, "\n");
 396                }
 397
 398        }
 399        put_task_struct(task);
 400        return 0;
 401}
 402
 403static int lstats_open(struct inode *inode, struct file *file)
 404{
 405        return single_open(file, lstats_show_proc, inode);
 406}
 407
 408static ssize_t lstats_write(struct file *file, const char __user *buf,
 409                            size_t count, loff_t *offs)
 410{
 411        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 412
 413        if (!task)
 414                return -ESRCH;
 415        clear_all_latency_tracing(task);
 416        put_task_struct(task);
 417
 418        return count;
 419}
 420
 421static const struct file_operations proc_lstats_operations = {
 422        .open           = lstats_open,
 423        .read           = seq_read,
 424        .write          = lstats_write,
 425        .llseek         = seq_lseek,
 426        .release        = single_release,
 427};
 428
 429#endif
 430
 431static int proc_oom_score(struct task_struct *task, char *buffer)
 432{
 433        unsigned long points = 0;
 434
 435        read_lock(&tasklist_lock);
 436        if (pid_alive(task))
 437                points = oom_badness(task, NULL, NULL,
 438                                        totalram_pages + total_swap_pages);
 439        read_unlock(&tasklist_lock);
 440        return sprintf(buffer, "%lu\n", points);
 441}
 442
/* Human-readable label and unit for one resource limit. */
struct limit_names {
        char *name;     /* text printed in the "Limit" column */
        char *unit;     /* text printed in the "Units" column, NULL for none */
};

/*
 * Labels for every resource limit, indexed by the RLIMIT_* constant.
 * proc_pid_limits() walks this table in index order.
 */
static const struct limit_names lnames[RLIM_NLIMITS] = {
        [RLIMIT_CPU] = {"Max cpu time", "seconds"},
        [RLIMIT_FSIZE] = {"Max file size", "bytes"},
        [RLIMIT_DATA] = {"Max data size", "bytes"},
        [RLIMIT_STACK] = {"Max stack size", "bytes"},
        [RLIMIT_CORE] = {"Max core file size", "bytes"},
        [RLIMIT_RSS] = {"Max resident set", "bytes"},
        [RLIMIT_NPROC] = {"Max processes", "processes"},
        [RLIMIT_NOFILE] = {"Max open files", "files"},
        [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
        [RLIMIT_AS] = {"Max address space", "bytes"},
        [RLIMIT_LOCKS] = {"Max file locks", "locks"},
        [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
        [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
        [RLIMIT_NICE] = {"Max nice priority", NULL},
        [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
        [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
};
 466
 467/* Display limits for a process */
 468static int proc_pid_limits(struct task_struct *task, char *buffer)
 469{
 470        unsigned int i;
 471        int count = 0;
 472        unsigned long flags;
 473        char *bufptr = buffer;
 474
 475        struct rlimit rlim[RLIM_NLIMITS];
 476
 477        if (!lock_task_sighand(task, &flags))
 478                return 0;
 479        memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 480        unlock_task_sighand(task, &flags);
 481
 482        /*
 483         * print the file header
 484         */
 485        count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
 486                        "Limit", "Soft Limit", "Hard Limit", "Units");
 487
 488        for (i = 0; i < RLIM_NLIMITS; i++) {
 489                if (rlim[i].rlim_cur == RLIM_INFINITY)
 490                        count += sprintf(&bufptr[count], "%-25s %-20s ",
 491                                         lnames[i].name, "unlimited");
 492                else
 493                        count += sprintf(&bufptr[count], "%-25s %-20lu ",
 494                                         lnames[i].name, rlim[i].rlim_cur);
 495
 496                if (rlim[i].rlim_max == RLIM_INFINITY)
 497                        count += sprintf(&bufptr[count], "%-20s ", "unlimited");
 498                else
 499                        count += sprintf(&bufptr[count], "%-20lu ",
 500                                         rlim[i].rlim_max);
 501
 502                if (lnames[i].unit)
 503                        count += sprintf(&bufptr[count], "%-10s\n",
 504                                         lnames[i].unit);
 505                else
 506                        count += sprintf(&bufptr[count], "\n");
 507        }
 508
 509        return count;
 510}
 511
 512#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 513static int proc_pid_syscall(struct task_struct *task, char *buffer)
 514{
 515        long nr;
 516        unsigned long args[6], sp, pc;
 517
 518        if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
 519                return sprintf(buffer, "running\n");
 520
 521        if (nr < 0)
 522                return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
 523
 524        return sprintf(buffer,
 525                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 526                       nr,
 527                       args[0], args[1], args[2], args[3], args[4], args[5],
 528                       sp, pc);
 529}
 530#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 531
 532/************************************************************************/
 533/*                       Here the fs part begins                        */
 534/************************************************************************/
 535
 536/* permission checks */
 537static int proc_fd_access_allowed(struct inode *inode)
 538{
 539        struct task_struct *task;
 540        int allowed = 0;
 541        /* Allow access to a task's file descriptors if it is us or we
 542         * may use ptrace attach to the process and find out that
 543         * information.
 544         */
 545        task = get_proc_task(inode);
 546        if (task) {
 547                allowed = ptrace_may_access(task, PTRACE_MODE_READ);
 548                put_task_struct(task);
 549        }
 550        return allowed;
 551}
 552
 553static int proc_setattr(struct dentry *dentry, struct iattr *attr)
 554{
 555        int error;
 556        struct inode *inode = dentry->d_inode;
 557
 558        if (attr->ia_valid & ATTR_MODE)
 559                return -EPERM;
 560
 561        error = inode_change_ok(inode, attr);
 562        if (error)
 563                return error;
 564
 565        if ((attr->ia_valid & ATTR_SIZE) &&
 566            attr->ia_size != i_size_read(inode)) {
 567                error = vmtruncate(inode, attr->ia_size);
 568                if (error)
 569                        return error;
 570        }
 571
 572        setattr_copy(inode, attr);
 573        mark_inode_dirty(inode);
 574        return 0;
 575}
 576
/* Default inode operations: only setattr is provided, via proc_setattr(). */
static const struct inode_operations proc_def_inode_operations = {
        .setattr        = proc_setattr,
};
 580
 581static int mounts_open_common(struct inode *inode, struct file *file,
 582                              const struct seq_operations *op)
 583{
 584        struct task_struct *task = get_proc_task(inode);
 585        struct nsproxy *nsp;
 586        struct mnt_namespace *ns = NULL;
 587        struct path root;
 588        struct proc_mounts *p;
 589        int ret = -EINVAL;
 590
 591        if (task) {
 592                rcu_read_lock();
 593                nsp = task_nsproxy(task);
 594                if (nsp) {
 595                        ns = nsp->mnt_ns;
 596                        if (ns)
 597                                get_mnt_ns(ns);
 598                }
 599                rcu_read_unlock();
 600                if (ns && get_task_root(task, &root) == 0)
 601                        ret = 0;
 602                put_task_struct(task);
 603        }
 604
 605        if (!ns)
 606                goto err;
 607        if (ret)
 608                goto err_put_ns;
 609
 610        ret = -ENOMEM;
 611        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
 612        if (!p)
 613                goto err_put_path;
 614
 615        file->private_data = &p->m;
 616        ret = seq_open(file, op);
 617        if (ret)
 618                goto err_free;
 619
 620        p->m.private = p;
 621        p->ns = ns;
 622        p->root = root;
 623        p->event = ns->event;
 624
 625        return 0;
 626
 627 err_free:
 628        kfree(p);
 629 err_put_path:
 630        path_put(&root);
 631 err_put_ns:
 632        put_mnt_ns(ns);
 633 err:
 634        return ret;
 635}
 636
 637static int mounts_release(struct inode *inode, struct file *file)
 638{
 639        struct proc_mounts *p = file->private_data;
 640        path_put(&p->root);
 641        put_mnt_ns(p->ns);
 642        return seq_release(inode, file);
 643}
 644
 645static unsigned mounts_poll(struct file *file, poll_table *wait)
 646{
 647        struct proc_mounts *p = file->private_data;
 648        unsigned res = POLLIN | POLLRDNORM;
 649
 650        poll_wait(file, &p->ns->poll, wait);
 651        if (mnt_had_events(p))
 652                res |= POLLERR | POLLPRI;
 653
 654        return res;
 655}
 656
 657static int mounts_open(struct inode *inode, struct file *file)
 658{
 659        return mounts_open_common(inode, file, &mounts_op);
 660}
 661
 662static const struct file_operations proc_mounts_operations = {
 663        .open           = mounts_open,
 664        .read           = seq_read,
 665        .llseek         = seq_lseek,
 666        .release        = mounts_release,
 667        .poll           = mounts_poll,
 668};
 669
 670static int mountinfo_open(struct inode *inode, struct file *file)
 671{
 672        return mounts_open_common(inode, file, &mountinfo_op);
 673}
 674
 675static const struct file_operations proc_mountinfo_operations = {
 676        .open           = mountinfo_open,
 677        .read           = seq_read,
 678        .llseek         = seq_lseek,
 679        .release        = mounts_release,
 680        .poll           = mounts_poll,
 681};
 682
 683static int mountstats_open(struct inode *inode, struct file *file)
 684{
 685        return mounts_open_common(inode, file, &mountstats_op);
 686}
 687
 688static const struct file_operations proc_mountstats_operations = {
 689        .open           = mountstats_open,
 690        .read           = seq_read,
 691        .llseek         = seq_lseek,
 692        .release        = mounts_release,
 693};
 694
 695#define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 696
 697static ssize_t proc_info_read(struct file * file, char __user * buf,
 698                          size_t count, loff_t *ppos)
 699{
 700        struct inode * inode = file->f_path.dentry->d_inode;
 701        unsigned long page;
 702        ssize_t length;
 703        struct task_struct *task = get_proc_task(inode);
 704
 705        length = -ESRCH;
 706        if (!task)
 707                goto out_no_task;
 708
 709        if (count > PROC_BLOCK_SIZE)
 710                count = PROC_BLOCK_SIZE;
 711
 712        length = -ENOMEM;
 713        if (!(page = __get_free_page(GFP_TEMPORARY)))
 714                goto out;
 715
 716        length = PROC_I(inode)->op.proc_read(task, (char*)page);
 717
 718        if (length >= 0)
 719                length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
 720        free_page(page);
 721out:
 722        put_task_struct(task);
 723out_no_task:
 724        return length;
 725}
 726
 727static const struct file_operations proc_info_file_operations = {
 728        .read           = proc_info_read,
 729        .llseek         = generic_file_llseek,
 730};
 731
 732static int proc_single_show(struct seq_file *m, void *v)
 733{
 734        struct inode *inode = m->private;
 735        struct pid_namespace *ns;
 736        struct pid *pid;
 737        struct task_struct *task;
 738        int ret;
 739
 740        ns = inode->i_sb->s_fs_info;
 741        pid = proc_pid(inode);
 742        task = get_pid_task(pid, PIDTYPE_PID);
 743        if (!task)
 744                return -ESRCH;
 745
 746        ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
 747
 748        put_task_struct(task);
 749        return ret;
 750}
 751
 752static int proc_single_open(struct inode *inode, struct file *filp)
 753{
 754        int ret;
 755        ret = single_open(filp, proc_single_show, NULL);
 756        if (!ret) {
 757                struct seq_file *m = filp->private_data;
 758
 759                m->private = inode;
 760        }
 761        return ret;
 762}
 763
 764static const struct file_operations proc_single_file_operations = {
 765        .open           = proc_single_open,
 766        .read           = seq_read,
 767        .llseek         = seq_lseek,
 768        .release        = single_release,
 769};
 770
 771static int mem_open(struct inode* inode, struct file* file)
 772{
 773        file->private_data = (void*)((long)current->self_exec_id);
 774        return 0;
 775}
 776
 777static ssize_t mem_read(struct file * file, char __user * buf,
 778                        size_t count, loff_t *ppos)
 779{
 780        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 781        char *page;
 782        unsigned long src = *ppos;
 783        int ret = -ESRCH;
 784        struct mm_struct *mm;
 785
 786        if (!task)
 787                goto out_no_task;
 788
 789        if (check_mem_permission(task))
 790                goto out;
 791
 792        ret = -ENOMEM;
 793        page = (char *)__get_free_page(GFP_TEMPORARY);
 794        if (!page)
 795                goto out;
 796
 797        ret = 0;
 798 
 799        mm = get_task_mm(task);
 800        if (!mm)
 801                goto out_free;
 802
 803        ret = -EIO;
 804 
 805        if (file->private_data != (void*)((long)current->self_exec_id))
 806                goto out_put;
 807
 808        ret = 0;
 809 
 810        while (count > 0) {
 811                int this_len, retval;
 812
 813                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 814                retval = access_process_vm(task, src, page, this_len, 0);
 815                if (!retval || check_mem_permission(task)) {
 816                        if (!ret)
 817                                ret = -EIO;
 818                        break;
 819                }
 820
 821                if (copy_to_user(buf, page, retval)) {
 822                        ret = -EFAULT;
 823                        break;
 824                }
 825 
 826                ret += retval;
 827                src += retval;
 828                buf += retval;
 829                count -= retval;
 830        }
 831        *ppos = src;
 832
 833out_put:
 834        mmput(mm);
 835out_free:
 836        free_page((unsigned long) page);
 837out:
 838        put_task_struct(task);
 839out_no_task:
 840        return ret;
 841}
 842
/*
 * Writing is disabled: mem_write is defined to NULL, so the #ifndef below
 * compiles the implementation out and proc_mem_operations gets a NULL
 * .write.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
/*
 * Write handler (compiled out, see above): copies user data into the
 * task's memory at the address given by *ppos, one page-sized chunk at a
 * time.  Returns the number of bytes written or a negative error.
 */
static ssize_t mem_write(struct file * file, const char __user *buf,
                         size_t count, loff_t *ppos)
{
        int copied;
        char *page;
        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
        unsigned long dst = *ppos;

        copied = -ESRCH;
        if (!task)
                goto out_no_task;

        if (check_mem_permission(task))
                goto out;

        copied = -ENOMEM;
        page = (char *)__get_free_page(GFP_TEMPORARY);
        if (!page)
                goto out;

        copied = 0;
        while (count > 0) {
                int this_len, retval;

                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
                if (copy_from_user(page, buf, this_len)) {
                        copied = -EFAULT;
                        break;
                }
                retval = access_process_vm(task, dst, page, this_len, 1);
                if (!retval) {
                        /* report -EIO only when nothing was written at all */
                        if (!copied)
                                copied = -EIO;
                        break;
                }
                copied += retval;
                buf += retval;
                dst += retval;
                count -= retval;                        
        }
        *ppos = dst;
        free_page((unsigned long) page);
out:
        put_task_struct(task);
out_no_task:
        return copied;
}
#endif
 895
 896loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 897{
 898        switch (orig) {
 899        case 0:
 900                file->f_pos = offset;
 901                break;
 902        case 1:
 903                file->f_pos += offset;
 904                break;
 905        default:
 906                return -EINVAL;
 907        }
 908        force_successful_syscall_return();
 909        return file->f_pos;
 910}
 911
 912static const struct file_operations proc_mem_operations = {
 913        .llseek         = mem_lseek,
 914        .read           = mem_read,
 915        .write          = mem_write,
 916        .open           = mem_open,
 917};
 918
 919static ssize_t environ_read(struct file *file, char __user *buf,
 920                        size_t count, loff_t *ppos)
 921{
 922        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 923        char *page;
 924        unsigned long src = *ppos;
 925        int ret = -ESRCH;
 926        struct mm_struct *mm;
 927
 928        if (!task)
 929                goto out_no_task;
 930
 931        if (!ptrace_may_access(task, PTRACE_MODE_READ))
 932                goto out;
 933
 934        ret = -ENOMEM;
 935        page = (char *)__get_free_page(GFP_TEMPORARY);
 936        if (!page)
 937                goto out;
 938
 939        ret = 0;
 940
 941        mm = get_task_mm(task);
 942        if (!mm)
 943                goto out_free;
 944
 945        while (count > 0) {
 946                int this_len, retval, max_len;
 947
 948                this_len = mm->env_end - (mm->env_start + src);
 949
 950                if (this_len <= 0)
 951                        break;
 952
 953                max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 954                this_len = (this_len > max_len) ? max_len : this_len;
 955
 956                retval = access_process_vm(task, (mm->env_start + src),
 957                        page, this_len, 0);
 958
 959                if (retval <= 0) {
 960                        ret = retval;
 961                        break;
 962                }
 963
 964                if (copy_to_user(buf, page, retval)) {
 965                        ret = -EFAULT;
 966                        break;
 967                }
 968
 969                ret += retval;
 970                src += retval;
 971                buf += retval;
 972                count -= retval;
 973        }
 974        *ppos = src;
 975
 976        mmput(mm);
 977out_free:
 978        free_page((unsigned long) page);
 979out:
 980        put_task_struct(task);
 981out_no_task:
 982        return ret;
 983}
 984
 985static const struct file_operations proc_environ_operations = {
 986        .read           = environ_read,
 987        .llseek         = generic_file_llseek,
 988};
 989
 990static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 991                                size_t count, loff_t *ppos)
 992{
 993        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 994        char buffer[PROC_NUMBUF];
 995        size_t len;
 996        int oom_adjust = OOM_DISABLE;
 997        unsigned long flags;
 998
 999        if (!task)
1000                return -ESRCH;
1001
1002        if (lock_task_sighand(task, &flags)) {
1003                oom_adjust = task->signal->oom_adj;
1004                unlock_task_sighand(task, &flags);
1005        }
1006
1007        put_task_struct(task);
1008
1009        len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
1010
1011        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1012}
1013
1014static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1015                                size_t count, loff_t *ppos)
1016{
1017        struct task_struct *task;
1018        char buffer[PROC_NUMBUF];
1019        long oom_adjust;
1020        unsigned long flags;
1021        int err;
1022
1023        memset(buffer, 0, sizeof(buffer));
1024        if (count > sizeof(buffer) - 1)
1025                count = sizeof(buffer) - 1;
1026        if (copy_from_user(buffer, buf, count))
1027                return -EFAULT;
1028
1029        err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
1030        if (err)
1031                return -EINVAL;
1032        if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1033             oom_adjust != OOM_DISABLE)
1034                return -EINVAL;
1035
1036        task = get_proc_task(file->f_path.dentry->d_inode);
1037        if (!task)
1038                return -ESRCH;
1039        if (!lock_task_sighand(task, &flags)) {
1040                put_task_struct(task);
1041                return -ESRCH;
1042        }
1043
1044        if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1045                unlock_task_sighand(task, &flags);
1046                put_task_struct(task);
1047                return -EACCES;
1048        }
1049
1050        /*
1051         * Warn that /proc/pid/oom_adj is deprecated, see
1052         * Documentation/feature-removal-schedule.txt.
1053         */
1054        printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, "
1055                        "please use /proc/%d/oom_score_adj instead.\n",
1056                        current->comm, task_pid_nr(current),
1057                        task_pid_nr(task), task_pid_nr(task));
1058        task->signal->oom_adj = oom_adjust;
1059        /*
1060         * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1061         * value is always attainable.
1062         */
1063        if (task->signal->oom_adj == OOM_ADJUST_MAX)
1064                task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
1065        else
1066                task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1067                                                                -OOM_DISABLE;
1068        unlock_task_sighand(task, &flags);
1069        put_task_struct(task);
1070
1071        return count;
1072}
1073
1074static const struct file_operations proc_oom_adjust_operations = {
1075        .read           = oom_adjust_read,
1076        .write          = oom_adjust_write,
1077        .llseek         = generic_file_llseek,
1078};
1079
1080static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
1081                                        size_t count, loff_t *ppos)
1082{
1083        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1084        char buffer[PROC_NUMBUF];
1085        int oom_score_adj = OOM_SCORE_ADJ_MIN;
1086        unsigned long flags;
1087        size_t len;
1088
1089        if (!task)
1090                return -ESRCH;
1091        if (lock_task_sighand(task, &flags)) {
1092                oom_score_adj = task->signal->oom_score_adj;
1093                unlock_task_sighand(task, &flags);
1094        }
1095        put_task_struct(task);
1096        len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
1097        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1098}
1099
1100static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1101                                        size_t count, loff_t *ppos)
1102{
1103        struct task_struct *task;
1104        char buffer[PROC_NUMBUF];
1105        unsigned long flags;
1106        long oom_score_adj;
1107        int err;
1108
1109        memset(buffer, 0, sizeof(buffer));
1110        if (count > sizeof(buffer) - 1)
1111                count = sizeof(buffer) - 1;
1112        if (copy_from_user(buffer, buf, count))
1113                return -EFAULT;
1114
1115        err = strict_strtol(strstrip(buffer), 0, &oom_score_adj);
1116        if (err)
1117                return -EINVAL;
1118        if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1119                        oom_score_adj > OOM_SCORE_ADJ_MAX)
1120                return -EINVAL;
1121
1122        task = get_proc_task(file->f_path.dentry->d_inode);
1123        if (!task)
1124                return -ESRCH;
1125        if (!lock_task_sighand(task, &flags)) {
1126                put_task_struct(task);
1127                return -ESRCH;
1128        }
1129        if (oom_score_adj < task->signal->oom_score_adj &&
1130                        !capable(CAP_SYS_RESOURCE)) {
1131                unlock_task_sighand(task, &flags);
1132                put_task_struct(task);
1133                return -EACCES;
1134        }
1135
1136        task->signal->oom_score_adj = oom_score_adj;
1137        /*
1138         * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1139         * always attainable.
1140         */
1141        if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1142                task->signal->oom_adj = OOM_DISABLE;
1143        else
1144                task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1145                                                        OOM_SCORE_ADJ_MAX;
1146        unlock_task_sighand(task, &flags);
1147        put_task_struct(task);
1148        return count;
1149}
1150
1151static const struct file_operations proc_oom_score_adj_operations = {
1152        .read           = oom_score_adj_read,
1153        .write          = oom_score_adj_write,
1154};
1155
1156#ifdef CONFIG_AUDITSYSCALL
1157#define TMPBUFLEN 21
1158static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1159                                  size_t count, loff_t *ppos)
1160{
1161        struct inode * inode = file->f_path.dentry->d_inode;
1162        struct task_struct *task = get_proc_task(inode);
1163        ssize_t length;
1164        char tmpbuf[TMPBUFLEN];
1165
1166        if (!task)
1167                return -ESRCH;
1168        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1169                                audit_get_loginuid(task));
1170        put_task_struct(task);
1171        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1172}
1173
1174static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1175                                   size_t count, loff_t *ppos)
1176{
1177        struct inode * inode = file->f_path.dentry->d_inode;
1178        char *page, *tmp;
1179        ssize_t length;
1180        uid_t loginuid;
1181
1182        if (!capable(CAP_AUDIT_CONTROL))
1183                return -EPERM;
1184
1185        rcu_read_lock();
1186        if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
1187                rcu_read_unlock();
1188                return -EPERM;
1189        }
1190        rcu_read_unlock();
1191
1192        if (count >= PAGE_SIZE)
1193                count = PAGE_SIZE - 1;
1194
1195        if (*ppos != 0) {
1196                /* No partial writes. */
1197                return -EINVAL;
1198        }
1199        page = (char*)__get_free_page(GFP_TEMPORARY);
1200        if (!page)
1201                return -ENOMEM;
1202        length = -EFAULT;
1203        if (copy_from_user(page, buf, count))
1204                goto out_free_page;
1205
1206        page[count] = '\0';
1207        loginuid = simple_strtoul(page, &tmp, 10);
1208        if (tmp == page) {
1209                length = -EINVAL;
1210                goto out_free_page;
1211
1212        }
1213        length = audit_set_loginuid(current, loginuid);
1214        if (likely(length == 0))
1215                length = count;
1216
1217out_free_page:
1218        free_page((unsigned long) page);
1219        return length;
1220}
1221
1222static const struct file_operations proc_loginuid_operations = {
1223        .read           = proc_loginuid_read,
1224        .write          = proc_loginuid_write,
1225        .llseek         = generic_file_llseek,
1226};
1227
1228static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1229                                  size_t count, loff_t *ppos)
1230{
1231        struct inode * inode = file->f_path.dentry->d_inode;
1232        struct task_struct *task = get_proc_task(inode);
1233        ssize_t length;
1234        char tmpbuf[TMPBUFLEN];
1235
1236        if (!task)
1237                return -ESRCH;
1238        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1239                                audit_get_sessionid(task));
1240        put_task_struct(task);
1241        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1242}
1243
1244static const struct file_operations proc_sessionid_operations = {
1245        .read           = proc_sessionid_read,
1246        .llseek         = generic_file_llseek,
1247};
1248#endif
1249
1250#ifdef CONFIG_FAULT_INJECTION
1251static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1252                                      size_t count, loff_t *ppos)
1253{
1254        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
1255        char buffer[PROC_NUMBUF];
1256        size_t len;
1257        int make_it_fail;
1258
1259        if (!task)
1260                return -ESRCH;
1261        make_it_fail = task->make_it_fail;
1262        put_task_struct(task);
1263
1264        len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1265
1266        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1267}
1268
1269static ssize_t proc_fault_inject_write(struct file * file,
1270                        const char __user * buf, size_t count, loff_t *ppos)
1271{
1272        struct task_struct *task;
1273        char buffer[PROC_NUMBUF], *end;
1274        int make_it_fail;
1275
1276        if (!capable(CAP_SYS_RESOURCE))
1277                return -EPERM;
1278        memset(buffer, 0, sizeof(buffer));
1279        if (count > sizeof(buffer) - 1)
1280                count = sizeof(buffer) - 1;
1281        if (copy_from_user(buffer, buf, count))
1282                return -EFAULT;
1283        make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1284        if (*end)
1285                return -EINVAL;
1286        task = get_proc_task(file->f_dentry->d_inode);
1287        if (!task)
1288                return -ESRCH;
1289        task->make_it_fail = make_it_fail;
1290        put_task_struct(task);
1291
1292        return count;
1293}
1294
1295static const struct file_operations proc_fault_inject_operations = {
1296        .read           = proc_fault_inject_read,
1297        .write          = proc_fault_inject_write,
1298        .llseek         = generic_file_llseek,
1299};
1300#endif
1301
1302
1303#ifdef CONFIG_SCHED_DEBUG
1304/*
1305 * Print out various scheduling related per-task fields:
1306 */
1307static int sched_show(struct seq_file *m, void *v)
1308{
1309        struct inode *inode = m->private;
1310        struct task_struct *p;
1311
1312        p = get_proc_task(inode);
1313        if (!p)
1314                return -ESRCH;
1315        proc_sched_show_task(p, m);
1316
1317        put_task_struct(p);
1318
1319        return 0;
1320}
1321
1322static ssize_t
1323sched_write(struct file *file, const char __user *buf,
1324            size_t count, loff_t *offset)
1325{
1326        struct inode *inode = file->f_path.dentry->d_inode;
1327        struct task_struct *p;
1328
1329        p = get_proc_task(inode);
1330        if (!p)
1331                return -ESRCH;
1332        proc_sched_set_task(p);
1333
1334        put_task_struct(p);
1335
1336        return count;
1337}
1338
1339static int sched_open(struct inode *inode, struct file *filp)
1340{
1341        int ret;
1342
1343        ret = single_open(filp, sched_show, NULL);
1344        if (!ret) {
1345                struct seq_file *m = filp->private_data;
1346
1347                m->private = inode;
1348        }
1349        return ret;
1350}
1351
1352static const struct file_operations proc_pid_sched_operations = {
1353        .open           = sched_open,
1354        .read           = seq_read,
1355        .write          = sched_write,
1356        .llseek         = seq_lseek,
1357        .release        = single_release,
1358};
1359
1360#endif
1361
1362static ssize_t comm_write(struct file *file, const char __user *buf,
1363                                size_t count, loff_t *offset)
1364{
1365        struct inode *inode = file->f_path.dentry->d_inode;
1366        struct task_struct *p;
1367        char buffer[TASK_COMM_LEN];
1368
1369        memset(buffer, 0, sizeof(buffer));
1370        if (count > sizeof(buffer) - 1)
1371                count = sizeof(buffer) - 1;
1372        if (copy_from_user(buffer, buf, count))
1373                return -EFAULT;
1374
1375        p = get_proc_task(inode);
1376        if (!p)
1377                return -ESRCH;
1378
1379        if (same_thread_group(current, p))
1380                set_task_comm(p, buffer);
1381        else
1382                count = -EINVAL;
1383
1384        put_task_struct(p);
1385
1386        return count;
1387}
1388
1389static int comm_show(struct seq_file *m, void *v)
1390{
1391        struct inode *inode = m->private;
1392        struct task_struct *p;
1393
1394        p = get_proc_task(inode);
1395        if (!p)
1396                return -ESRCH;
1397
1398        task_lock(p);
1399        seq_printf(m, "%s\n", p->comm);
1400        task_unlock(p);
1401
1402        put_task_struct(p);
1403
1404        return 0;
1405}
1406
1407static int comm_open(struct inode *inode, struct file *filp)
1408{
1409        int ret;
1410
1411        ret = single_open(filp, comm_show, NULL);
1412        if (!ret) {
1413                struct seq_file *m = filp->private_data;
1414
1415                m->private = inode;
1416        }
1417        return ret;
1418}
1419
1420static const struct file_operations proc_pid_set_comm_operations = {
1421        .open           = comm_open,
1422        .read           = seq_read,
1423        .write          = comm_write,
1424        .llseek         = seq_lseek,
1425        .release        = single_release,
1426};
1427
1428/*
1429 * We added or removed a vma mapping the executable. The vmas are only mapped
1430 * during exec and are not mapped with the mmap system call.
1431 * Callers must hold down_write() on the mm's mmap_sem for these
1432 */
1433void added_exe_file_vma(struct mm_struct *mm)
1434{
1435        mm->num_exe_file_vmas++;
1436}
1437
1438void removed_exe_file_vma(struct mm_struct *mm)
1439{
1440        mm->num_exe_file_vmas--;
1441        if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1442                fput(mm->exe_file);
1443                mm->exe_file = NULL;
1444        }
1445
1446}
1447
1448void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1449{
1450        if (new_exe_file)
1451                get_file(new_exe_file);
1452        if (mm->exe_file)
1453                fput(mm->exe_file);
1454        mm->exe_file = new_exe_file;
1455        mm->num_exe_file_vmas = 0;
1456}
1457
1458struct file *get_mm_exe_file(struct mm_struct *mm)
1459{
1460        struct file *exe_file;
1461
1462        /* We need mmap_sem to protect against races with removal of
1463         * VM_EXECUTABLE vmas */
1464        down_read(&mm->mmap_sem);
1465        exe_file = mm->exe_file;
1466        if (exe_file)
1467                get_file(exe_file);
1468        up_read(&mm->mmap_sem);
1469        return exe_file;
1470}
1471
1472void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1473{
1474        /* It's safe to write the exe_file pointer without exe_file_lock because
1475         * this is called during fork when the task is not yet in /proc */
1476        newmm->exe_file = get_mm_exe_file(oldmm);
1477}
1478
1479static int proc_exe_link(struct inode *inode, struct path *exe_path)
1480{
1481        struct task_struct *task;
1482        struct mm_struct *mm;
1483        struct file *exe_file;
1484
1485        task = get_proc_task(inode);
1486        if (!task)
1487                return -ENOENT;
1488        mm = get_task_mm(task);
1489        put_task_struct(task);
1490        if (!mm)
1491                return -ENOENT;
1492        exe_file = get_mm_exe_file(mm);
1493        mmput(mm);
1494        if (exe_file) {
1495                *exe_path = exe_file->f_path;
1496                path_get(&exe_file->f_path);
1497                fput(exe_file);
1498                return 0;
1499        } else
1500                return -ENOENT;
1501}
1502
1503static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1504{
1505        struct inode *inode = dentry->d_inode;
1506        int error = -EACCES;
1507
1508        /* We don't need a base pointer in the /proc filesystem */
1509        path_put(&nd->path);
1510
1511        /* Are we allowed to snoop on the tasks file descriptors? */
1512        if (!proc_fd_access_allowed(inode))
1513                goto out;
1514
1515        error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1516out:
1517        return ERR_PTR(error);
1518}
1519
1520static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1521{
1522        char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
1523        char *pathname;
1524        int len;
1525
1526        if (!tmp)
1527                return -ENOMEM;
1528
1529        pathname = d_path(path, tmp, PAGE_SIZE);
1530        len = PTR_ERR(pathname);
1531        if (IS_ERR(pathname))
1532                goto out;
1533        len = tmp + PAGE_SIZE - 1 - pathname;
1534
1535        if (len > buflen)
1536                len = buflen;
1537        if (copy_to_user(buffer, pathname, len))
1538                len = -EFAULT;
1539 out:
1540        free_page((unsigned long)tmp);
1541        return len;
1542}
1543
1544static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1545{
1546        int error = -EACCES;
1547        struct inode *inode = dentry->d_inode;
1548        struct path path;
1549
1550        /* Are we allowed to snoop on the tasks file descriptors? */
1551        if (!proc_fd_access_allowed(inode))
1552                goto out;
1553
1554        error = PROC_I(inode)->op.proc_get_link(inode, &path);
1555        if (error)
1556                goto out;
1557
1558        error = do_proc_readlink(&path, buffer, buflen);
1559        path_put(&path);
1560out:
1561        return error;
1562}
1563
1564static const struct inode_operations proc_pid_link_inode_operations = {
1565        .readlink       = proc_pid_readlink,
1566        .follow_link    = proc_pid_follow_link,
1567        .setattr        = proc_setattr,
1568};
1569
1570
1571/* building an inode */
1572
1573static int task_dumpable(struct task_struct *task)
1574{
1575        int dumpable = 0;
1576        struct mm_struct *mm;
1577
1578        task_lock(task);
1579        mm = task->mm;
1580        if (mm)
1581                dumpable = get_dumpable(mm);
1582        task_unlock(task);
1583        if(dumpable == 1)
1584                return 1;
1585        return 0;
1586}
1587
1588
1589static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1590{
1591        struct inode * inode;
1592        struct proc_inode *ei;
1593        const struct cred *cred;
1594
1595        /* We need a new inode */
1596
1597        inode = new_inode(sb);
1598        if (!inode)
1599                goto out;
1600
1601        /* Common stuff */
1602        ei = PROC_I(inode);
1603        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1604        inode->i_op = &proc_def_inode_operations;
1605
1606        /*
1607         * grab the reference to task.
1608         */
1609        ei->pid = get_task_pid(task, PIDTYPE_PID);
1610        if (!ei->pid)
1611                goto out_unlock;
1612
1613        if (task_dumpable(task)) {
1614                rcu_read_lock();
1615                cred = __task_cred(task);
1616                inode->i_uid = cred->euid;
1617                inode->i_gid = cred->egid;
1618                rcu_read_unlock();
1619        }
1620        security_task_to_inode(task, inode);
1621
1622out:
1623        return inode;
1624
1625out_unlock:
1626        iput(inode);
1627        return NULL;
1628}
1629
1630static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1631{
1632        struct inode *inode = dentry->d_inode;
1633        struct task_struct *task;
1634        const struct cred *cred;
1635
1636        generic_fillattr(inode, stat);
1637
1638        rcu_read_lock();
1639        stat->uid = 0;
1640        stat->gid = 0;
1641        task = pid_task(proc_pid(inode), PIDTYPE_PID);
1642        if (task) {
1643                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1644                    task_dumpable(task)) {
1645                        cred = __task_cred(task);
1646                        stat->uid = cred->euid;
1647                        stat->gid = cred->egid;
1648                }
1649        }
1650        rcu_read_unlock();
1651        return 0;
1652}
1653
1654/* dentry stuff */
1655
1656/*
1657 *      Exceptional case: normally we are not allowed to unhash a busy
1658 * directory. In this case, however, we can do it - no aliasing problems
1659 * due to the way we treat inodes.
1660 *
1661 * Rewrite the inode's ownerships here because the owning task may have
1662 * performed a setuid(), etc.
1663 *
1664 * Before the /proc/pid/status file was created the only way to read
1665 * the effective uid of a /process was to stat /proc/pid.  Reading
1666 * /proc/pid/status is slow enough that procps and other packages
1667 * kept stating /proc/pid.  To keep the rules in /proc simple I have
1668 * made this apply to all per process world readable and executable
1669 * directories.
1670 */
1671static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1672{
1673        struct inode *inode = dentry->d_inode;
1674        struct task_struct *task = get_proc_task(inode);
1675        const struct cred *cred;
1676
1677        if (task) {
1678                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1679                    task_dumpable(task)) {
1680                        rcu_read_lock();
1681                        cred = __task_cred(task);
1682                        inode->i_uid = cred->euid;
1683                        inode->i_gid = cred->egid;
1684                        rcu_read_unlock();
1685                } else {
1686                        inode->i_uid = 0;
1687                        inode->i_gid = 0;
1688                }
1689                inode->i_mode &= ~(S_ISUID | S_ISGID);
1690                security_task_to_inode(task, inode);
1691                put_task_struct(task);
1692                return 1;
1693        }
1694        d_drop(dentry);
1695        return 0;
1696}
1697
1698static int pid_delete_dentry(struct dentry * dentry)
1699{
1700        /* Is the task we represent dead?
1701         * If so, then don't put the dentry on the lru list,
1702         * kill it immediately.
1703         */
1704        return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1705}
1706
1707static const struct dentry_operations pid_dentry_operations =
1708{
1709        .d_revalidate   = pid_revalidate,
1710        .d_delete       = pid_delete_dentry,
1711};
1712
1713/* Lookups */
1714
/*
 * Callback type used to attach an inode to a freshly allocated dentry.
 *
 * proc_fill_cache() invokes it as (parent directory inode, new dentry,
 * task, opaque pointer).  A NULL return makes the caller keep using the
 * dentry it passed in; any non-NULL return (which may be an ERR_PTR value)
 * is used in its place.
 */
typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
				struct task_struct *, const void *);
1717
1718/*
1719 * Fill a directory entry.
1720 *
1721 * If possible create the dcache entry and derive our inode number and
1722 * file type from dcache entry.
1723 *
1724 * Since all of the proc inode numbers are dynamically generated, the inode
1725 * numbers do not exist until the inode is cache.  This means creating the
1726 * the dcache entry in readdir is necessary to keep the inode numbers
1727 * reported by readdir in sync with the inode numbers reported
1728 * by stat.
1729 */
1730static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1731        char *name, int len,
1732        instantiate_t instantiate, struct task_struct *task, const void *ptr)
1733{
1734        struct dentry *child, *dir = filp->f_path.dentry;
1735        struct inode *inode;
1736        struct qstr qname;
1737        ino_t ino = 0;
1738        unsigned type = DT_UNKNOWN;
1739
1740        qname.name = name;
1741        qname.len  = len;
1742        qname.hash = full_name_hash(name, len);
1743
1744        child = d_lookup(dir, &qname);
1745        if (!child) {
1746                struct dentry *new;
1747                new = d_alloc(dir, &qname);
1748                if (new) {
1749                        child = instantiate(dir->d_inode, new, task, ptr);
1750                        if (child)
1751                                dput(new);
1752                        else
1753                                child = new;
1754                }
1755        }
1756        if (!child || IS_ERR(child) || !child->d_inode)
1757                goto end_instantiate;
1758        inode = child->d_inode;
1759        if (inode) {
1760                ino = inode->i_ino;
1761                type = inode->i_mode >> 12;
1762        }
1763        dput(child);
1764end_instantiate:
1765        if (!ino)
1766                ino = find_inode_number(dir, &qname);
1767        if (!ino)
1768                ino = 1;
1769        return filldir(dirent, name, len, filp->f_pos, ino, type);
1770}
1771
1772static unsigned name_to_int(struct dentry *dentry)
1773{
1774        const char *name = dentry->d_name.name;
1775        int len = dentry->d_name.len;
1776        unsigned n = 0;
1777
1778        if (len > 1 && *name == '0')
1779                goto out;
1780        while (len-- > 0) {
1781                unsigned c = *name++ - '0';
1782                if (c > 9)
1783                        goto out;
1784                if (n >= (~0U-9)/10)
1785                        goto out;
1786                n *= 10;
1787                n += c;
1788        }
1789        return n;
1790out:
1791        return ~0U;
1792}
1793
1794#define PROC_FDINFO_MAX 64
1795
1796static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1797{
1798        struct task_struct *task = get_proc_task(inode);
1799        struct files_struct *files = NULL;
1800        struct file *file;
1801        int fd = proc_fd(inode);
1802
1803        if (task) {
1804                files = get_files_struct(task);
1805                put_task_struct(task);
1806        }
1807        if (files) {
1808                /*
1809                 * We are not taking a ref to the file structure, so we must
1810                 * hold ->file_lock.
1811                 */
1812                spin_lock(&files->file_lock);
1813                file = fcheck_files(files, fd);
1814                if (file) {
1815                        if (path) {
1816                                *path = file->f_path;
1817                                path_get(&file->f_path);
1818                        }
1819                        if (info)
1820                                snprintf(info, PROC_FDINFO_MAX,
1821                                         "pos:\t%lli\n"
1822                                         "flags:\t0%o\n",
1823                                         (long long) file->f_pos,
1824                                         file->f_flags);
1825                        spin_unlock(&files->file_lock);
1826                        put_files_struct(files);
1827                        return 0;
1828                }
1829                spin_unlock(&files->file_lock);
1830                put_files_struct(files);
1831        }
1832        return -ENOENT;
1833}
1834
1835static int proc_fd_link(struct inode *inode, struct path *path)
1836{
1837        return proc_fd_info(inode, path, NULL);
1838}
1839
1840static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1841{
1842        struct inode *inode = dentry->d_inode;
1843        struct task_struct *task = get_proc_task(inode);
1844        int fd = proc_fd(inode);
1845        struct files_struct *files;
1846        const struct cred *cred;
1847
1848        if (task) {
1849                files = get_files_struct(task);
1850                if (files) {
1851                        rcu_read_lock();
1852                        if (fcheck_files(files, fd)) {
1853                                rcu_read_unlock();
1854                                put_files_struct(files);
1855                                if (task_dumpable(task)) {
1856                                        rcu_read_lock();
1857                                        cred = __task_cred(task);
1858                                        inode->i_uid = cred->euid;
1859                                        inode->i_gid = cred->egid;
1860                                        rcu_read_unlock();
1861                                } else {
1862                                        inode->i_uid = 0;
1863                                        inode->i_gid = 0;
1864                                }
1865                                inode->i_mode &= ~(S_ISUID | S_ISGID);
1866                                security_task_to_inode(task, inode);
1867                                put_task_struct(task);
1868                                return 1;
1869                        }
1870                        rcu_read_unlock();
1871                        put_files_struct(files);
1872                }
1873                put_task_struct(task);
1874        }
1875        d_drop(dentry);
1876        return 0;
1877}
1878
1879static const struct dentry_operations tid_fd_dentry_operations =
1880{
1881        .d_revalidate   = tid_fd_revalidate,
1882        .d_delete       = pid_delete_dentry,
1883};
1884
1885static struct dentry *proc_fd_instantiate(struct inode *dir,
1886        struct dentry *dentry, struct task_struct *task, const void *ptr)
1887{
1888        unsigned fd = *(const unsigned *)ptr;
1889        struct file *file;
1890        struct files_struct *files;
1891        struct inode *inode;
1892        struct proc_inode *ei;
1893        struct dentry *error = ERR_PTR(-ENOENT);
1894
1895        inode = proc_pid_make_inode(dir->i_sb, task);
1896        if (!inode)
1897                goto out;
1898        ei = PROC_I(inode);
1899        ei->fd = fd;
1900        files = get_files_struct(task);
1901        if (!files)
1902                goto out_iput;
1903        inode->i_mode = S_IFLNK;
1904
1905        /*
1906         * We are not taking a ref to the file structure, so we must
1907         * hold ->file_lock.
1908         */
1909        spin_lock(&files->file_lock);
1910        file = fcheck_files(files, fd);
1911        if (!file)
1912                goto out_unlock;
1913        if (file->f_mode & FMODE_READ)
1914                inode->i_mode |= S_IRUSR | S_IXUSR;
1915        if (file->f_mode & FMODE_WRITE)
1916                inode->i_mode |= S_IWUSR | S_IXUSR;
1917        spin_unlock(&files->file_lock);
1918        put_files_struct(files);
1919
1920        inode->i_op = &proc_pid_link_inode_operations;
1921        inode->i_size = 64;
1922        ei->op.proc_get_link = proc_fd_link;
1923        dentry->d_op = &tid_fd_dentry_operations;
1924        d_add(dentry, inode);
1925        /* Close the race of the process dying before we return the dentry */
1926        if (tid_fd_revalidate(dentry, NULL))
1927                error = NULL;
1928
1929 out:
1930        return error;
1931out_unlock:
1932        spin_unlock(&files->file_lock);
1933        put_files_struct(files);
1934out_iput:
1935        iput(inode);
1936        goto out;
1937}
1938
1939static struct dentry *proc_lookupfd_common(struct inode *dir,
1940                                           struct dentry *dentry,
1941                                           instantiate_t instantiate)
1942{
1943        struct task_struct *task = get_proc_task(dir);
1944        unsigned fd = name_to_int(dentry);
1945        struct dentry *result = ERR_PTR(-ENOENT);
1946
1947        if (!task)
1948                goto out_no_task;
1949        if (fd == ~0U)
1950                goto out;
1951
1952        result = instantiate(dir, dentry, task, &fd);
1953out:
1954        put_task_struct(task);
1955out_no_task:
1956        return result;
1957}
1958
1959static int proc_readfd_common(struct file * filp, void * dirent,
1960                              filldir_t filldir, instantiate_t instantiate)
1961{
1962        struct dentry *dentry = filp->f_path.dentry;
1963        struct inode *inode = dentry->d_inode;
1964        struct task_struct *p = get_proc_task(inode);
1965        unsigned int fd, ino;
1966        int retval;
1967        struct files_struct * files;
1968
1969        retval = -ENOENT;
1970        if (!p)
1971                goto out_no_task;
1972        retval = 0;
1973
1974        fd = filp->f_pos;
1975        switch (fd) {
1976                case 0:
1977                        if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1978                                goto out;
1979                        filp->f_pos++;
1980                case 1:
1981                        ino = parent_ino(dentry);
1982                        if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1983                                goto out;
1984                        filp->f_pos++;
1985                default:
1986                        files = get_files_struct(p);
1987                        if (!files)
1988                                goto out;
1989                        rcu_read_lock();
1990                        for (fd = filp->f_pos-2;
1991                             fd < files_fdtable(files)->max_fds;
1992                             fd++, filp->f_pos++) {
1993                                char name[PROC_NUMBUF];
1994                                int len;
1995
1996                                if (!fcheck_files(files, fd))
1997                                        continue;
1998                                rcu_read_unlock();
1999
2000                                len = snprintf(name, sizeof(name), "%d", fd);
2001                                if (proc_fill_cache(filp, dirent, filldir,
2002                                                    name, len, instantiate,
2003                                                    p, &fd) < 0) {
2004                                        rcu_read_lock();
2005                                        break;
2006                                }
2007                                rcu_read_lock();
2008                        }
2009                        rcu_read_unlock();
2010                        put_files_struct(files);
2011        }
2012out:
2013        put_task_struct(p);
2014out_no_task:
2015        return retval;
2016}
2017
2018static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
2019                                    struct nameidata *nd)
2020{
2021        return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
2022}
2023
2024static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
2025{
2026        return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
2027}
2028
2029static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
2030                                      size_t len, loff_t *ppos)
2031{
2032        char tmp[PROC_FDINFO_MAX];
2033        int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
2034        if (!err)
2035                err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
2036        return err;
2037}
2038
2039static const struct file_operations proc_fdinfo_file_operations = {
2040        .open           = nonseekable_open,
2041        .read           = proc_fdinfo_read,
2042};
2043
2044static const struct file_operations proc_fd_operations = {
2045        .read           = generic_read_dir,
2046        .readdir        = proc_readfd,
2047};
2048
2049/*
2050 * /proc/pid/fd needs a special permission handler so that a process can still
2051 * access /proc/self/fd after it has executed a setuid().
2052 */
2053static int proc_fd_permission(struct inode *inode, int mask)
2054{
2055        int rv;
2056
2057        rv = generic_permission(inode, mask, NULL);
2058        if (rv == 0)
2059                return 0;
2060        if (task_pid(current) == proc_pid(inode))
2061                rv = 0;
2062        return rv;
2063}
2064
2065/*
2066 * proc directories can do almost nothing..
2067 */
2068static const struct inode_operations proc_fd_inode_operations = {
2069        .lookup         = proc_lookupfd,
2070        .permission     = proc_fd_permission,
2071        .setattr        = proc_setattr,
2072};
2073
2074static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2075        struct dentry *dentry, struct task_struct *task, const void *ptr)
2076{
2077        unsigned fd = *(unsigned *)ptr;
2078        struct inode *inode;
2079        struct proc_inode *ei;
2080        struct dentry *error = ERR_PTR(-ENOENT);
2081
2082        inode = proc_pid_make_inode(dir->i_sb, task);
2083        if (!inode)
2084                goto out;
2085        ei = PROC_I(inode);
2086        ei->fd = fd;
2087        inode->i_mode = S_IFREG | S_IRUSR;
2088        inode->i_fop = &proc_fdinfo_file_operations;
2089        dentry->d_op = &tid_fd_dentry_operations;
2090        d_add(dentry, inode);
2091        /* Close the race of the process dying before we return the dentry */
2092        if (tid_fd_revalidate(dentry, NULL))
2093                error = NULL;
2094
2095 out:
2096        return error;
2097}
2098
2099static struct dentry *proc_lookupfdinfo(struct inode *dir,
2100                                        struct dentry *dentry,
2101                                        struct nameidata *nd)
2102{
2103        return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2104}
2105
2106static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2107{
2108        return proc_readfd_common(filp, dirent, filldir,
2109                                  proc_fdinfo_instantiate);
2110}
2111
2112static const struct file_operations proc_fdinfo_operations = {
2113        .read           = generic_read_dir,
2114        .readdir        = proc_readfdinfo,
2115};
2116
2117/*
2118 * proc directories can do almost nothing..
2119 */
2120static const struct inode_operations proc_fdinfo_inode_operations = {
2121        .lookup         = proc_lookupfdinfo,
2122        .setattr        = proc_setattr,
2123};
2124
2125
2126static struct dentry *proc_pident_instantiate(struct inode *dir,
2127        struct dentry *dentry, struct task_struct *task, const void *ptr)
2128{
2129        const struct pid_entry *p = ptr;
2130        struct inode *inode;
2131        struct proc_inode *ei;
2132        struct dentry *error = ERR_PTR(-ENOENT);
2133
2134        inode = proc_pid_make_inode(dir->i_sb, task);
2135        if (!inode)
2136                goto out;
2137
2138        ei = PROC_I(inode);
2139        inode->i_mode = p->mode;
2140        if (S_ISDIR(inode->i_mode))
2141                inode->i_nlink = 2;     /* Use getattr to fix if necessary */
2142        if (p->iop)
2143                inode->i_op = p->iop;
2144        if (p->fop)
2145                inode->i_fop = p->fop;
2146        ei->op = p->op;
2147        dentry->d_op = &pid_dentry_operations;
2148        d_add(dentry, inode);
2149        /* Close the race of the process dying before we return the dentry */
2150        if (pid_revalidate(dentry, NULL))
2151                error = NULL;
2152out:
2153        return error;
2154}
2155
2156static struct dentry *proc_pident_lookup(struct inode *dir, 
2157                                         struct dentry *dentry,
2158                                         const struct pid_entry *ents,
2159                                         unsigned int nents)
2160{
2161        struct dentry *error;
2162        struct task_struct *task = get_proc_task(dir);
2163        const struct pid_entry *p, *last;
2164
2165        error = ERR_PTR(-ENOENT);
2166
2167        if (!task)
2168                goto out_no_task;
2169
2170        /*
2171         * Yes, it does not scale. And it should not. Don't add
2172         * new entries into /proc/<tgid>/ without very good reasons.
2173         */
2174        last = &ents[nents - 1];
2175        for (p = ents; p <= last; p++) {
2176                if (p->len != dentry->d_name.len)
2177                        continue;
2178                if (!memcmp(dentry->d_name.name, p->name, p->len))
2179                        break;
2180        }
2181        if (p > last)
2182                goto out;
2183
2184        error = proc_pident_instantiate(dir, dentry, task, p);
2185out:
2186        put_task_struct(task);
2187out_no_task:
2188        return error;
2189}
2190
2191static int proc_pident_fill_cache(struct file *filp, void *dirent,
2192        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2193{
2194        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2195                                proc_pident_instantiate, task, p);
2196}
2197
2198static int proc_pident_readdir(struct file *filp,
2199                void *dirent, filldir_t filldir,
2200                const struct pid_entry *ents, unsigned int nents)
2201{
2202        int i;
2203        struct dentry *dentry = filp->f_path.dentry;
2204        struct inode *inode = dentry->d_inode;
2205        struct task_struct *task = get_proc_task(inode);
2206        const struct pid_entry *p, *last;
2207        ino_t ino;
2208        int ret;
2209
2210        ret = -ENOENT;
2211        if (!task)
2212                goto out_no_task;
2213
2214        ret = 0;
2215        i = filp->f_pos;
2216        switch (i) {
2217        case 0:
2218                ino = inode->i_ino;
2219                if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
2220                        goto out;
2221                i++;
2222                filp->f_pos++;
2223                /* fall through */
2224        case 1:
2225                ino = parent_ino(dentry);
2226                if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
2227                        goto out;
2228                i++;
2229                filp->f_pos++;
2230                /* fall through */
2231        default:
2232                i -= 2;
2233                if (i >= nents) {
2234                        ret = 1;
2235                        goto out;
2236                }
2237                p = ents + i;
2238                last = &ents[nents - 1];
2239                while (p <= last) {
2240                        if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
2241                                goto out;
2242                        filp->f_pos++;
2243                        p++;
2244                }
2245        }
2246
2247        ret = 1;
2248out:
2249        put_task_struct(task);
2250out_no_task:
2251        return ret;
2252}
2253
2254#ifdef CONFIG_SECURITY
2255static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2256                                  size_t count, loff_t *ppos)
2257{
2258        struct inode * inode = file->f_path.dentry->d_inode;
2259        char *p = NULL;
2260        ssize_t length;
2261        struct task_struct *task = get_proc_task(inode);
2262
2263        if (!task)
2264                return -ESRCH;
2265
2266        length = security_getprocattr(task,
2267                                      (char*)file->f_path.dentry->d_name.name,
2268                                      &p);
2269        put_task_struct(task);
2270        if (length > 0)
2271                length = simple_read_from_buffer(buf, count, ppos, p, length);
2272        kfree(p);
2273        return length;
2274}
2275
2276static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2277                                   size_t count, loff_t *ppos)
2278{
2279        struct inode * inode = file->f_path.dentry->d_inode;
2280        char *page;
2281        ssize_t length;
2282        struct task_struct *task = get_proc_task(inode);
2283
2284        length = -ESRCH;
2285        if (!task)
2286                goto out_no_task;
2287        if (count > PAGE_SIZE)
2288                count = PAGE_SIZE;
2289
2290        /* No partial writes. */
2291        length = -EINVAL;
2292        if (*ppos != 0)
2293                goto out;
2294
2295        length = -ENOMEM;
2296        page = (char*)__get_free_page(GFP_TEMPORARY);
2297        if (!page)
2298                goto out;
2299
2300        length = -EFAULT;
2301        if (copy_from_user(page, buf, count))
2302                goto out_free;
2303
2304        /* Guard against adverse ptrace interaction */
2305        length = mutex_lock_interruptible(&task->cred_guard_mutex);
2306        if (length < 0)
2307                goto out_free;
2308
2309        length = security_setprocattr(task,
2310                                      (char*)file->f_path.dentry->d_name.name,
2311                                      (void*)page, count);
2312        mutex_unlock(&task->cred_guard_mutex);
2313out_free:
2314        free_page((unsigned long) page);
2315out:
2316        put_task_struct(task);
2317out_no_task:
2318        return length;
2319}
2320
2321static const struct file_operations proc_pid_attr_operations = {
2322        .read           = proc_pid_attr_read,
2323        .write          = proc_pid_attr_write,
2324        .llseek         = generic_file_llseek,
2325};
2326
2327static const struct pid_entry attr_dir_stuff[] = {
2328        REG("current",    S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2329        REG("prev",       S_IRUGO,         proc_pid_attr_operations),
2330        REG("exec",       S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2331        REG("fscreate",   S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2332        REG("keycreate",  S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2333        REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2334};
2335
2336static int proc_attr_dir_readdir(struct file * filp,
2337                             void * dirent, filldir_t filldir)
2338{
2339        return proc_pident_readdir(filp,dirent,filldir,
2340                                   attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
2341}
2342
2343static const struct file_operations proc_attr_dir_operations = {
2344        .read           = generic_read_dir,
2345        .readdir        = proc_attr_dir_readdir,
2346};
2347
2348static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2349                                struct dentry *dentry, struct nameidata *nd)
2350{
2351        return proc_pident_lookup(dir, dentry,
2352                                  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2353}
2354
2355static const struct inode_operations proc_attr_dir_inode_operations = {
2356        .lookup         = proc_attr_dir_lookup,
2357        .getattr        = pid_getattr,
2358        .setattr        = proc_setattr,
2359};
2360
2361#endif
2362
2363#ifdef CONFIG_ELF_CORE
2364static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2365                                         size_t count, loff_t *ppos)
2366{
2367        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
2368        struct mm_struct *mm;
2369        char buffer[PROC_NUMBUF];
2370        size_t len;
2371        int ret;
2372
2373        if (!task)
2374                return -ESRCH;
2375
2376        ret = 0;
2377        mm = get_task_mm(task);
2378        if (mm) {
2379                len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2380                               ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2381                                MMF_DUMP_FILTER_SHIFT));
2382                mmput(mm);
2383                ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2384        }
2385
2386        put_task_struct(task);
2387
2388        return ret;
2389}
2390
2391static ssize_t proc_coredump_filter_write(struct file *file,
2392                                          const char __user *buf,
2393                                          size_t count,
2394                                          loff_t *ppos)
2395{
2396        struct task_struct *task;
2397        struct mm_struct *mm;
2398        char buffer[PROC_NUMBUF], *end;
2399        unsigned int val;
2400        int ret;
2401        int i;
2402        unsigned long mask;
2403
2404        ret = -EFAULT;
2405        memset(buffer, 0, sizeof(buffer));
2406        if (count > sizeof(buffer) - 1)
2407                count = sizeof(buffer) - 1;
2408        if (copy_from_user(buffer, buf, count))
2409                goto out_no_task;
2410
2411        ret = -EINVAL;
2412        val = (unsigned int)simple_strtoul(buffer, &end, 0);
2413        if (*end == '\n')
2414                end++;
2415        if (end - buffer == 0)
2416                goto out_no_task;
2417
2418        ret = -ESRCH;
2419        task = get_proc_task(file->f_dentry->d_inode);
2420        if (!task)
2421                goto out_no_task;
2422
2423        ret = end - buffer;
2424        mm = get_task_mm(task);
2425        if (!mm)
2426                goto out_no_mm;
2427
2428        for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2429                if (val & mask)
2430                        set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2431                else
2432                        clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2433        }
2434
2435        mmput(mm);
2436 out_no_mm:
2437        put_task_struct(task);
2438 out_no_task:
2439        return ret;
2440}
2441
2442static const struct file_operations proc_coredump_filter_operations = {
2443        .read           = proc_coredump_filter_read,
2444        .write          = proc_coredump_filter_write,
2445        .llseek         = generic_file_llseek,
2446};
2447#endif
2448
2449/*
2450 * /proc/self:
2451 */
2452static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2453                              int buflen)
2454{
2455        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2456        pid_t tgid = task_tgid_nr_ns(current, ns);
2457        char tmp[PROC_NUMBUF];
2458        if (!tgid)
2459                return -ENOENT;
2460        sprintf(tmp, "%d", tgid);
2461        return vfs_readlink(dentry,buffer,buflen,tmp);
2462}
2463
2464static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2465{
2466        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2467        pid_t tgid = task_tgid_nr_ns(current, ns);
2468        char *name = ERR_PTR(-ENOENT);
2469        if (tgid) {
2470                name = __getname();
2471                if (!name)
2472                        name = ERR_PTR(-ENOMEM);
2473                else
2474                        sprintf(name, "%d", tgid);
2475        }
2476        nd_set_link(nd, name);
2477        return NULL;
2478}
2479
/*
 * ->put_link for /proc/self: releases the name buffer stored in @nd unless
 * it is an error pointer.  @dentry and @cookie are not used.
 */
static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
                                void *cookie)
{
        char *link = nd_get_link(nd);

        if (IS_ERR(link))
                return;

        __putname(link);
}
2487
2488static const struct inode_operations proc_self_inode_operations = {
2489        .readlink       = proc_self_readlink,
2490        .follow_link    = proc_self_follow_link,
2491        .put_link       = proc_self_put_link,
2492};
2493
2494/*
2495 * proc base
2496 *
2497 * These are the directory entries in the root directory of /proc
 * that properly belong to the /proc filesystem, as they
 * describe something that is process related.
2500 */
2501static const struct pid_entry proc_base_stuff[] = {
2502        NOD("self", S_IFLNK|S_IRWXUGO,
2503                &proc_self_inode_operations, NULL, {}),
2504};
2505
2506/*
2507 *      Exceptional case: normally we are not allowed to unhash a busy
2508 * directory. In this case, however, we can do it - no aliasing problems
2509 * due to the way we treat inodes.
2510 */
2511static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2512{
2513        struct inode *inode = dentry->d_inode;
2514        struct task_struct *task = get_proc_task(inode);
2515        if (task) {
2516                put_task_struct(task);
2517                return 1;
2518        }
2519        d_drop(dentry);
2520        return 0;
2521}
2522
2523static const struct dentry_operations proc_base_dentry_operations =
2524{
2525        .d_revalidate   = proc_base_revalidate,
2526        .d_delete       = pid_delete_dentry,
2527};
2528
2529static struct dentry *proc_base_instantiate(struct inode *dir,
2530        struct dentry *dentry, struct task_struct *task, const void *ptr)
2531{
2532        const struct pid_entry *p = ptr;
2533        struct inode *inode;
2534        struct proc_inode *ei;
2535        struct dentry *error;
2536
2537        /* Allocate the inode */
2538        error = ERR_PTR(-ENOMEM);
2539        inode = new_inode(dir->i_sb);
2540        if (!inode)
2541                goto out;
2542
2543        /* Initialize the inode */
2544        ei = PROC_I(inode);
2545        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2546
2547        /*
2548         * grab the reference to the task.
2549         */
2550        ei->pid = get_task_pid(task, PIDTYPE_PID);
2551        if (!ei->pid)
2552                goto out_iput;
2553
2554        inode->i_mode = p->mode;
2555        if (S_ISDIR(inode->i_mode))
2556                inode->i_nlink = 2;
2557        if (S_ISLNK(inode->i_mode))
2558                inode->i_size = 64;
2559        if (p->iop)
2560                inode->i_op = p->iop;
2561        if (p->fop)
2562                inode->i_fop = p->fop;
2563        ei->op = p->op;
2564        dentry->d_op = &proc_base_dentry_operations;
2565        d_add(dentry, inode);
2566        error = NULL;
2567out:
2568        return error;
2569out_iput:
2570        iput(inode);
2571        goto out;
2572}
2573
2574static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2575{
2576        struct dentry *error;
2577        struct task_struct *task = get_proc_task(dir);
2578        const struct pid_entry *p, *last;
2579
2580        error = ERR_PTR(-ENOENT);
2581
2582        if (!task)
2583                goto out_no_task;
2584
2585        /* Lookup the directory entry */
2586        last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2587        for (p = proc_base_stuff; p <= last; p++) {
2588                if (p->len != dentry->d_name.len)
2589                        continue;
2590                if (!memcmp(dentry->d_name.name, p->name, p->len))
2591                        break;
2592        }
2593        if (p > last)
2594                goto out;
2595
2596        error = proc_base_instantiate(dir, dentry, task, p);
2597
2598out:
2599        put_task_struct(task);
2600out_no_task:
2601        return error;
2602}
2603
2604static int proc_base_fill_cache(struct file *filp, void *dirent,
2605        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2606{
2607        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2608                                proc_base_instantiate, task, p);
2609}
2610
2611#ifdef CONFIG_TASK_IO_ACCOUNTING
2612static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2613{
2614        struct task_io_accounting acct = task->ioac;
2615        unsigned long flags;
2616
2617        if (whole && lock_task_sighand(task, &flags)) {
2618                struct task_struct *t = task;
2619
2620                task_io_accounting_add(&acct, &task->signal->ioac);
2621                while_each_thread(task, t)
2622                        task_io_accounting_add(&acct, &t->ioac);
2623
2624                unlock_task_sighand(task, &flags);
2625        }
2626        return sprintf(buffer,
2627                        "rchar: %llu\n"
2628                        "wchar: %llu\n"
2629                        "syscr: %llu\n"
2630                        "syscw: %llu\n"
2631                        "read_bytes: %llu\n"
2632                        "write_bytes: %llu\n"
2633                        "cancelled_write_bytes: %llu\n",
2634                        (unsigned long long)acct.rchar,
2635                        (unsigned long long)acct.wchar,
2636                        (unsigned long long)acct.syscr,
2637                        (unsigned long long)acct.syscw,
2638                        (unsigned long long)acct.read_bytes,
2639                        (unsigned long long)acct.write_bytes,
2640                        (unsigned long long)acct.cancelled_write_bytes);
2641}
2642
/* I/O accounting for the single task only (whole == 0). */
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
{
        const int whole = 0;

        return do_io_accounting(task, buffer, whole);
}
2647
/* I/O accounting summed over the whole thread group (whole == 1). */
static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
{
        const int whole = 1;

        return do_io_accounting(task, buffer, whole);
}
2652#endif /* CONFIG_TASK_IO_ACCOUNTING */
2653
2654static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2655                                struct pid *pid, struct task_struct *task)
2656{
2657        seq_printf(m, "%08x\n", task->personality);
2658        return 0;
2659}
2660
2661/*
2662 * Thread groups
2663 */
2664static const struct file_operations proc_task_operations;
2665static const struct inode_operations proc_task_inode_operations;
2666
2667static const struct pid_entry tgid_base_stuff[] = {
2668        DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2669        DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2670        DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2671#ifdef CONFIG_NET
2672        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2673#endif
2674        REG("environ",    S_IRUSR, proc_environ_operations),
2675        INF("auxv",       S_IRUSR, proc_pid_auxv),
2676        ONE("status",     S_IRUGO, proc_pid_status),
2677        ONE("personality", S_IRUSR, proc_pid_personality),
2678        INF("limits",     S_IRUGO, proc_pid_limits),
2679#ifdef CONFIG_SCHED_DEBUG
2680        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2681#endif
2682        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2683#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2684        INF("syscall",    S_IRUSR, proc_pid_syscall),
2685#endif
2686        INF("cmdline",    S_IRUGO, proc_pid_cmdline),
2687        ONE("stat",       S_IRUGO, proc_tgid_stat),
2688        ONE("statm",      S_IRUGO, proc_pid_statm),
2689        REG("maps",       S_IRUGO, proc_maps_operations),
2690#ifdef CONFIG_NUMA
2691        REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
2692#endif
2693        REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
2694        LNK("cwd",        proc_cwd_link),
2695        LNK("root",       proc_root_link),
2696        LNK("exe",        proc_exe_link),
2697        REG("mounts",     S_IRUGO, proc_mounts_operations),
2698        REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
2699        REG("mountstats", S_IRUSR, proc_mountstats_operations),
2700#ifdef CONFIG_PROC_PAGE_MONITOR
2701        REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2702        REG("smaps",      S_IRUGO, proc_smaps_operations),
2703        REG("pagemap",    S_IRUSR, proc_pagemap_operations),
2704#endif
2705#ifdef CONFIG_SECURITY
2706        DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2707#endif
2708#ifdef CONFIG_KALLSYMS
2709        INF("wchan",      S_IRUGO, proc_pid_wchan),
2710#endif
2711#ifdef CONFIG_STACKTRACE
2712        ONE("stack",      S_IRUSR, proc_pid_stack),
2713#endif
2714#ifdef CONFIG_SCHEDSTATS
2715        INF("schedstat",  S_IRUGO, proc_pid_schedstat),
2716#endif
2717#ifdef CONFIG_LATENCYTOP
2718        REG("latency",  S_IRUGO, proc_lstats_operations),
2719#endif
2720#ifdef CONFIG_PROC_PID_CPUSET
2721        REG("cpuset",     S_IRUGO, proc_cpuset_operations),
2722#endif
2723#ifdef CONFIG_CGROUPS
2724        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
2725#endif
2726        INF("oom_score",  S_IRUGO, proc_oom_score),
2727        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
2728        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2729#ifdef CONFIG_AUDITSYSCALL
2730        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
2731        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
2732#endif
2733#ifdef CONFIG_FAULT_INJECTION
2734        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2735#endif
2736#ifdef CONFIG_ELF_CORE
2737        REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2738#endif
2739#ifdef CONFIG_TASK_IO_ACCOUNTING
2740        INF("io",       S_IRUGO, proc_tgid_io_accounting),
2741#endif
2742};
2743
2744static int proc_tgid_base_readdir(struct file * filp,
2745                             void * dirent, filldir_t filldir)
2746{
2747        return proc_pident_readdir(filp,dirent,filldir,
2748                                   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
2749}
2750
2751static const struct file_operations proc_tgid_base_operations = {
2752        .read           = generic_read_dir,
2753        .readdir        = proc_tgid_base_readdir,
2754};
2755
2756static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2757        return proc_pident_lookup(dir, dentry,
2758                                  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
2759}
2760
2761static const struct inode_operations proc_tgid_base_inode_operations = {
2762        .lookup         = proc_tgid_base_lookup,
2763        .getattr        = pid_getattr,
2764        .setattr        = proc_setattr,
2765};
2766
2767static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2768{
2769        struct dentry *dentry, *leader, *dir;
2770        char buf[PROC_NUMBUF];
2771        struct qstr name;
2772
2773        name.name = buf;
2774        name.len = snprintf(buf, sizeof(buf), "%d", pid);
2775        dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2776        if (dentry) {
2777                shrink_dcache_parent(dentry);
2778                d_drop(dentry);
2779                dput(dentry);
2780        }
2781
2782        name.name = buf;
2783        name.len = snprintf(buf, sizeof(buf), "%d", tgid);
2784        leader = d_hash_and_lookup(mnt->mnt_root, &name);
2785        if (!leader)
2786                goto out;
2787
2788        name.name = "task";
2789        name.len = strlen(name.name);
2790        dir = d_hash_and_lookup(leader, &name);
2791        if (!dir)
2792                goto out_put_leader;
2793
2794        name.name = buf;
2795        name.len = snprintf(buf, sizeof(buf), "%d", pid);
2796        dentry = d_hash_and_lookup(dir, &name);
2797        if (dentry) {
2798                shrink_dcache_parent(dentry);
2799                d_drop(dentry);
2800                dput(dentry);
2801        }
2802
2803        dput(dir);
2804out_put_leader:
2805        dput(leader);
2806out:
2807        return;
2808}
2809
2810/**
2811 * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
2812 * @task: task that should be flushed.
2813 *
2814 * When flushing dentries from proc, one needs to flush them from global
2815 * proc (proc_mnt) and from all the namespaces' procs this task was seen
2816 * in. This call is supposed to do all of this job.
2817 *
2818 * Looks in the dcache for
2819 * /proc/@pid
2820 * /proc/@tgid/task/@pid
2821 * if either directory is present flushes it and all of it'ts children
2822 * from the dcache.
2823 *
2824 * It is safe and reasonable to cache /proc entries for a task until
2825 * that task exits.  After that they just clog up the dcache with
2826 * useless entries, possibly causing useful dcache entries to be
2827 * flushed instead.  This routine is proved to flush those useless
2828 * dcache entries at process exit time.
2829 *
2830 * NOTE: This routine is just an optimization so it does not guarantee
2831 *       that no dcache entries will exist at process exit time it
2832 *       just makes it very unlikely that any will persist.
2833 */
2834
2835void proc_flush_task(struct task_struct *task)
2836{
2837        int i;
2838        struct pid *pid, *tgid;
2839        struct upid *upid;
2840
2841        pid = task_pid(task);
2842        tgid = task_tgid(task);
2843
2844        for (i = 0; i <= pid->level; i++) {
2845                upid = &pid->numbers[i];
2846                proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2847                                        tgid->numbers[i].nr);
2848        }
2849
2850        upid = &pid->numbers[pid->level];
2851        if (upid->nr == 1)
2852                pid_ns_release_proc(upid->ns);
2853}
2854
2855static struct dentry *proc_pid_instantiate(struct inode *dir,
2856                                           struct dentry * dentry,
2857                                           struct task_struct *task, const void *ptr)
2858{
2859        struct dentry *error = ERR_PTR(-ENOENT);
2860        struct inode *inode;
2861
2862        inode = proc_pid_make_inode(dir->i_sb, task);
2863        if (!inode)
2864                goto out;
2865
2866        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2867        inode->i_op = &proc_tgid_base_inode_operations;
2868        inode->i_fop = &proc_tgid_base_operations;
2869        inode->i_flags|=S_IMMUTABLE;
2870
2871        inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2872                ARRAY_SIZE(tgid_base_stuff));
2873
2874        dentry->d_op = &pid_dentry_operations;
2875
2876        d_add(dentry, inode);
2877        /* Close the race of the process dying before we return the dentry */
2878        if (pid_revalidate(dentry, NULL))
2879                error = NULL;
2880out:
2881        return error;
2882}
2883
2884struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2885{
2886        struct dentry *result;
2887        struct task_struct *task;
2888        unsigned tgid;
2889        struct pid_namespace *ns;
2890
2891        result = proc_base_lookup(dir, dentry);
2892        if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2893                goto out;
2894
2895        tgid = name_to_int(dentry);
2896        if (tgid == ~0U)
2897                goto out;
2898
2899        ns = dentry->d_sb->s_fs_info;
2900        rcu_read_lock();
2901        task = find_task_by_pid_ns(tgid, ns);
2902        if (task)
2903                get_task_struct(task);
2904        rcu_read_unlock();
2905        if (!task)
2906                goto out;
2907
2908        result = proc_pid_instantiate(dir, dentry, task, NULL);
2909        put_task_struct(task);
2910out:
2911        return result;
2912}
2913
2914/*
2915 * Find the first task with tgid >= tgid
2916 *
2917 */
2918struct tgid_iter {
2919        unsigned int tgid;
2920        struct task_struct *task;
2921};
2922static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
2923{
2924        struct pid *pid;
2925
2926        if (iter.task)
2927                put_task_struct(iter.task);
2928        rcu_read_lock();
2929retry:
2930        iter.task = NULL;
2931        pid = find_ge_pid(iter.tgid, ns);
2932        if (pid) {
2933                iter.tgid = pid_nr_ns(pid, ns);
2934                iter.task = pid_task(pid, PIDTYPE_PID);
2935                /* What we to know is if the pid we have find is the
2936                 * pid of a thread_group_leader.  Testing for task
2937                 * being a thread_group_leader is the obvious thing
2938                 * todo but there is a window when it fails, due to
2939                 * the pid transfer logic in de_thread.
2940                 *
2941                 * So we perform the straight forward test of seeing
2942                 * if the pid we have found is the pid of a thread
2943                 * group leader, and don't worry if the task we have
2944                 * found doesn't happen to be a thread group leader.
2945                 * As we don't care in the case of readdir.
2946                 */
2947                if (!iter.task || !has_group_leader_pid(iter.task)) {
2948                        iter.tgid += 1;
2949                        goto retry;
2950                }
2951                get_task_struct(iter.task);
2952        }
2953        rcu_read_unlock();
2954        return iter;
2955}
2956
/*
 * Distance between a tgid and the f_pos at which proc_pid_readdir()
 * reports it: the tgid entries come after the proc_base_stuff entries,
 * which themselves start at FIRST_PROCESS_ENTRY.
 */
#define TGID_OFFSET (ARRAY_SIZE(proc_base_stuff) + FIRST_PROCESS_ENTRY)
2958
2959static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2960        struct tgid_iter iter)
2961{
2962        char name[PROC_NUMBUF];
2963        int len = snprintf(name, sizeof(name), "%d", iter.tgid);
2964        return proc_fill_cache(filp, dirent, filldir, name, len,
2965                                proc_pid_instantiate, iter.task, NULL);
2966}
2967
2968/* for the /proc/ directory itself, after non-process stuff has been done */
2969int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2970{
2971        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2972        struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
2973        struct tgid_iter iter;
2974        struct pid_namespace *ns;
2975
2976        if (!reaper)
2977                goto out_no_task;
2978
2979        for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2980                const struct pid_entry *p = &proc_base_stuff[nr];
2981                if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2982                        goto out;
2983        }
2984
2985        ns = filp->f_dentry->d_sb->s_fs_info;
2986        iter.task = NULL;
2987        iter.tgid = filp->f_pos - TGID_OFFSET;
2988        for (iter = next_tgid(ns, iter);
2989             iter.task;
2990             iter.tgid += 1, iter = next_tgid(ns, iter)) {
2991                filp->f_pos = iter.tgid + TGID_OFFSET;
2992                if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
2993                        put_task_struct(iter.task);
2994                        goto out;
2995                }
2996        }
2997        filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
2998out:
2999        put_task_struct(reaper);
3000out_no_task:
3001        return 0;
3002}
3003
3004/*
3005 * Tasks
3006 */
3007static const struct pid_entry tid_base_stuff[] = {
3008        DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3009        DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3010        REG("environ",   S_IRUSR, proc_environ_operations),
3011        INF("auxv",      S_IRUSR, proc_pid_auxv),
3012        ONE("status",    S_IRUGO, proc_pid_status),
3013        ONE("personality", S_IRUSR, proc_pid_personality),
3014        INF("limits",    S_IRUGO, proc_pid_limits),
3015#ifdef CONFIG_SCHED_DEBUG
3016        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3017#endif
3018        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3019#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3020        INF("syscall",   S_IRUSR, proc_pid_syscall),
3021#endif
3022        INF("cmdline",   S_IRUGO, proc_pid_cmdline),
3023        ONE("stat",      S_IRUGO, proc_tid_stat),
3024        ONE("statm",     S_IRUGO, proc_pid_statm),
3025        REG("maps",      S_IRUGO, proc_maps_operations),
3026#ifdef CONFIG_NUMA
3027        REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
3028#endif
3029        REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
3030        LNK("cwd",       proc_cwd_link),
3031        LNK("root",      proc_root_link),
3032        LNK("exe",       proc_exe_link),
3033        REG("mounts",    S_IRUGO, proc_mounts_operations),
3034        REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
3035#ifdef CONFIG_PROC_PAGE_MONITOR
3036        REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3037        REG("smaps",     S_IRUGO, proc_smaps_operations),
3038        REG("pagemap",    S_IRUSR, proc_pagemap_operations),
3039#endif
3040#ifdef CONFIG_SECURITY
3041        DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
3042#endif
3043#ifdef CONFIG_KALLSYMS
3044        INF("wchan",     S_IRUGO, proc_pid_wchan),
3045#endif
3046#ifdef CONFIG_STACKTRACE
3047        ONE("stack",      S_IRUSR, proc_pid_stack),
3048#endif
3049#ifdef CONFIG_SCHEDSTATS
3050        INF("schedstat", S_IRUGO, proc_pid_schedstat),
3051#endif
3052#ifdef CONFIG_LATENCYTOP
3053        REG("latency",  S_IRUGO, proc_lstats_operations),
3054#endif
3055#ifdef CONFIG_PROC_PID_CPUSET
3056        REG("cpuset",    S_IRUGO, proc_cpuset_operations),
3057#endif
3058#ifdef CONFIG_CGROUPS
3059        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
3060#endif
3061        INF("oom_score", S_IRUGO, proc_oom_score),
3062        REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
3063        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3064#ifdef CONFIG_AUDITSYSCALL
3065        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
3066        REG("sessionid",  S_IRUSR, proc_sessionid_operations),
3067#endif
3068#ifdef CONFIG_FAULT_INJECTION
3069        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
3070#endif
3071#ifdef CONFIG_TASK_IO_ACCOUNTING
3072        INF("io",       S_IRUGO, proc_tid_io_accounting),
3073#endif
3074};
3075
3076static int proc_tid_base_readdir(struct file * filp,
3077                             void * dirent, filldir_t filldir)
3078{
3079        return proc_pident_readdir(filp,dirent,filldir,
3080                                   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
3081}
3082
3083static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
3084        return proc_pident_lookup(dir, dentry,
3085                                  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3086}
3087
3088static const struct file_operations proc_tid_base_operations = {
3089        .read           = generic_read_dir,
3090        .readdir        = proc_tid_base_readdir,
3091};
3092
3093static const struct inode_operations proc_tid_base_inode_operations = {
3094        .lookup         = proc_tid_base_lookup,
3095        .getattr        = pid_getattr,
3096        .setattr        = proc_setattr,
3097};
3098
3099static struct dentry *proc_task_instantiate(struct inode *dir,
3100        struct dentry *dentry, struct task_struct *task, const void *ptr)
3101{
3102        struct dentry *error = ERR_PTR(-ENOENT);
3103        struct inode *inode;
3104        inode = proc_pid_make_inode(dir->i_sb, task);
3105
3106        if (!inode)
3107                goto out;
3108        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
3109        inode->i_op = &proc_tid_base_inode_operations;
3110        inode->i_fop = &proc_tid_base_operations;
3111        inode->i_flags|=S_IMMUTABLE;
3112
3113        inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3114                ARRAY_SIZE(tid_base_stuff));
3115
3116        dentry->d_op = &pid_dentry_operations;
3117
3118        d_add(dentry, inode);
3119        /* Close the race of the process dying before we return the dentry */
3120        if (pid_revalidate(dentry, NULL))
3121                error = NULL;
3122out:
3123        return error;
3124}
3125
3126static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
3127{
3128        struct dentry *result = ERR_PTR(-ENOENT);
3129        struct task_struct *task;
3130        struct task_struct *leader = get_proc_task(dir);
3131        unsigned tid;
3132        struct pid_namespace *ns;
3133
3134        if (!leader)
3135                goto out_no_task;
3136
3137        tid = name_to_int(dentry);
3138        if (tid == ~0U)
3139                goto out;
3140
3141        ns = dentry->d_sb->s_fs_info;
3142        rcu_read_lock();
3143        task = find_task_by_pid_ns(tid, ns);
3144        if (task)
3145                get_task_struct(task);
3146        rcu_read_unlock();
3147        if (!task)
3148                goto out;
3149        if (!same_thread_group(leader, task))
3150                goto out_drop_task;
3151
3152        result = proc_task_instantiate(dir, dentry, task, NULL);
3153out_drop_task:
3154        put_task_struct(task);
3155out:
3156        put_task_struct(leader);
3157out_no_task:
3158        return result;
3159}
3160
3161/*
3162 * Find the first tid of a thread group to return to user space.
3163 *
3164 * Usually this is just the thread group leader, but if the users
3165 * buffer was too small or there was a seek into the middle of the
3166 * directory we have more work todo.
3167 *
3168 * In the case of a short read we start with find_task_by_pid.
3169 *
3170 * In the case of a seek we start with the leader and walk nr
3171 * threads past it.
3172 */
3173static struct task_struct *first_tid(struct task_struct *leader,
3174                int tid, int nr, struct pid_namespace *ns)
3175{
3176        struct task_struct *pos;
3177
3178        rcu_read_lock();
3179        /* Attempt to start with the pid of a thread */
3180        if (tid && (nr > 0)) {
3181                pos = find_task_by_pid_ns(tid, ns);
3182                if (pos && (pos->group_leader == leader))
3183                        goto found;
3184        }
3185
3186        /* If nr exceeds the number of threads there is nothing todo */
3187        pos = NULL;
3188        if (nr && nr >= get_nr_threads(leader))
3189                goto out;
3190
3191        /* If we haven't found our starting place yet start
3192         * with the leader and walk nr threads forward.
3193         */
3194        for (pos = leader; nr > 0; --nr) {
3195                pos = next_thread(pos);
3196                if (pos == leader) {
3197                        pos = NULL;
3198                        goto out;
3199                }
3200        }
3201found:
3202        get_task_struct(pos);
3203out:
3204        rcu_read_unlock();
3205        return pos;
3206}
3207
3208/*
3209 * Find the next thread in the thread list.
3210 * Return NULL if there is an error or no next thread.
3211 *
3212 * The reference to the input task_struct is released.
3213 */
3214static struct task_struct *next_tid(struct task_struct *start)
3215{
3216        struct task_struct *pos = NULL;
3217        rcu_read_lock();
3218        if (pid_alive(start)) {
3219                pos = next_thread(start);
3220                if (thread_group_leader(pos))
3221                        pos = NULL;
3222                else
3223                        get_task_struct(pos);
3224        }
3225        rcu_read_unlock();
3226        put_task_struct(start);
3227        return pos;
3228}
3229
3230static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3231        struct task_struct *task, int tid)
3232{
3233        char name[PROC_NUMBUF];
3234        int len = snprintf(name, sizeof(name), "%d", tid);
3235        return proc_fill_cache(filp, dirent, filldir, name, len,
3236                                proc_task_instantiate, task, NULL);
3237}
3238
3239/* for the /proc/TGID/task/ directories */
3240static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
3241{
3242        struct dentry *dentry = filp->f_path.dentry;
3243        struct inode *inode = dentry->d_inode;
3244        struct task_struct *leader = NULL;
3245        struct task_struct *task;
3246        int retval = -ENOENT;
3247        ino_t ino;
3248        int tid;
3249        struct pid_namespace *ns;
3250
3251        task = get_proc_task(inode);
3252        if (!task)
3253                goto out_no_task;
3254        rcu_read_lock();
3255        if (pid_alive(task)) {
3256                leader = task->group_leader;
3257                get_task_struct(leader);
3258        }
3259        rcu_read_unlock();
3260        put_task_struct(task);
3261        if (!leader)
3262                goto out_no_task;
3263        retval = 0;
3264
3265        switch ((unsigned long)filp->f_pos) {
3266        case 0:
3267                ino = inode->i_ino;
3268                if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
3269                        goto out;
3270                filp->f_pos++;
3271                /* fall through */
3272        case 1:
3273                ino = parent_ino(dentry);
3274                if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
3275                        goto out;
3276                filp->f_pos++;
3277                /* fall through */
3278        }
3279
3280        /* f_version caches the tgid value that the last readdir call couldn't
3281         * return. lseek aka telldir automagically resets f_version to 0.
3282         */
3283        ns = filp->f_dentry->d_sb->s_fs_info;
3284        tid = (int)filp->f_version;
3285        filp->f_version = 0;
3286        for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
3287             task;
3288             task = next_tid(task), filp->f_pos++) {
3289                tid = task_pid_nr_ns(task, ns);
3290                if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
3291                        /* returning this tgid failed, save it as the first
3292                         * pid for the next readir call */
3293                        filp->f_version = (u64)tid;
3294                        put_task_struct(task);
3295                        break;
3296                }
3297        }
3298out:
3299        put_task_struct(leader);
3300out_no_task:
3301        return retval;
3302}
3303
3304static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
3305{
3306        struct inode *inode = dentry->d_inode;
3307        struct task_struct *p = get_proc_task(inode);
3308        generic_fillattr(inode, stat);
3309
3310        if (p) {
3311                stat->nlink += get_nr_threads(p);
3312                put_task_struct(p);
3313        }
3314
3315        return 0;
3316}
3317
3318static const struct inode_operations proc_task_inode_operations = {
3319        .lookup         = proc_task_lookup,
3320        .getattr        = proc_task_getattr,
3321        .setattr        = proc_setattr,
3322};
3323
3324static const struct file_operations proc_task_operations = {
3325        .read           = generic_read_dir,
3326        .readdir        = proc_task_readdir,
3327};
3328
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.