linux/fs/proc/base.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/proc/base.c
   3 *
   4 *  Copyright (C) 1991, 1992 Linus Torvalds
   5 *
   6 *  proc base directory handling functions
   7 *
   8 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
   9 *  Instead of using magical inumbers to determine the kind of object
  10 *  we allocate and fill in-core inodes upon lookup. They don't even
  11 *  go into icache. We cache the reference to task_struct upon lookup too.
   12 *  Eventually it should become a filesystem in its own right. We don't use the
  13 *  rest of procfs anymore.
  14 *
  15 *
  16 *  Changelog:
  17 *  17-Jan-2005
  18 *  Allan Bezerra
  19 *  Bruna Moreira <bruna.moreira@indt.org.br>
  20 *  Edjard Mota <edjard.mota@indt.org.br>
  21 *  Ilias Biris <ilias.biris@indt.org.br>
  22 *  Mauricio Lin <mauricio.lin@indt.org.br>
  23 *
  24 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  25 *
  26 *  A new process specific entry (smaps) included in /proc. It shows the
  27 *  size of rss for each memory area. The maps entry lacks information
  28 *  about physical memory size (rss) for each mapped file, i.e.,
  29 *  rss information for executables and library files.
  30 *  This additional information is useful for any tools that need to know
  31 *  about physical memory consumption for a process specific library.
  32 *
  33 *  Changelog:
  34 *  21-Feb-2005
  35 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
  36 *  Pud inclusion in the page table walking.
  37 *
  38 *  ChangeLog:
  39 *  10-Mar-2005
  40 *  10LE Instituto Nokia de Tecnologia - INdT:
   41 *  A better way to walk through the page table as suggested by Hugh Dickins.
  42 *
  43 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
  44 *  Smaps information related to shared, private, clean and dirty pages.
  45 *
  46 *  Paul Mundt <paul.mundt@nokia.com>:
  47 *  Overall revision about smaps.
  48 */
  49
  50#include <asm/uaccess.h>
  51
  52#include <linux/errno.h>
  53#include <linux/time.h>
  54#include <linux/proc_fs.h>
  55#include <linux/stat.h>
  56#include <linux/task_io_accounting_ops.h>
  57#include <linux/init.h>
  58#include <linux/capability.h>
  59#include <linux/file.h>
  60#include <linux/fdtable.h>
  61#include <linux/string.h>
  62#include <linux/seq_file.h>
  63#include <linux/namei.h>
  64#include <linux/mnt_namespace.h>
  65#include <linux/mm.h>
  66#include <linux/rcupdate.h>
  67#include <linux/kallsyms.h>
  68#include <linux/resource.h>
  69#include <linux/module.h>
  70#include <linux/mount.h>
  71#include <linux/security.h>
  72#include <linux/ptrace.h>
  73#include <linux/tracehook.h>
  74#include <linux/cgroup.h>
  75#include <linux/cpuset.h>
  76#include <linux/audit.h>
  77#include <linux/poll.h>
  78#include <linux/nsproxy.h>
  79#include <linux/oom.h>
  80#include <linux/elf.h>
  81#include <linux/pid_namespace.h>
  82#include "internal.h"
  83
  84/* NOTE:
  85 *      Implementing inode permission operations in /proc is almost
  86 *      certainly an error.  Permission checks need to happen during
  87 *      each system call not at open time.  The reason is that most of
  88 *      what we wish to check for permissions in /proc varies at runtime.
  89 *
  90 *      The classic example of a problem is opening file descriptors
  91 *      in /proc for a task before it execs a suid executable.
  92 */
  93
  94struct pid_entry {
  95        char *name;
  96        int len;
  97        mode_t mode;
  98        const struct inode_operations *iop;
  99        const struct file_operations *fop;
 100        union proc_op op;
 101};
 102
 103#define NOD(NAME, MODE, IOP, FOP, OP) {                 \
 104        .name = (NAME),                                 \
 105        .len  = sizeof(NAME) - 1,                       \
 106        .mode = MODE,                                   \
 107        .iop  = IOP,                                    \
 108        .fop  = FOP,                                    \
 109        .op   = OP,                                     \
 110}
 111
 112#define DIR(NAME, MODE, OTYPE)                                                  \
 113        NOD(NAME, (S_IFDIR|(MODE)),                                             \
 114                &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations,   \
 115                {} )
 116#define LNK(NAME, OTYPE)                                        \
 117        NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
 118                &proc_pid_link_inode_operations, NULL,          \
 119                { .proc_get_link = &proc_##OTYPE##_link } )
 120#define REG(NAME, MODE, OTYPE)                          \
 121        NOD(NAME, (S_IFREG|(MODE)), NULL,               \
 122                &proc_##OTYPE##_operations, {})
 123#define INF(NAME, MODE, OTYPE)                          \
 124        NOD(NAME, (S_IFREG|(MODE)),                     \
 125                NULL, &proc_info_file_operations,       \
 126                { .proc_read = &proc_##OTYPE } )
 127#define ONE(NAME, MODE, OTYPE)                          \
 128        NOD(NAME, (S_IFREG|(MODE)),                     \
 129                NULL, &proc_single_file_operations,     \
 130                { .proc_show = &proc_##OTYPE } )
 131
 132/*
 133 * Count the number of hardlinks for the pid_entry table, excluding the .
 134 * and .. links.
 135 */
 136static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
 137        unsigned int n)
 138{
 139        unsigned int i;
 140        unsigned int count;
 141
 142        count = 0;
 143        for (i = 0; i < n; ++i) {
 144                if (S_ISDIR(entries[i].mode))
 145                        ++count;
 146        }
 147
 148        return count;
 149}
 150
 151static struct fs_struct *get_fs_struct(struct task_struct *task)
 152{
 153        struct fs_struct *fs;
 154        task_lock(task);
 155        fs = task->fs;
 156        if(fs)
 157                atomic_inc(&fs->count);
 158        task_unlock(task);
 159        return fs;
 160}
 161
 162static int get_nr_threads(struct task_struct *tsk)
 163{
 164        unsigned long flags;
 165        int count = 0;
 166
 167        if (lock_task_sighand(tsk, &flags)) {
 168                count = atomic_read(&tsk->signal->count);
 169                unlock_task_sighand(tsk, &flags);
 170        }
 171        return count;
 172}
 173
 174static int proc_cwd_link(struct inode *inode, struct path *path)
 175{
 176        struct task_struct *task = get_proc_task(inode);
 177        struct fs_struct *fs = NULL;
 178        int result = -ENOENT;
 179
 180        if (task) {
 181                fs = get_fs_struct(task);
 182                put_task_struct(task);
 183        }
 184        if (fs) {
 185                read_lock(&fs->lock);
 186                *path = fs->pwd;
 187                path_get(&fs->pwd);
 188                read_unlock(&fs->lock);
 189                result = 0;
 190                put_fs_struct(fs);
 191        }
 192        return result;
 193}
 194
 195static int proc_root_link(struct inode *inode, struct path *path)
 196{
 197        struct task_struct *task = get_proc_task(inode);
 198        struct fs_struct *fs = NULL;
 199        int result = -ENOENT;
 200
 201        if (task) {
 202                fs = get_fs_struct(task);
 203                put_task_struct(task);
 204        }
 205        if (fs) {
 206                read_lock(&fs->lock);
 207                *path = fs->root;
 208                path_get(&fs->root);
 209                read_unlock(&fs->lock);
 210                result = 0;
 211                put_fs_struct(fs);
 212        }
 213        return result;
 214}
 215
 216/*
 217 * Return zero if current may access user memory in @task, -error if not.
 218 */
 219static int check_mem_permission(struct task_struct *task)
 220{
 221        /*
 222         * A task can always look at itself, in case it chooses
 223         * to use system calls instead of load instructions.
 224         */
 225        if (task == current)
 226                return 0;
 227
 228        /*
 229         * If current is actively ptrace'ing, and would also be
 230         * permitted to freshly attach with ptrace now, permit it.
 231         */
 232        if (task_is_stopped_or_traced(task)) {
 233                int match;
 234                rcu_read_lock();
 235                match = (tracehook_tracer_task(task) == current);
 236                rcu_read_unlock();
 237                if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
 238                        return 0;
 239        }
 240
 241        /*
 242         * Noone else is allowed.
 243         */
 244        return -EPERM;
 245}
 246
 247struct mm_struct *mm_for_maps(struct task_struct *task)
 248{
 249        struct mm_struct *mm = get_task_mm(task);
 250        if (!mm)
 251                return NULL;
 252        down_read(&mm->mmap_sem);
 253        task_lock(task);
 254        if (task->mm != mm)
 255                goto out;
 256        if (task->mm != current->mm &&
 257            __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
 258                goto out;
 259        task_unlock(task);
 260        return mm;
 261out:
 262        task_unlock(task);
 263        up_read(&mm->mmap_sem);
 264        mmput(mm);
 265        return NULL;
 266}
 267
 268static int proc_pid_cmdline(struct task_struct *task, char * buffer)
 269{
 270        int res = 0;
 271        unsigned int len;
 272        struct mm_struct *mm = get_task_mm(task);
 273        if (!mm)
 274                goto out;
 275        if (!mm->arg_end)
 276                goto out_mm;    /* Shh! No looking before we're done */
 277
 278        len = mm->arg_end - mm->arg_start;
 279 
 280        if (len > PAGE_SIZE)
 281                len = PAGE_SIZE;
 282 
 283        res = access_process_vm(task, mm->arg_start, buffer, len, 0);
 284
 285        // If the nul at the end of args has been overwritten, then
 286        // assume application is using setproctitle(3).
 287        if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
 288                len = strnlen(buffer, res);
 289                if (len < res) {
 290                    res = len;
 291                } else {
 292                        len = mm->env_end - mm->env_start;
 293                        if (len > PAGE_SIZE - res)
 294                                len = PAGE_SIZE - res;
 295                        res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
 296                        res = strnlen(buffer, res);
 297                }
 298        }
 299out_mm:
 300        mmput(mm);
 301out:
 302        return res;
 303}
 304
 305static int proc_pid_auxv(struct task_struct *task, char *buffer)
 306{
 307        int res = 0;
 308        struct mm_struct *mm = get_task_mm(task);
 309        if (mm) {
 310                unsigned int nwords = 0;
 311                do
 312                        nwords += 2;
 313                while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
 314                res = nwords * sizeof(mm->saved_auxv[0]);
 315                if (res > PAGE_SIZE)
 316                        res = PAGE_SIZE;
 317                memcpy(buffer, mm->saved_auxv, res);
 318                mmput(mm);
 319        }
 320        return res;
 321}
 322
 323
 324#ifdef CONFIG_KALLSYMS
 325/*
 326 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 327 * Returns the resolved symbol.  If that fails, simply return the address.
 328 */
 329static int proc_pid_wchan(struct task_struct *task, char *buffer)
 330{
 331        unsigned long wchan;
 332        char symname[KSYM_NAME_LEN];
 333
 334        wchan = get_wchan(task);
 335
 336        if (lookup_symbol_name(wchan, symname) < 0)
 337                return sprintf(buffer, "%lu", wchan);
 338        else
 339                return sprintf(buffer, "%s", symname);
 340}
 341#endif /* CONFIG_KALLSYMS */
 342
 343#ifdef CONFIG_SCHEDSTATS
 344/*
 345 * Provides /proc/PID/schedstat
 346 */
 347static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 348{
 349        return sprintf(buffer, "%llu %llu %lu\n",
 350                        task->sched_info.cpu_time,
 351                        task->sched_info.run_delay,
 352                        task->sched_info.pcount);
 353}
 354#endif
 355
 356#ifdef CONFIG_LATENCYTOP
 357static int lstats_show_proc(struct seq_file *m, void *v)
 358{
 359        int i;
 360        struct inode *inode = m->private;
 361        struct task_struct *task = get_proc_task(inode);
 362
 363        if (!task)
 364                return -ESRCH;
 365        seq_puts(m, "Latency Top version : v0.1\n");
 366        for (i = 0; i < 32; i++) {
 367                if (task->latency_record[i].backtrace[0]) {
 368                        int q;
 369                        seq_printf(m, "%i %li %li ",
 370                                task->latency_record[i].count,
 371                                task->latency_record[i].time,
 372                                task->latency_record[i].max);
 373                        for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 374                                char sym[KSYM_SYMBOL_LEN];
 375                                char *c;
 376                                if (!task->latency_record[i].backtrace[q])
 377                                        break;
 378                                if (task->latency_record[i].backtrace[q] == ULONG_MAX)
 379                                        break;
 380                                sprint_symbol(sym, task->latency_record[i].backtrace[q]);
 381                                c = strchr(sym, '+');
 382                                if (c)
 383                                        *c = 0;
 384                                seq_printf(m, "%s ", sym);
 385                        }
 386                        seq_printf(m, "\n");
 387                }
 388
 389        }
 390        put_task_struct(task);
 391        return 0;
 392}
 393
 394static int lstats_open(struct inode *inode, struct file *file)
 395{
 396        return single_open(file, lstats_show_proc, inode);
 397}
 398
 399static ssize_t lstats_write(struct file *file, const char __user *buf,
 400                            size_t count, loff_t *offs)
 401{
 402        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 403
 404        if (!task)
 405                return -ESRCH;
 406        clear_all_latency_tracing(task);
 407        put_task_struct(task);
 408
 409        return count;
 410}
 411
 412static const struct file_operations proc_lstats_operations = {
 413        .open           = lstats_open,
 414        .read           = seq_read,
 415        .write          = lstats_write,
 416        .llseek         = seq_lseek,
 417        .release        = single_release,
 418};
 419
 420#endif
 421
 422/* The badness from the OOM killer */
 423unsigned long badness(struct task_struct *p, unsigned long uptime);
 424static int proc_oom_score(struct task_struct *task, char *buffer)
 425{
 426        unsigned long points;
 427        struct timespec uptime;
 428
 429        do_posix_clock_monotonic_gettime(&uptime);
 430        read_lock(&tasklist_lock);
 431        points = badness(task, uptime.tv_sec);
 432        read_unlock(&tasklist_lock);
 433        return sprintf(buffer, "%lu\n", points);
 434}
 435
 436struct limit_names {
 437        char *name;
 438        char *unit;
 439};
 440
 441static const struct limit_names lnames[RLIM_NLIMITS] = {
 442        [RLIMIT_CPU] = {"Max cpu time", "ms"},
 443        [RLIMIT_FSIZE] = {"Max file size", "bytes"},
 444        [RLIMIT_DATA] = {"Max data size", "bytes"},
 445        [RLIMIT_STACK] = {"Max stack size", "bytes"},
 446        [RLIMIT_CORE] = {"Max core file size", "bytes"},
 447        [RLIMIT_RSS] = {"Max resident set", "bytes"},
 448        [RLIMIT_NPROC] = {"Max processes", "processes"},
 449        [RLIMIT_NOFILE] = {"Max open files", "files"},
 450        [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
 451        [RLIMIT_AS] = {"Max address space", "bytes"},
 452        [RLIMIT_LOCKS] = {"Max file locks", "locks"},
 453        [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
 454        [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
 455        [RLIMIT_NICE] = {"Max nice priority", NULL},
 456        [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
 457        [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
 458};
 459
 460/* Display limits for a process */
 461static int proc_pid_limits(struct task_struct *task, char *buffer)
 462{
 463        unsigned int i;
 464        int count = 0;
 465        unsigned long flags;
 466        char *bufptr = buffer;
 467
 468        struct rlimit rlim[RLIM_NLIMITS];
 469
 470        if (!lock_task_sighand(task, &flags))
 471                return 0;
 472        memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 473        unlock_task_sighand(task, &flags);
 474
 475        /*
 476         * print the file header
 477         */
 478        count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
 479                        "Limit", "Soft Limit", "Hard Limit", "Units");
 480
 481        for (i = 0; i < RLIM_NLIMITS; i++) {
 482                if (rlim[i].rlim_cur == RLIM_INFINITY)
 483                        count += sprintf(&bufptr[count], "%-25s %-20s ",
 484                                         lnames[i].name, "unlimited");
 485                else
 486                        count += sprintf(&bufptr[count], "%-25s %-20lu ",
 487                                         lnames[i].name, rlim[i].rlim_cur);
 488
 489                if (rlim[i].rlim_max == RLIM_INFINITY)
 490                        count += sprintf(&bufptr[count], "%-20s ", "unlimited");
 491                else
 492                        count += sprintf(&bufptr[count], "%-20lu ",
 493                                         rlim[i].rlim_max);
 494
 495                if (lnames[i].unit)
 496                        count += sprintf(&bufptr[count], "%-10s\n",
 497                                         lnames[i].unit);
 498                else
 499                        count += sprintf(&bufptr[count], "\n");
 500        }
 501
 502        return count;
 503}
 504
 505#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 506static int proc_pid_syscall(struct task_struct *task, char *buffer)
 507{
 508        long nr;
 509        unsigned long args[6], sp, pc;
 510
 511        if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
 512                return sprintf(buffer, "running\n");
 513
 514        if (nr < 0)
 515                return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
 516
 517        return sprintf(buffer,
 518                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 519                       nr,
 520                       args[0], args[1], args[2], args[3], args[4], args[5],
 521                       sp, pc);
 522}
 523#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 524
 525/************************************************************************/
 526/*                       Here the fs part begins                        */
 527/************************************************************************/
 528
 529/* permission checks */
 530static int proc_fd_access_allowed(struct inode *inode)
 531{
 532        struct task_struct *task;
 533        int allowed = 0;
 534        /* Allow access to a task's file descriptors if it is us or we
 535         * may use ptrace attach to the process and find out that
 536         * information.
 537         */
 538        task = get_proc_task(inode);
 539        if (task) {
 540                allowed = ptrace_may_access(task, PTRACE_MODE_READ);
 541                put_task_struct(task);
 542        }
 543        return allowed;
 544}
 545
 546static int proc_setattr(struct dentry *dentry, struct iattr *attr)
 547{
 548        int error;
 549        struct inode *inode = dentry->d_inode;
 550
 551        if (attr->ia_valid & ATTR_MODE)
 552                return -EPERM;
 553
 554        error = inode_change_ok(inode, attr);
 555        if (!error)
 556                error = inode_setattr(inode, attr);
 557        return error;
 558}
 559
 560static const struct inode_operations proc_def_inode_operations = {
 561        .setattr        = proc_setattr,
 562};
 563
 564static int mounts_open_common(struct inode *inode, struct file *file,
 565                              const struct seq_operations *op)
 566{
 567        struct task_struct *task = get_proc_task(inode);
 568        struct nsproxy *nsp;
 569        struct mnt_namespace *ns = NULL;
 570        struct fs_struct *fs = NULL;
 571        struct path root;
 572        struct proc_mounts *p;
 573        int ret = -EINVAL;
 574
 575        if (task) {
 576                rcu_read_lock();
 577                nsp = task_nsproxy(task);
 578                if (nsp) {
 579                        ns = nsp->mnt_ns;
 580                        if (ns)
 581                                get_mnt_ns(ns);
 582                }
 583                rcu_read_unlock();
 584                if (ns)
 585                        fs = get_fs_struct(task);
 586                put_task_struct(task);
 587        }
 588
 589        if (!ns)
 590                goto err;
 591        if (!fs)
 592                goto err_put_ns;
 593
 594        read_lock(&fs->lock);
 595        root = fs->root;
 596        path_get(&root);
 597        read_unlock(&fs->lock);
 598        put_fs_struct(fs);
 599
 600        ret = -ENOMEM;
 601        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
 602        if (!p)
 603                goto err_put_path;
 604
 605        file->private_data = &p->m;
 606        ret = seq_open(file, op);
 607        if (ret)
 608                goto err_free;
 609
 610        p->m.private = p;
 611        p->ns = ns;
 612        p->root = root;
 613        p->event = ns->event;
 614
 615        return 0;
 616
 617 err_free:
 618        kfree(p);
 619 err_put_path:
 620        path_put(&root);
 621 err_put_ns:
 622        put_mnt_ns(ns);
 623 err:
 624        return ret;
 625}
 626
 627static int mounts_release(struct inode *inode, struct file *file)
 628{
 629        struct proc_mounts *p = file->private_data;
 630        path_put(&p->root);
 631        put_mnt_ns(p->ns);
 632        return seq_release(inode, file);
 633}
 634
 635static unsigned mounts_poll(struct file *file, poll_table *wait)
 636{
 637        struct proc_mounts *p = file->private_data;
 638        struct mnt_namespace *ns = p->ns;
 639        unsigned res = 0;
 640
 641        poll_wait(file, &ns->poll, wait);
 642
 643        spin_lock(&vfsmount_lock);
 644        if (p->event != ns->event) {
 645                p->event = ns->event;
 646                res = POLLERR;
 647        }
 648        spin_unlock(&vfsmount_lock);
 649
 650        return res;
 651}
 652
 653static int mounts_open(struct inode *inode, struct file *file)
 654{
 655        return mounts_open_common(inode, file, &mounts_op);
 656}
 657
 658static const struct file_operations proc_mounts_operations = {
 659        .open           = mounts_open,
 660        .read           = seq_read,
 661        .llseek         = seq_lseek,
 662        .release        = mounts_release,
 663        .poll           = mounts_poll,
 664};
 665
 666static int mountinfo_open(struct inode *inode, struct file *file)
 667{
 668        return mounts_open_common(inode, file, &mountinfo_op);
 669}
 670
 671static const struct file_operations proc_mountinfo_operations = {
 672        .open           = mountinfo_open,
 673        .read           = seq_read,
 674        .llseek         = seq_lseek,
 675        .release        = mounts_release,
 676        .poll           = mounts_poll,
 677};
 678
 679static int mountstats_open(struct inode *inode, struct file *file)
 680{
 681        return mounts_open_common(inode, file, &mountstats_op);
 682}
 683
 684static const struct file_operations proc_mountstats_operations = {
 685        .open           = mountstats_open,
 686        .read           = seq_read,
 687        .llseek         = seq_lseek,
 688        .release        = mounts_release,
 689};
 690
 691#define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 692
 693static ssize_t proc_info_read(struct file * file, char __user * buf,
 694                          size_t count, loff_t *ppos)
 695{
 696        struct inode * inode = file->f_path.dentry->d_inode;
 697        unsigned long page;
 698        ssize_t length;
 699        struct task_struct *task = get_proc_task(inode);
 700
 701        length = -ESRCH;
 702        if (!task)
 703                goto out_no_task;
 704
 705        if (count > PROC_BLOCK_SIZE)
 706                count = PROC_BLOCK_SIZE;
 707
 708        length = -ENOMEM;
 709        if (!(page = __get_free_page(GFP_TEMPORARY)))
 710                goto out;
 711
 712        length = PROC_I(inode)->op.proc_read(task, (char*)page);
 713
 714        if (length >= 0)
 715                length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
 716        free_page(page);
 717out:
 718        put_task_struct(task);
 719out_no_task:
 720        return length;
 721}
 722
 723static const struct file_operations proc_info_file_operations = {
 724        .read           = proc_info_read,
 725};
 726
 727static int proc_single_show(struct seq_file *m, void *v)
 728{
 729        struct inode *inode = m->private;
 730        struct pid_namespace *ns;
 731        struct pid *pid;
 732        struct task_struct *task;
 733        int ret;
 734
 735        ns = inode->i_sb->s_fs_info;
 736        pid = proc_pid(inode);
 737        task = get_pid_task(pid, PIDTYPE_PID);
 738        if (!task)
 739                return -ESRCH;
 740
 741        ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
 742
 743        put_task_struct(task);
 744        return ret;
 745}
 746
 747static int proc_single_open(struct inode *inode, struct file *filp)
 748{
 749        int ret;
 750        ret = single_open(filp, proc_single_show, NULL);
 751        if (!ret) {
 752                struct seq_file *m = filp->private_data;
 753
 754                m->private = inode;
 755        }
 756        return ret;
 757}
 758
 759static const struct file_operations proc_single_file_operations = {
 760        .open           = proc_single_open,
 761        .read           = seq_read,
 762        .llseek         = seq_lseek,
 763        .release        = single_release,
 764};
 765
 766static int mem_open(struct inode* inode, struct file* file)
 767{
 768        file->private_data = (void*)((long)current->self_exec_id);
 769        return 0;
 770}
 771
 772static ssize_t mem_read(struct file * file, char __user * buf,
 773                        size_t count, loff_t *ppos)
 774{
 775        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 776        char *page;
 777        unsigned long src = *ppos;
 778        int ret = -ESRCH;
 779        struct mm_struct *mm;
 780
 781        if (!task)
 782                goto out_no_task;
 783
 784        if (check_mem_permission(task))
 785                goto out;
 786
 787        ret = -ENOMEM;
 788        page = (char *)__get_free_page(GFP_TEMPORARY);
 789        if (!page)
 790                goto out;
 791
 792        ret = 0;
 793 
 794        mm = get_task_mm(task);
 795        if (!mm)
 796                goto out_free;
 797
 798        ret = -EIO;
 799 
 800        if (file->private_data != (void*)((long)current->self_exec_id))
 801                goto out_put;
 802
 803        ret = 0;
 804 
 805        while (count > 0) {
 806                int this_len, retval;
 807
 808                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 809                retval = access_process_vm(task, src, page, this_len, 0);
 810                if (!retval || check_mem_permission(task)) {
 811                        if (!ret)
 812                                ret = -EIO;
 813                        break;
 814                }
 815
 816                if (copy_to_user(buf, page, retval)) {
 817                        ret = -EFAULT;
 818                        break;
 819                }
 820 
 821                ret += retval;
 822                src += retval;
 823                buf += retval;
 824                count -= retval;
 825        }
 826        *ppos = src;
 827
 828out_put:
 829        mmput(mm);
 830out_free:
 831        free_page((unsigned long) page);
 832out:
 833        put_task_struct(task);
 834out_no_task:
 835        return ret;
 836}
 837
 838#define mem_write NULL
 839
 840#ifndef mem_write
 841/* This is a security hazard */
 842static ssize_t mem_write(struct file * file, const char __user *buf,
 843                         size_t count, loff_t *ppos)
 844{
 845        int copied;
 846        char *page;
 847        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 848        unsigned long dst = *ppos;
 849
 850        copied = -ESRCH;
 851        if (!task)
 852                goto out_no_task;
 853
 854        if (check_mem_permission(task))
 855                goto out;
 856
 857        copied = -ENOMEM;
 858        page = (char *)__get_free_page(GFP_TEMPORARY);
 859        if (!page)
 860                goto out;
 861
 862        copied = 0;
 863        while (count > 0) {
 864                int this_len, retval;
 865
 866                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 867                if (copy_from_user(page, buf, this_len)) {
 868                        copied = -EFAULT;
 869                        break;
 870                }
 871                retval = access_process_vm(task, dst, page, this_len, 1);
 872                if (!retval) {
 873                        if (!copied)
 874                                copied = -EIO;
 875                        break;
 876                }
 877                copied += retval;
 878                buf += retval;
 879                dst += retval;
 880                count -= retval;                        
 881        }
 882        *ppos = dst;
 883        free_page((unsigned long) page);
 884out:
 885        put_task_struct(task);
 886out_no_task:
 887        return copied;
 888}
 889#endif
 890
 891loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 892{
 893        switch (orig) {
 894        case 0:
 895                file->f_pos = offset;
 896                break;
 897        case 1:
 898                file->f_pos += offset;
 899                break;
 900        default:
 901                return -EINVAL;
 902        }
 903        force_successful_syscall_return();
 904        return file->f_pos;
 905}
 906
 907static const struct file_operations proc_mem_operations = {
 908        .llseek         = mem_lseek,
 909        .read           = mem_read,
 910        .write          = mem_write,
 911        .open           = mem_open,
 912};
 913
 914static ssize_t environ_read(struct file *file, char __user *buf,
 915                        size_t count, loff_t *ppos)
 916{
 917        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
 918        char *page;
 919        unsigned long src = *ppos;
 920        int ret = -ESRCH;
 921        struct mm_struct *mm;
 922
 923        if (!task)
 924                goto out_no_task;
 925
 926        if (!ptrace_may_access(task, PTRACE_MODE_READ))
 927                goto out;
 928
 929        ret = -ENOMEM;
 930        page = (char *)__get_free_page(GFP_TEMPORARY);
 931        if (!page)
 932                goto out;
 933
 934        ret = 0;
 935
 936        mm = get_task_mm(task);
 937        if (!mm)
 938                goto out_free;
 939
 940        while (count > 0) {
 941                int this_len, retval, max_len;
 942
 943                this_len = mm->env_end - (mm->env_start + src);
 944
 945                if (this_len <= 0)
 946                        break;
 947
 948                max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
 949                this_len = (this_len > max_len) ? max_len : this_len;
 950
 951                retval = access_process_vm(task, (mm->env_start + src),
 952                        page, this_len, 0);
 953
 954                if (retval <= 0) {
 955                        ret = retval;
 956                        break;
 957                }
 958
 959                if (copy_to_user(buf, page, retval)) {
 960                        ret = -EFAULT;
 961                        break;
 962                }
 963
 964                ret += retval;
 965                src += retval;
 966                buf += retval;
 967                count -= retval;
 968        }
 969        *ppos = src;
 970
 971        mmput(mm);
 972out_free:
 973        free_page((unsigned long) page);
 974out:
 975        put_task_struct(task);
 976out_no_task:
 977        return ret;
 978}
 979
 980static const struct file_operations proc_environ_operations = {
 981        .read           = environ_read,
 982};
 983
 984static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 985                                size_t count, loff_t *ppos)
 986{
 987        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 988        char buffer[PROC_NUMBUF];
 989        size_t len;
 990        int oom_adjust;
 991
 992        if (!task)
 993                return -ESRCH;
 994        oom_adjust = task->oomkilladj;
 995        put_task_struct(task);
 996
 997        len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
 998
 999        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1000}
1001
1002static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1003                                size_t count, loff_t *ppos)
1004{
1005        struct task_struct *task;
1006        char buffer[PROC_NUMBUF], *end;
1007        int oom_adjust;
1008
1009        memset(buffer, 0, sizeof(buffer));
1010        if (count > sizeof(buffer) - 1)
1011                count = sizeof(buffer) - 1;
1012        if (copy_from_user(buffer, buf, count))
1013                return -EFAULT;
1014        oom_adjust = simple_strtol(buffer, &end, 0);
1015        if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1016             oom_adjust != OOM_DISABLE)
1017                return -EINVAL;
1018        if (*end == '\n')
1019                end++;
1020        task = get_proc_task(file->f_path.dentry->d_inode);
1021        if (!task)
1022                return -ESRCH;
1023        if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
1024                put_task_struct(task);
1025                return -EACCES;
1026        }
1027        task->oomkilladj = oom_adjust;
1028        put_task_struct(task);
1029        if (end - buffer == 0)
1030                return -EIO;
1031        return end - buffer;
1032}
1033
1034static const struct file_operations proc_oom_adjust_operations = {
1035        .read           = oom_adjust_read,
1036        .write          = oom_adjust_write,
1037};
1038
1039#ifdef CONFIG_AUDITSYSCALL
1040#define TMPBUFLEN 21
1041static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1042                                  size_t count, loff_t *ppos)
1043{
1044        struct inode * inode = file->f_path.dentry->d_inode;
1045        struct task_struct *task = get_proc_task(inode);
1046        ssize_t length;
1047        char tmpbuf[TMPBUFLEN];
1048
1049        if (!task)
1050                return -ESRCH;
1051        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1052                                audit_get_loginuid(task));
1053        put_task_struct(task);
1054        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1055}
1056
1057static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1058                                   size_t count, loff_t *ppos)
1059{
1060        struct inode * inode = file->f_path.dentry->d_inode;
1061        char *page, *tmp;
1062        ssize_t length;
1063        uid_t loginuid;
1064
1065        if (!capable(CAP_AUDIT_CONTROL))
1066                return -EPERM;
1067
1068        if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
1069                return -EPERM;
1070
1071        if (count >= PAGE_SIZE)
1072                count = PAGE_SIZE - 1;
1073
1074        if (*ppos != 0) {
1075                /* No partial writes. */
1076                return -EINVAL;
1077        }
1078        page = (char*)__get_free_page(GFP_TEMPORARY);
1079        if (!page)
1080                return -ENOMEM;
1081        length = -EFAULT;
1082        if (copy_from_user(page, buf, count))
1083                goto out_free_page;
1084
1085        page[count] = '\0';
1086        loginuid = simple_strtoul(page, &tmp, 10);
1087        if (tmp == page) {
1088                length = -EINVAL;
1089                goto out_free_page;
1090
1091        }
1092        length = audit_set_loginuid(current, loginuid);
1093        if (likely(length == 0))
1094                length = count;
1095
1096out_free_page:
1097        free_page((unsigned long) page);
1098        return length;
1099}
1100
1101static const struct file_operations proc_loginuid_operations = {
1102        .read           = proc_loginuid_read,
1103        .write          = proc_loginuid_write,
1104};
1105
1106static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1107                                  size_t count, loff_t *ppos)
1108{
1109        struct inode * inode = file->f_path.dentry->d_inode;
1110        struct task_struct *task = get_proc_task(inode);
1111        ssize_t length;
1112        char tmpbuf[TMPBUFLEN];
1113
1114        if (!task)
1115                return -ESRCH;
1116        length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1117                                audit_get_sessionid(task));
1118        put_task_struct(task);
1119        return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1120}
1121
1122static const struct file_operations proc_sessionid_operations = {
1123        .read           = proc_sessionid_read,
1124};
1125#endif
1126
1127#ifdef CONFIG_FAULT_INJECTION
1128static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1129                                      size_t count, loff_t *ppos)
1130{
1131        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
1132        char buffer[PROC_NUMBUF];
1133        size_t len;
1134        int make_it_fail;
1135
1136        if (!task)
1137                return -ESRCH;
1138        make_it_fail = task->make_it_fail;
1139        put_task_struct(task);
1140
1141        len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1142
1143        return simple_read_from_buffer(buf, count, ppos, buffer, len);
1144}
1145
1146static ssize_t proc_fault_inject_write(struct file * file,
1147                        const char __user * buf, size_t count, loff_t *ppos)
1148{
1149        struct task_struct *task;
1150        char buffer[PROC_NUMBUF], *end;
1151        int make_it_fail;
1152
1153        if (!capable(CAP_SYS_RESOURCE))
1154                return -EPERM;
1155        memset(buffer, 0, sizeof(buffer));
1156        if (count > sizeof(buffer) - 1)
1157                count = sizeof(buffer) - 1;
1158        if (copy_from_user(buffer, buf, count))
1159                return -EFAULT;
1160        make_it_fail = simple_strtol(buffer, &end, 0);
1161        if (*end == '\n')
1162                end++;
1163        task = get_proc_task(file->f_dentry->d_inode);
1164        if (!task)
1165                return -ESRCH;
1166        task->make_it_fail = make_it_fail;
1167        put_task_struct(task);
1168        if (end - buffer == 0)
1169                return -EIO;
1170        return end - buffer;
1171}
1172
1173static const struct file_operations proc_fault_inject_operations = {
1174        .read           = proc_fault_inject_read,
1175        .write          = proc_fault_inject_write,
1176};
1177#endif
1178
1179
1180#ifdef CONFIG_SCHED_DEBUG
1181/*
1182 * Print out various scheduling related per-task fields:
1183 */
1184static int sched_show(struct seq_file *m, void *v)
1185{
1186        struct inode *inode = m->private;
1187        struct task_struct *p;
1188
1189        WARN_ON(!inode);
1190
1191        p = get_proc_task(inode);
1192        if (!p)
1193                return -ESRCH;
1194        proc_sched_show_task(p, m);
1195
1196        put_task_struct(p);
1197
1198        return 0;
1199}
1200
1201static ssize_t
1202sched_write(struct file *file, const char __user *buf,
1203            size_t count, loff_t *offset)
1204{
1205        struct inode *inode = file->f_path.dentry->d_inode;
1206        struct task_struct *p;
1207
1208        WARN_ON(!inode);
1209
1210        p = get_proc_task(inode);
1211        if (!p)
1212                return -ESRCH;
1213        proc_sched_set_task(p);
1214
1215        put_task_struct(p);
1216
1217        return count;
1218}
1219
1220static int sched_open(struct inode *inode, struct file *filp)
1221{
1222        int ret;
1223
1224        ret = single_open(filp, sched_show, NULL);
1225        if (!ret) {
1226                struct seq_file *m = filp->private_data;
1227
1228                m->private = inode;
1229        }
1230        return ret;
1231}
1232
1233static const struct file_operations proc_pid_sched_operations = {
1234        .open           = sched_open,
1235        .read           = seq_read,
1236        .write          = sched_write,
1237        .llseek         = seq_lseek,
1238        .release        = single_release,
1239};
1240
1241#endif
1242
1243/*
1244 * We added or removed a vma mapping the executable. The vmas are only mapped
1245 * during exec and are not mapped with the mmap system call.
1246 * Callers must hold down_write() on the mm's mmap_sem for these
1247 */
1248void added_exe_file_vma(struct mm_struct *mm)
1249{
1250        mm->num_exe_file_vmas++;
1251}
1252
1253void removed_exe_file_vma(struct mm_struct *mm)
1254{
1255        mm->num_exe_file_vmas--;
1256        if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1257                fput(mm->exe_file);
1258                mm->exe_file = NULL;
1259        }
1260
1261}
1262
1263void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1264{
1265        if (new_exe_file)
1266                get_file(new_exe_file);
1267        if (mm->exe_file)
1268                fput(mm->exe_file);
1269        mm->exe_file = new_exe_file;
1270        mm->num_exe_file_vmas = 0;
1271}
1272
1273struct file *get_mm_exe_file(struct mm_struct *mm)
1274{
1275        struct file *exe_file;
1276
1277        /* We need mmap_sem to protect against races with removal of
1278         * VM_EXECUTABLE vmas */
1279        down_read(&mm->mmap_sem);
1280        exe_file = mm->exe_file;
1281        if (exe_file)
1282                get_file(exe_file);
1283        up_read(&mm->mmap_sem);
1284        return exe_file;
1285}
1286
1287void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1288{
1289        /* It's safe to write the exe_file pointer without exe_file_lock because
1290         * this is called during fork when the task is not yet in /proc */
1291        newmm->exe_file = get_mm_exe_file(oldmm);
1292}
1293
1294static int proc_exe_link(struct inode *inode, struct path *exe_path)
1295{
1296        struct task_struct *task;
1297        struct mm_struct *mm;
1298        struct file *exe_file;
1299
1300        task = get_proc_task(inode);
1301        if (!task)
1302                return -ENOENT;
1303        mm = get_task_mm(task);
1304        put_task_struct(task);
1305        if (!mm)
1306                return -ENOENT;
1307        exe_file = get_mm_exe_file(mm);
1308        mmput(mm);
1309        if (exe_file) {
1310                *exe_path = exe_file->f_path;
1311                path_get(&exe_file->f_path);
1312                fput(exe_file);
1313                return 0;
1314        } else
1315                return -ENOENT;
1316}
1317
1318static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1319{
1320        struct inode *inode = dentry->d_inode;
1321        int error = -EACCES;
1322
1323        /* We don't need a base pointer in the /proc filesystem */
1324        path_put(&nd->path);
1325
1326        /* Are we allowed to snoop on the tasks file descriptors? */
1327        if (!proc_fd_access_allowed(inode))
1328                goto out;
1329
1330        error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1331        nd->last_type = LAST_BIND;
1332out:
1333        return ERR_PTR(error);
1334}
1335
1336static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1337{
1338        char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
1339        char *pathname;
1340        int len;
1341
1342        if (!tmp)
1343                return -ENOMEM;
1344
1345        pathname = d_path(path, tmp, PAGE_SIZE);
1346        len = PTR_ERR(pathname);
1347        if (IS_ERR(pathname))
1348                goto out;
1349        len = tmp + PAGE_SIZE - 1 - pathname;
1350
1351        if (len > buflen)
1352                len = buflen;
1353        if (copy_to_user(buffer, pathname, len))
1354                len = -EFAULT;
1355 out:
1356        free_page((unsigned long)tmp);
1357        return len;
1358}
1359
1360static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1361{
1362        int error = -EACCES;
1363        struct inode *inode = dentry->d_inode;
1364        struct path path;
1365
1366        /* Are we allowed to snoop on the tasks file descriptors? */
1367        if (!proc_fd_access_allowed(inode))
1368                goto out;
1369
1370        error = PROC_I(inode)->op.proc_get_link(inode, &path);
1371        if (error)
1372                goto out;
1373
1374        error = do_proc_readlink(&path, buffer, buflen);
1375        path_put(&path);
1376out:
1377        return error;
1378}
1379
1380static const struct inode_operations proc_pid_link_inode_operations = {
1381        .readlink       = proc_pid_readlink,
1382        .follow_link    = proc_pid_follow_link,
1383        .setattr        = proc_setattr,
1384};
1385
1386
1387/* building an inode */
1388
1389static int task_dumpable(struct task_struct *task)
1390{
1391        int dumpable = 0;
1392        struct mm_struct *mm;
1393
1394        task_lock(task);
1395        mm = task->mm;
1396        if (mm)
1397                dumpable = get_dumpable(mm);
1398        task_unlock(task);
1399        if(dumpable == 1)
1400                return 1;
1401        return 0;
1402}
1403
1404
1405static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1406{
1407        struct inode * inode;
1408        struct proc_inode *ei;
1409
1410        /* We need a new inode */
1411
1412        inode = new_inode(sb);
1413        if (!inode)
1414                goto out;
1415
1416        /* Common stuff */
1417        ei = PROC_I(inode);
1418        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1419        inode->i_op = &proc_def_inode_operations;
1420
1421        /*
1422         * grab the reference to task.
1423         */
1424        ei->pid = get_task_pid(task, PIDTYPE_PID);
1425        if (!ei->pid)
1426                goto out_unlock;
1427
1428        inode->i_uid = 0;
1429        inode->i_gid = 0;
1430        if (task_dumpable(task)) {
1431                inode->i_uid = task->euid;
1432                inode->i_gid = task->egid;
1433        }
1434        security_task_to_inode(task, inode);
1435
1436out:
1437        return inode;
1438
1439out_unlock:
1440        iput(inode);
1441        return NULL;
1442}
1443
1444static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1445{
1446        struct inode *inode = dentry->d_inode;
1447        struct task_struct *task;
1448        generic_fillattr(inode, stat);
1449
1450        rcu_read_lock();
1451        stat->uid = 0;
1452        stat->gid = 0;
1453        task = pid_task(proc_pid(inode), PIDTYPE_PID);
1454        if (task) {
1455                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1456                    task_dumpable(task)) {
1457                        stat->uid = task->euid;
1458                        stat->gid = task->egid;
1459                }
1460        }
1461        rcu_read_unlock();
1462        return 0;
1463}
1464
1465/* dentry stuff */
1466
1467/*
1468 *      Exceptional case: normally we are not allowed to unhash a busy
1469 * directory. In this case, however, we can do it - no aliasing problems
1470 * due to the way we treat inodes.
1471 *
1472 * Rewrite the inode's ownerships here because the owning task may have
1473 * performed a setuid(), etc.
1474 *
1475 * Before the /proc/pid/status file was created the only way to read
1476 * the effective uid of a /process was to stat /proc/pid.  Reading
1477 * /proc/pid/status is slow enough that procps and other packages
1478 * kept stating /proc/pid.  To keep the rules in /proc simple I have
1479 * made this apply to all per process world readable and executable
1480 * directories.
1481 */
1482static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1483{
1484        struct inode *inode = dentry->d_inode;
1485        struct task_struct *task = get_proc_task(inode);
1486        if (task) {
1487                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1488                    task_dumpable(task)) {
1489                        inode->i_uid = task->euid;
1490                        inode->i_gid = task->egid;
1491                } else {
1492                        inode->i_uid = 0;
1493                        inode->i_gid = 0;
1494                }
1495                inode->i_mode &= ~(S_ISUID | S_ISGID);
1496                security_task_to_inode(task, inode);
1497                put_task_struct(task);
1498                return 1;
1499        }
1500        d_drop(dentry);
1501        return 0;
1502}
1503
1504static int pid_delete_dentry(struct dentry * dentry)
1505{
1506        /* Is the task we represent dead?
1507         * If so, then don't put the dentry on the lru list,
1508         * kill it immediately.
1509         */
1510        return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1511}
1512
1513static struct dentry_operations pid_dentry_operations =
1514{
1515        .d_revalidate   = pid_revalidate,
1516        .d_delete       = pid_delete_dentry,
1517};
1518
1519/* Lookups */
1520
/*
 * Callback that attaches a fresh inode to a dentry.  Arguments are the
 * parent directory inode, the dentry to fill in, the owning task and an
 * opaque per-entry cookie.  As used in this file it returns NULL on
 * success and an ERR_PTR() value on failure.
 */
typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
				     struct task_struct *, const void *);
1523
1524/*
1525 * Fill a directory entry.
1526 *
1527 * If possible create the dcache entry and derive our inode number and
1528 * file type from dcache entry.
1529 *
1530 * Since all of the proc inode numbers are dynamically generated, the inode
1531 * numbers do not exist until the inode is cache.  This means creating the
1532 * the dcache entry in readdir is necessary to keep the inode numbers
1533 * reported by readdir in sync with the inode numbers reported
1534 * by stat.
1535 */
1536static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1537        char *name, int len,
1538        instantiate_t instantiate, struct task_struct *task, const void *ptr)
1539{
1540        struct dentry *child, *dir = filp->f_path.dentry;
1541        struct inode *inode;
1542        struct qstr qname;
1543        ino_t ino = 0;
1544        unsigned type = DT_UNKNOWN;
1545
1546        qname.name = name;
1547        qname.len  = len;
1548        qname.hash = full_name_hash(name, len);
1549
1550        child = d_lookup(dir, &qname);
1551        if (!child) {
1552                struct dentry *new;
1553                new = d_alloc(dir, &qname);
1554                if (new) {
1555                        child = instantiate(dir->d_inode, new, task, ptr);
1556                        if (child)
1557                                dput(new);
1558                        else
1559                                child = new;
1560                }
1561        }
1562        if (!child || IS_ERR(child) || !child->d_inode)
1563                goto end_instantiate;
1564        inode = child->d_inode;
1565        if (inode) {
1566                ino = inode->i_ino;
1567                type = inode->i_mode >> 12;
1568        }
1569        dput(child);
1570end_instantiate:
1571        if (!ino)
1572                ino = find_inode_number(dir, &qname);
1573        if (!ino)
1574                ino = 1;
1575        return filldir(dirent, name, len, filp->f_pos, ino, type);
1576}
1577
1578static unsigned name_to_int(struct dentry *dentry)
1579{
1580        const char *name = dentry->d_name.name;
1581        int len = dentry->d_name.len;
1582        unsigned n = 0;
1583
1584        if (len > 1 && *name == '0')
1585                goto out;
1586        while (len-- > 0) {
1587                unsigned c = *name++ - '0';
1588                if (c > 9)
1589                        goto out;
1590                if (n >= (~0U-9)/10)
1591                        goto out;
1592                n *= 10;
1593                n += c;
1594        }
1595        return n;
1596out:
1597        return ~0U;
1598}
1599
1600#define PROC_FDINFO_MAX 64
1601
1602static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1603{
1604        struct task_struct *task = get_proc_task(inode);
1605        struct files_struct *files = NULL;
1606        struct file *file;
1607        int fd = proc_fd(inode);
1608
1609        if (task) {
1610                files = get_files_struct(task);
1611                put_task_struct(task);
1612        }
1613        if (files) {
1614                /*
1615                 * We are not taking a ref to the file structure, so we must
1616                 * hold ->file_lock.
1617                 */
1618                spin_lock(&files->file_lock);
1619                file = fcheck_files(files, fd);
1620                if (file) {
1621                        if (path) {
1622                                *path = file->f_path;
1623                                path_get(&file->f_path);
1624                        }
1625                        if (info)
1626                                snprintf(info, PROC_FDINFO_MAX,
1627                                         "pos:\t%lli\n"
1628                                         "flags:\t0%o\n",
1629                                         (long long) file->f_pos,
1630                                         file->f_flags);
1631                        spin_unlock(&files->file_lock);
1632                        put_files_struct(files);
1633                        return 0;
1634                }
1635                spin_unlock(&files->file_lock);
1636                put_files_struct(files);
1637        }
1638        return -ENOENT;
1639}
1640
1641static int proc_fd_link(struct inode *inode, struct path *path)
1642{
1643        return proc_fd_info(inode, path, NULL);
1644}
1645
1646static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1647{
1648        struct inode *inode = dentry->d_inode;
1649        struct task_struct *task = get_proc_task(inode);
1650        int fd = proc_fd(inode);
1651        struct files_struct *files;
1652
1653        if (task) {
1654                files = get_files_struct(task);
1655                if (files) {
1656                        rcu_read_lock();
1657                        if (fcheck_files(files, fd)) {
1658                                rcu_read_unlock();
1659                                put_files_struct(files);
1660                                if (task_dumpable(task)) {
1661                                        inode->i_uid = task->euid;
1662                                        inode->i_gid = task->egid;
1663                                } else {
1664                                        inode->i_uid = 0;
1665                                        inode->i_gid = 0;
1666                                }
1667                                inode->i_mode &= ~(S_ISUID | S_ISGID);
1668                                security_task_to_inode(task, inode);
1669                                put_task_struct(task);
1670                                return 1;
1671                        }
1672                        rcu_read_unlock();
1673                        put_files_struct(files);
1674                }
1675                put_task_struct(task);
1676        }
1677        d_drop(dentry);
1678        return 0;
1679}
1680
1681static struct dentry_operations tid_fd_dentry_operations =
1682{
1683        .d_revalidate   = tid_fd_revalidate,
1684        .d_delete       = pid_delete_dentry,
1685};
1686
1687static struct dentry *proc_fd_instantiate(struct inode *dir,
1688        struct dentry *dentry, struct task_struct *task, const void *ptr)
1689{
1690        unsigned fd = *(const unsigned *)ptr;
1691        struct file *file;
1692        struct files_struct *files;
1693        struct inode *inode;
1694        struct proc_inode *ei;
1695        struct dentry *error = ERR_PTR(-ENOENT);
1696
1697        inode = proc_pid_make_inode(dir->i_sb, task);
1698        if (!inode)
1699                goto out;
1700        ei = PROC_I(inode);
1701        ei->fd = fd;
1702        files = get_files_struct(task);
1703        if (!files)
1704                goto out_iput;
1705        inode->i_mode = S_IFLNK;
1706
1707        /*
1708         * We are not taking a ref to the file structure, so we must
1709         * hold ->file_lock.
1710         */
1711        spin_lock(&files->file_lock);
1712        file = fcheck_files(files, fd);
1713        if (!file)
1714                goto out_unlock;
1715        if (file->f_mode & FMODE_READ)
1716                inode->i_mode |= S_IRUSR | S_IXUSR;
1717        if (file->f_mode & FMODE_WRITE)
1718                inode->i_mode |= S_IWUSR | S_IXUSR;
1719        spin_unlock(&files->file_lock);
1720        put_files_struct(files);
1721
1722        inode->i_op = &proc_pid_link_inode_operations;
1723        inode->i_size = 64;
1724        ei->op.proc_get_link = proc_fd_link;
1725        dentry->d_op = &tid_fd_dentry_operations;
1726        d_add(dentry, inode);
1727        /* Close the race of the process dying before we return the dentry */
1728        if (tid_fd_revalidate(dentry, NULL))
1729                error = NULL;
1730
1731 out:
1732        return error;
1733out_unlock:
1734        spin_unlock(&files->file_lock);
1735        put_files_struct(files);
1736out_iput:
1737        iput(inode);
1738        goto out;
1739}
1740
1741static struct dentry *proc_lookupfd_common(struct inode *dir,
1742                                           struct dentry *dentry,
1743                                           instantiate_t instantiate)
1744{
1745        struct task_struct *task = get_proc_task(dir);
1746        unsigned fd = name_to_int(dentry);
1747        struct dentry *result = ERR_PTR(-ENOENT);
1748
1749        if (!task)
1750                goto out_no_task;
1751        if (fd == ~0U)
1752                goto out;
1753
1754        result = instantiate(dir, dentry, task, &fd);
1755out:
1756        put_task_struct(task);
1757out_no_task:
1758        return result;
1759}
1760
1761static int proc_readfd_common(struct file * filp, void * dirent,
1762                              filldir_t filldir, instantiate_t instantiate)
1763{
1764        struct dentry *dentry = filp->f_path.dentry;
1765        struct inode *inode = dentry->d_inode;
1766        struct task_struct *p = get_proc_task(inode);
1767        unsigned int fd, ino;
1768        int retval;
1769        struct files_struct * files;
1770
1771        retval = -ENOENT;
1772        if (!p)
1773                goto out_no_task;
1774        retval = 0;
1775
1776        fd = filp->f_pos;
1777        switch (fd) {
1778                case 0:
1779                        if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1780                                goto out;
1781                        filp->f_pos++;
1782                case 1:
1783                        ino = parent_ino(dentry);
1784                        if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1785                                goto out;
1786                        filp->f_pos++;
1787                default:
1788                        files = get_files_struct(p);
1789                        if (!files)
1790                                goto out;
1791                        rcu_read_lock();
1792                        for (fd = filp->f_pos-2;
1793                             fd < files_fdtable(files)->max_fds;
1794                             fd++, filp->f_pos++) {
1795                                char name[PROC_NUMBUF];
1796                                int len;
1797
1798                                if (!fcheck_files(files, fd))
1799                                        continue;
1800                                rcu_read_unlock();
1801
1802                                len = snprintf(name, sizeof(name), "%d", fd);
1803                                if (proc_fill_cache(filp, dirent, filldir,
1804                                                    name, len, instantiate,
1805                                                    p, &fd) < 0) {
1806                                        rcu_read_lock();
1807                                        break;
1808                                }
1809                                rcu_read_lock();
1810                        }
1811                        rcu_read_unlock();
1812                        put_files_struct(files);
1813        }
1814out:
1815        put_task_struct(p);
1816out_no_task:
1817        return retval;
1818}
1819
1820static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1821                                    struct nameidata *nd)
1822{
1823        return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1824}
1825
1826static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1827{
1828        return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1829}
1830
1831static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1832                                      size_t len, loff_t *ppos)
1833{
1834        char tmp[PROC_FDINFO_MAX];
1835        int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
1836        if (!err)
1837                err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1838        return err;
1839}
1840
1841static const struct file_operations proc_fdinfo_file_operations = {
1842        .open           = nonseekable_open,
1843        .read           = proc_fdinfo_read,
1844};
1845
1846static const struct file_operations proc_fd_operations = {
1847        .read           = generic_read_dir,
1848        .readdir        = proc_readfd,
1849};
1850
1851/*
1852 * /proc/pid/fd needs a special permission handler so that a process can still
1853 * access /proc/self/fd after it has executed a setuid().
1854 */
1855static int proc_fd_permission(struct inode *inode, int mask)
1856{
1857        int rv;
1858
1859        rv = generic_permission(inode, mask, NULL);
1860        if (rv == 0)
1861                return 0;
1862        if (task_pid(current) == proc_pid(inode))
1863                rv = 0;
1864        return rv;
1865}
1866
1867/*
1868 * proc directories can do almost nothing..
1869 */
1870static const struct inode_operations proc_fd_inode_operations = {
1871        .lookup         = proc_lookupfd,
1872        .permission     = proc_fd_permission,
1873        .setattr        = proc_setattr,
1874};
1875
1876static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
1877        struct dentry *dentry, struct task_struct *task, const void *ptr)
1878{
1879        unsigned fd = *(unsigned *)ptr;
1880        struct inode *inode;
1881        struct proc_inode *ei;
1882        struct dentry *error = ERR_PTR(-ENOENT);
1883
1884        inode = proc_pid_make_inode(dir->i_sb, task);
1885        if (!inode)
1886                goto out;
1887        ei = PROC_I(inode);
1888        ei->fd = fd;
1889        inode->i_mode = S_IFREG | S_IRUSR;
1890        inode->i_fop = &proc_fdinfo_file_operations;
1891        dentry->d_op = &tid_fd_dentry_operations;
1892        d_add(dentry, inode);
1893        /* Close the race of the process dying before we return the dentry */
1894        if (tid_fd_revalidate(dentry, NULL))
1895                error = NULL;
1896
1897 out:
1898        return error;
1899}
1900
1901static struct dentry *proc_lookupfdinfo(struct inode *dir,
1902                                        struct dentry *dentry,
1903                                        struct nameidata *nd)
1904{
1905        return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
1906}
1907
1908static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
1909{
1910        return proc_readfd_common(filp, dirent, filldir,
1911                                  proc_fdinfo_instantiate);
1912}
1913
1914static const struct file_operations proc_fdinfo_operations = {
1915        .read           = generic_read_dir,
1916        .readdir        = proc_readfdinfo,
1917};
1918
1919/*
1920 * proc directories can do almost nothing..
1921 */
1922static const struct inode_operations proc_fdinfo_inode_operations = {
1923        .lookup         = proc_lookupfdinfo,
1924        .setattr        = proc_setattr,
1925};
1926
1927
1928static struct dentry *proc_pident_instantiate(struct inode *dir,
1929        struct dentry *dentry, struct task_struct *task, const void *ptr)
1930{
1931        const struct pid_entry *p = ptr;
1932        struct inode *inode;
1933        struct proc_inode *ei;
1934        struct dentry *error = ERR_PTR(-EINVAL);
1935
1936        inode = proc_pid_make_inode(dir->i_sb, task);
1937        if (!inode)
1938                goto out;
1939
1940        ei = PROC_I(inode);
1941        inode->i_mode = p->mode;
1942        if (S_ISDIR(inode->i_mode))
1943                inode->i_nlink = 2;     /* Use getattr to fix if necessary */
1944        if (p->iop)
1945                inode->i_op = p->iop;
1946        if (p->fop)
1947                inode->i_fop = p->fop;
1948        ei->op = p->op;
1949        dentry->d_op = &pid_dentry_operations;
1950        d_add(dentry, inode);
1951        /* Close the race of the process dying before we return the dentry */
1952        if (pid_revalidate(dentry, NULL))
1953                error = NULL;
1954out:
1955        return error;
1956}
1957
1958static struct dentry *proc_pident_lookup(struct inode *dir, 
1959                                         struct dentry *dentry,
1960                                         const struct pid_entry *ents,
1961                                         unsigned int nents)
1962{
1963        struct inode *inode;
1964        struct dentry *error;
1965        struct task_struct *task = get_proc_task(dir);
1966        const struct pid_entry *p, *last;
1967
1968        error = ERR_PTR(-ENOENT);
1969        inode = NULL;
1970
1971        if (!task)
1972                goto out_no_task;
1973
1974        /*
1975         * Yes, it does not scale. And it should not. Don't add
1976         * new entries into /proc/<tgid>/ without very good reasons.
1977         */
1978        last = &ents[nents - 1];
1979        for (p = ents; p <= last; p++) {
1980                if (p->len != dentry->d_name.len)
1981                        continue;
1982                if (!memcmp(dentry->d_name.name, p->name, p->len))
1983                        break;
1984        }
1985        if (p > last)
1986                goto out;
1987
1988        error = proc_pident_instantiate(dir, dentry, task, p);
1989out:
1990        put_task_struct(task);
1991out_no_task:
1992        return error;
1993}
1994
1995static int proc_pident_fill_cache(struct file *filp, void *dirent,
1996        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
1997{
1998        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
1999                                proc_pident_instantiate, task, p);
2000}
2001
2002static int proc_pident_readdir(struct file *filp,
2003                void *dirent, filldir_t filldir,
2004                const struct pid_entry *ents, unsigned int nents)
2005{
2006        int i;
2007        struct dentry *dentry = filp->f_path.dentry;
2008        struct inode *inode = dentry->d_inode;
2009        struct task_struct *task = get_proc_task(inode);
2010        const struct pid_entry *p, *last;
2011        ino_t ino;
2012        int ret;
2013
2014        ret = -ENOENT;
2015        if (!task)
2016                goto out_no_task;
2017
2018        ret = 0;
2019        i = filp->f_pos;
2020        switch (i) {
2021        case 0:
2022                ino = inode->i_ino;
2023                if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
2024                        goto out;
2025                i++;
2026                filp->f_pos++;
2027                /* fall through */
2028        case 1:
2029                ino = parent_ino(dentry);
2030                if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
2031                        goto out;
2032                i++;
2033                filp->f_pos++;
2034                /* fall through */
2035        default:
2036                i -= 2;
2037                if (i >= nents) {
2038                        ret = 1;
2039                        goto out;
2040                }
2041                p = ents + i;
2042                last = &ents[nents - 1];
2043                while (p <= last) {
2044                        if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
2045                                goto out;
2046                        filp->f_pos++;
2047                        p++;
2048                }
2049        }
2050
2051        ret = 1;
2052out:
2053        put_task_struct(task);
2054out_no_task:
2055        return ret;
2056}
2057
2058#ifdef CONFIG_SECURITY
2059static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2060                                  size_t count, loff_t *ppos)
2061{
2062        struct inode * inode = file->f_path.dentry->d_inode;
2063        char *p = NULL;
2064        ssize_t length;
2065        struct task_struct *task = get_proc_task(inode);
2066
2067        if (!task)
2068                return -ESRCH;
2069
2070        length = security_getprocattr(task,
2071                                      (char*)file->f_path.dentry->d_name.name,
2072                                      &p);
2073        put_task_struct(task);
2074        if (length > 0)
2075                length = simple_read_from_buffer(buf, count, ppos, p, length);
2076        kfree(p);
2077        return length;
2078}
2079
2080static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2081                                   size_t count, loff_t *ppos)
2082{
2083        struct inode * inode = file->f_path.dentry->d_inode;
2084        char *page;
2085        ssize_t length;
2086        struct task_struct *task = get_proc_task(inode);
2087
2088        length = -ESRCH;
2089        if (!task)
2090                goto out_no_task;
2091        if (count > PAGE_SIZE)
2092                count = PAGE_SIZE;
2093
2094        /* No partial writes. */
2095        length = -EINVAL;
2096        if (*ppos != 0)
2097                goto out;
2098
2099        length = -ENOMEM;
2100        page = (char*)__get_free_page(GFP_TEMPORARY);
2101        if (!page)
2102                goto out;
2103
2104        length = -EFAULT;
2105        if (copy_from_user(page, buf, count))
2106                goto out_free;
2107
2108        length = security_setprocattr(task,
2109                                      (char*)file->f_path.dentry->d_name.name,
2110                                      (void*)page, count);
2111out_free:
2112        free_page((unsigned long) page);
2113out:
2114        put_task_struct(task);
2115out_no_task:
2116        return length;
2117}
2118
2119static const struct file_operations proc_pid_attr_operations = {
2120        .read           = proc_pid_attr_read,
2121        .write          = proc_pid_attr_write,
2122};
2123
2124static const struct pid_entry attr_dir_stuff[] = {
2125        REG("current",    S_IRUGO|S_IWUGO, pid_attr),
2126        REG("prev",       S_IRUGO,         pid_attr),
2127        REG("exec",       S_IRUGO|S_IWUGO, pid_attr),
2128        REG("fscreate",   S_IRUGO|S_IWUGO, pid_attr),
2129        REG("keycreate",  S_IRUGO|S_IWUGO, pid_attr),
2130        REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr),
2131};
2132
2133static int proc_attr_dir_readdir(struct file * filp,
2134                             void * dirent, filldir_t filldir)
2135{
2136        return proc_pident_readdir(filp,dirent,filldir,
2137                                   attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
2138}
2139
2140static const struct file_operations proc_attr_dir_operations = {
2141        .read           = generic_read_dir,
2142        .readdir        = proc_attr_dir_readdir,
2143};
2144
2145static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2146                                struct dentry *dentry, struct nameidata *nd)
2147{
2148        return proc_pident_lookup(dir, dentry,
2149                                  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2150}
2151
2152static const struct inode_operations proc_attr_dir_inode_operations = {
2153        .lookup         = proc_attr_dir_lookup,
2154        .getattr        = pid_getattr,
2155        .setattr        = proc_setattr,
2156};
2157
2158#endif
2159
2160#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
2161static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2162                                         size_t count, loff_t *ppos)
2163{
2164        struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
2165        struct mm_struct *mm;
2166        char buffer[PROC_NUMBUF];
2167        size_t len;
2168        int ret;
2169
2170        if (!task)
2171                return -ESRCH;
2172
2173        ret = 0;
2174        mm = get_task_mm(task);
2175        if (mm) {
2176                len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2177                               ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2178                                MMF_DUMP_FILTER_SHIFT));
2179                mmput(mm);
2180                ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2181        }
2182
2183        put_task_struct(task);
2184
2185        return ret;
2186}
2187
2188static ssize_t proc_coredump_filter_write(struct file *file,
2189                                          const char __user *buf,
2190                                          size_t count,
2191                                          loff_t *ppos)
2192{
2193        struct task_struct *task;
2194        struct mm_struct *mm;
2195        char buffer[PROC_NUMBUF], *end;
2196        unsigned int val;
2197        int ret;
2198        int i;
2199        unsigned long mask;
2200
2201        ret = -EFAULT;
2202        memset(buffer, 0, sizeof(buffer));
2203        if (count > sizeof(buffer) - 1)
2204                count = sizeof(buffer) - 1;
2205        if (copy_from_user(buffer, buf, count))
2206                goto out_no_task;
2207
2208        ret = -EINVAL;
2209        val = (unsigned int)simple_strtoul(buffer, &end, 0);
2210        if (*end == '\n')
2211                end++;
2212        if (end - buffer == 0)
2213                goto out_no_task;
2214
2215        ret = -ESRCH;
2216        task = get_proc_task(file->f_dentry->d_inode);
2217        if (!task)
2218                goto out_no_task;
2219
2220        ret = end - buffer;
2221        mm = get_task_mm(task);
2222        if (!mm)
2223                goto out_no_mm;
2224
2225        for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2226                if (val & mask)
2227                        set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2228                else
2229                        clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2230        }
2231
2232        mmput(mm);
2233 out_no_mm:
2234        put_task_struct(task);
2235 out_no_task:
2236        return ret;
2237}
2238
2239static const struct file_operations proc_coredump_filter_operations = {
2240        .read           = proc_coredump_filter_read,
2241        .write          = proc_coredump_filter_write,
2242};
2243#endif
2244
2245/*
2246 * /proc/self:
2247 */
2248static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2249                              int buflen)
2250{
2251        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2252        pid_t tgid = task_tgid_nr_ns(current, ns);
2253        char tmp[PROC_NUMBUF];
2254        if (!tgid)
2255                return -ENOENT;
2256        sprintf(tmp, "%d", tgid);
2257        return vfs_readlink(dentry,buffer,buflen,tmp);
2258}
2259
2260static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2261{
2262        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2263        pid_t tgid = task_tgid_nr_ns(current, ns);
2264        char tmp[PROC_NUMBUF];
2265        if (!tgid)
2266                return ERR_PTR(-ENOENT);
2267        sprintf(tmp, "%d", task_tgid_nr_ns(current, ns));
2268        return ERR_PTR(vfs_follow_link(nd,tmp));
2269}
2270
2271static const struct inode_operations proc_self_inode_operations = {
2272        .readlink       = proc_self_readlink,
2273        .follow_link    = proc_self_follow_link,
2274};
2275
2276/*
2277 * proc base
2278 *
2279 * These are the directory entries in the root directory of /proc
2280 * that properly belong to the /proc filesystem, as they describe
2281 * describe something that is process related.
2282 */
2283static const struct pid_entry proc_base_stuff[] = {
2284        NOD("self", S_IFLNK|S_IRWXUGO,
2285                &proc_self_inode_operations, NULL, {}),
2286};
2287
2288/*
2289 *      Exceptional case: normally we are not allowed to unhash a busy
2290 * directory. In this case, however, we can do it - no aliasing problems
2291 * due to the way we treat inodes.
2292 */
2293static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2294{
2295        struct inode *inode = dentry->d_inode;
2296        struct task_struct *task = get_proc_task(inode);
2297        if (task) {
2298                put_task_struct(task);
2299                return 1;
2300        }
2301        d_drop(dentry);
2302        return 0;
2303}
2304
2305static struct dentry_operations proc_base_dentry_operations =
2306{
2307        .d_revalidate   = proc_base_revalidate,
2308        .d_delete       = pid_delete_dentry,
2309};
2310
2311static struct dentry *proc_base_instantiate(struct inode *dir,
2312        struct dentry *dentry, struct task_struct *task, const void *ptr)
2313{
2314        const struct pid_entry *p = ptr;
2315        struct inode *inode;
2316        struct proc_inode *ei;
2317        struct dentry *error = ERR_PTR(-EINVAL);
2318
2319        /* Allocate the inode */
2320        error = ERR_PTR(-ENOMEM);
2321        inode = new_inode(dir->i_sb);
2322        if (!inode)
2323                goto out;
2324
2325        /* Initialize the inode */
2326        ei = PROC_I(inode);
2327        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2328
2329        /*
2330         * grab the reference to the task.
2331         */
2332        ei->pid = get_task_pid(task, PIDTYPE_PID);
2333        if (!ei->pid)
2334                goto out_iput;
2335
2336        inode->i_uid = 0;
2337        inode->i_gid = 0;
2338        inode->i_mode = p->mode;
2339        if (S_ISDIR(inode->i_mode))
2340                inode->i_nlink = 2;
2341        if (S_ISLNK(inode->i_mode))
2342                inode->i_size = 64;
2343        if (p->iop)
2344                inode->i_op = p->iop;
2345        if (p->fop)
2346                inode->i_fop = p->fop;
2347        ei->op = p->op;
2348        dentry->d_op = &proc_base_dentry_operations;
2349        d_add(dentry, inode);
2350        error = NULL;
2351out:
2352        return error;
2353out_iput:
2354        iput(inode);
2355        goto out;
2356}
2357
2358static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2359{
2360        struct dentry *error;
2361        struct task_struct *task = get_proc_task(dir);
2362        const struct pid_entry *p, *last;
2363
2364        error = ERR_PTR(-ENOENT);
2365
2366        if (!task)
2367                goto out_no_task;
2368
2369        /* Lookup the directory entry */
2370        last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2371        for (p = proc_base_stuff; p <= last; p++) {
2372                if (p->len != dentry->d_name.len)
2373                        continue;
2374                if (!memcmp(dentry->d_name.name, p->name, p->len))
2375                        break;
2376        }
2377        if (p > last)
2378                goto out;
2379
2380        error = proc_base_instantiate(dir, dentry, task, p);
2381
2382out:
2383        put_task_struct(task);
2384out_no_task:
2385        return error;
2386}
2387
2388static int proc_base_fill_cache(struct file *filp, void *dirent,
2389        filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2390{
2391        return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2392                                proc_base_instantiate, task, p);
2393}
2394
2395#ifdef CONFIG_TASK_IO_ACCOUNTING
2396static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2397{
2398        struct task_io_accounting acct = task->ioac;
2399        unsigned long flags;
2400
2401        if (whole && lock_task_sighand(task, &flags)) {
2402                struct task_struct *t = task;
2403
2404                task_io_accounting_add(&acct, &task->signal->ioac);
2405                while_each_thread(task, t)
2406                        task_io_accounting_add(&acct, &t->ioac);
2407
2408                unlock_task_sighand(task, &flags);
2409        }
2410        return sprintf(buffer,
2411                        "rchar: %llu\n"
2412                        "wchar: %llu\n"
2413                        "syscr: %llu\n"
2414                        "syscw: %llu\n"
2415                        "read_bytes: %llu\n"
2416                        "write_bytes: %llu\n"
2417                        "cancelled_write_bytes: %llu\n",
2418                        (unsigned long long)acct.rchar,
2419                        (unsigned long long)acct.wchar,
2420                        (unsigned long long)acct.syscr,
2421                        (unsigned long long)acct.syscw,
2422                        (unsigned long long)acct.read_bytes,
2423                        (unsigned long long)acct.write_bytes,
2424                        (unsigned long long)acct.cancelled_write_bytes);
2425}
2426
/* I/O accounting for a single thread: do_io_accounting() with whole = 0. */
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
{
	const int whole = 0;

	return do_io_accounting(task, buffer, whole);
}
2431
/* I/O accounting for a whole thread group: do_io_accounting() with whole = 1. */
static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
{
	const int whole = 1;

	return do_io_accounting(task, buffer, whole);
}
2436#endif /* CONFIG_TASK_IO_ACCOUNTING */
2437
2438static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2439                                struct pid *pid, struct task_struct *task)
2440{
2441        seq_printf(m, "%08x\n", task->personality);
2442        return 0;
2443}
2444
2445/*
2446 * Thread groups
2447 */
2448static const struct file_operations proc_task_operations;
2449static const struct inode_operations proc_task_inode_operations;
2450
2451static const struct pid_entry tgid_base_stuff[] = {
2452        DIR("task",       S_IRUGO|S_IXUGO, task),
2453        DIR("fd",         S_IRUSR|S_IXUSR, fd),
2454        DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
2455#ifdef CONFIG_NET
2456        DIR("net",        S_IRUGO|S_IXUGO, net),
2457#endif
2458        REG("environ",    S_IRUSR, environ),
2459        INF("auxv",       S_IRUSR, pid_auxv),
2460        ONE("status",     S_IRUGO, pid_status),
2461        ONE("personality", S_IRUSR, pid_personality),
2462        INF("limits",     S_IRUSR, pid_limits),
2463#ifdef CONFIG_SCHED_DEBUG
2464        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
2465#endif
2466#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2467        INF("syscall",    S_IRUSR, pid_syscall),
2468#endif
2469        INF("cmdline",    S_IRUGO, pid_cmdline),
2470        ONE("stat",       S_IRUGO, tgid_stat),
2471        ONE("statm",      S_IRUGO, pid_statm),
2472        REG("maps",       S_IRUGO, maps),
2473#ifdef CONFIG_NUMA
2474        REG("numa_maps",  S_IRUGO, numa_maps),
2475#endif
2476        REG("mem",        S_IRUSR|S_IWUSR, mem),
2477        LNK("cwd",        cwd),
2478        LNK("root",       root),
2479        LNK("exe",        exe),
2480        REG("mounts",     S_IRUGO, mounts),
2481        REG("mountinfo",  S_IRUGO, mountinfo),
2482        REG("mountstats", S_IRUSR, mountstats),
2483#ifdef CONFIG_PROC_PAGE_MONITOR
2484        REG("clear_refs", S_IWUSR, clear_refs),
2485        REG("smaps",      S_IRUGO, smaps),
2486        REG("pagemap",    S_IRUSR, pagemap),
2487#endif
2488#ifdef CONFIG_SECURITY
2489        DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
2490#endif
2491#ifdef CONFIG_KALLSYMS
2492        INF("wchan",      S_IRUGO, pid_wchan),
2493#endif
2494#ifdef CONFIG_SCHEDSTATS
2495        INF("schedstat",  S_IRUGO, pid_schedstat),
2496#endif
2497#ifdef CONFIG_LATENCYTOP
2498        REG("latency",  S_IRUGO, lstats),
2499#endif
2500#ifdef CONFIG_PROC_PID_CPUSET
2501        REG("cpuset",     S_IRUGO, cpuset),
2502#endif
2503#ifdef CONFIG_CGROUPS
2504        REG("cgroup",  S_IRUGO, cgroup),
2505#endif
2506        INF("oom_score",  S_IRUGO, oom_score),
2507        REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
2508#ifdef CONFIG_AUDITSYSCALL
2509        REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
2510        REG("sessionid",  S_IRUGO, sessionid),
2511#endif
2512#ifdef CONFIG_FAULT_INJECTION
2513        REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2514#endif
2515#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
2516        REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2517#endif
2518#ifdef CONFIG_TASK_IO_ACCOUNTING
2519        INF("io",       S_IRUGO, tgid_io_accounting),
2520#endif
2521};
2522
2523static int proc_tgid_base_readdir(struct file * filp,
2524                             void * dirent, filldir_t filldir)
2525{
2526        return proc_pident_readdir(filp,dirent,filldir,
2527                                   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
2528}
2529
2530static const struct file_operations proc_tgid_base_operations = {
2531        .read           = generic_read_dir,
2532        .readdir        = proc_tgid_base_readdir,
2533};
2534
2535static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2536        return proc_pident_lookup(dir, dentry,
2537                                  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
2538}
2539
2540static const struct inode_operations proc_tgid_base_inode_operations = {
2541        .lookup         = proc_tgid_base_lookup,
2542        .getattr        = pid_getattr,
2543        .setattr        = proc_setattr,
2544};
2545
2546static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2547{
2548        struct dentry *dentry, *leader, *dir;
2549        char buf[PROC_NUMBUF];
2550        struct qstr name;
2551
2552        name.name = buf;
2553        name.len = snprintf(buf, sizeof(buf), "%d", pid);
2554        dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2555        if (dentry) {
2556                if (!(current->flags & PF_EXITING))
2557                        shrink_dcache_parent(dentry);
2558                d_drop(dentry);
2559                dput(dentry);
2560        }
2561
2562        if (tgid == 0)
2563                goto out;
2564
2565        name.name = buf;
2566        name.len = snprintf(buf, sizeof(buf), "%d", tgid);
2567        leader = d_hash_and_lookup(mnt->mnt_root, &name);
2568        if (!leader)
2569                goto out;
2570
2571        name.name = "task";
2572        name.len = strlen(name.name);
2573        dir = d_hash_and_lookup(leader, &name);
2574        if (!dir)
2575                goto out_put_leader;
2576
2577        name.name = buf;
2578        name.len = snprintf(buf, sizeof(buf), "%d", pid);
2579        dentry = d_hash_and_lookup(dir, &name);
2580        if (dentry) {
2581                shrink_dcache_parent(dentry);
2582                d_drop(dentry);
2583                dput(dentry);
2584        }
2585
2586        dput(dir);
2587out_put_leader:
2588        dput(leader);
2589out:
2590        return;
2591}
2592
2593/**
2594 * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
2595 * @task: task that should be flushed.
2596 *
2597 * When flushing dentries from proc, one needs to flush them from global
2598 * proc (proc_mnt) and from all the namespaces' procs this task was seen
2599 * in. This call is supposed to do all of this job.
2600 *
2601 * Looks in the dcache for
2602 * /proc/@pid
2603 * /proc/@tgid/task/@pid
2604 * if either directory is present flushes it and all of it'ts children
2605 * from the dcache.
2606 *
2607 * It is safe and reasonable to cache /proc entries for a task until
2608 * that task exits.  After that they just clog up the dcache with
2609 * useless entries, possibly causing useful dcache entries to be
2610 * flushed instead.  This routine is proved to flush those useless
2611 * dcache entries at process exit time.
2612 *
2613 * NOTE: This routine is just an optimization so it does not guarantee
2614 *       that no dcache entries will exist at process exit time it
2615 *       just makes it very unlikely that any will persist.
2616 */
2617
2618void proc_flush_task(struct task_struct *task)
2619{
2620        int i;
2621        struct pid *pid, *tgid = NULL;
2622        struct upid *upid;
2623
2624        pid = task_pid(task);
2625        if (thread_group_leader(task))
2626                tgid = task_tgid(task);
2627
2628        for (i = 0; i <= pid->level; i++) {
2629                upid = &pid->numbers[i];
2630                proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2631                        tgid ? tgid->numbers[i].nr : 0);
2632        }
2633
2634        upid = &pid->numbers[pid->level];
2635        if (upid->nr == 1)
2636                pid_ns_release_proc(upid->ns);
2637}
2638
2639static struct dentry *proc_pid_instantiate(struct inode *dir,
2640                                           struct dentry * dentry,
2641                                           struct task_struct *task, const void *ptr)
2642{
2643        struct dentry *error = ERR_PTR(-ENOENT);
2644        struct inode *inode;
2645
2646        inode = proc_pid_make_inode(dir->i_sb, task);
2647        if (!inode)
2648                goto out;
2649
2650        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2651        inode->i_op = &proc_tgid_base_inode_operations;
2652        inode->i_fop = &proc_tgid_base_operations;
2653        inode->i_flags|=S_IMMUTABLE;
2654
2655        inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2656                ARRAY_SIZE(tgid_base_stuff));
2657
2658        dentry->d_op = &pid_dentry_operations;
2659
2660        d_add(dentry, inode);
2661        /* Close the race of the process dying before we return the dentry */
2662        if (pid_revalidate(dentry, NULL))
2663                error = NULL;
2664out:
2665        return error;
2666}
2667
2668struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2669{
2670        struct dentry *result = ERR_PTR(-ENOENT);
2671        struct task_struct *task;
2672        unsigned tgid;
2673        struct pid_namespace *ns;
2674
2675        result = proc_base_lookup(dir, dentry);
2676        if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2677                goto out;
2678
2679        tgid = name_to_int(dentry);
2680        if (tgid == ~0U)
2681                goto out;
2682
2683        ns = dentry->d_sb->s_fs_info;
2684        rcu_read_lock();
2685        task = find_task_by_pid_ns(tgid, ns);
2686        if (task)
2687                get_task_struct(task);
2688        rcu_read_unlock();
2689        if (!task)
2690                goto out;
2691
2692        result = proc_pid_instantiate(dir, dentry, task, NULL);
2693        put_task_struct(task);
2694out:
2695        return result;
2696}
2697
/*
 * Cursor for walking thread groups in pid order.
 *
 * next_tgid() finds the first task with tgid >= the tgid stored here and
 * returns an updated cursor.
 */
struct tgid_iter {
	unsigned int tgid;		/* number to search from / number found */
	struct task_struct *task;	/* task found, or NULL when none is left */
};
2706static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
2707{
2708        struct pid *pid;
2709
2710        if (iter.task)
2711                put_task_struct(iter.task);
2712        rcu_read_lock();
2713retry:
2714        iter.task = NULL;
2715        pid = find_ge_pid(iter.tgid, ns);
2716        if (pid) {
2717                iter.tgid = pid_nr_ns(pid, ns);
2718                iter.task = pid_task(pid, PIDTYPE_PID);
2719                /* What we to know is if the pid we have find is the
2720                 * pid of a thread_group_leader.  Testing for task
2721                 * being a thread_group_leader is the obvious thing
2722                 * todo but there is a window when it fails, due to
2723                 * the pid transfer logic in de_thread.
2724                 *
2725                 * So we perform the straight forward test of seeing
2726                 * if the pid we have found is the pid of a thread
2727                 * group leader, and don't worry if the task we have
2728                 * found doesn't happen to be a thread group leader.
2729                 * As we don't care in the case of readdir.
2730                 */
2731                if (!iter.task || !has_group_leader_pid(iter.task)) {
2732                        iter.tgid += 1;
2733                        goto retry;
2734                }
2735                get_task_struct(iter.task);
2736        }
2737        rcu_read_unlock();
2738        return iter;
2739}
2740
/* f_pos of tgid 0 in the proc root: the proc_base_stuff rows come first. */
#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
2742
2743static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2744        struct tgid_iter iter)
2745{
2746        char name[PROC_NUMBUF];
2747        int len = snprintf(name, sizeof(name), "%d", iter.tgid);
2748        return proc_fill_cache(filp, dirent, filldir, name, len,
2749                                proc_pid_instantiate, iter.task, NULL);
2750}
2751
2752/* for the /proc/ directory itself, after non-process stuff has been done */
2753int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2754{
2755        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2756        struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
2757        struct tgid_iter iter;
2758        struct pid_namespace *ns;
2759
2760        if (!reaper)
2761                goto out_no_task;
2762
2763        for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2764                const struct pid_entry *p = &proc_base_stuff[nr];
2765                if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2766                        goto out;
2767        }
2768
2769        ns = filp->f_dentry->d_sb->s_fs_info;
2770        iter.task = NULL;
2771        iter.tgid = filp->f_pos - TGID_OFFSET;
2772        for (iter = next_tgid(ns, iter);
2773             iter.task;
2774             iter.tgid += 1, iter = next_tgid(ns, iter)) {
2775                filp->f_pos = iter.tgid + TGID_OFFSET;
2776                if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
2777                        put_task_struct(iter.task);
2778                        goto out;
2779                }
2780        }
2781        filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
2782out:
2783        put_task_struct(reaper);
2784out_no_task:
2785        return 0;
2786}
2787
2788/*
2789 * Tasks
2790 */
2791static const struct pid_entry tid_base_stuff[] = {
2792        DIR("fd",        S_IRUSR|S_IXUSR, fd),
2793        DIR("fdinfo",    S_IRUSR|S_IXUSR, fdinfo),
2794        REG("environ",   S_IRUSR, environ),
2795        INF("auxv",      S_IRUSR, pid_auxv),
2796        ONE("status",    S_IRUGO, pid_status),
2797        ONE("personality", S_IRUSR, pid_personality),
2798        INF("limits",    S_IRUSR, pid_limits),
2799#ifdef CONFIG_SCHED_DEBUG
2800        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
2801#endif
2802#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2803        INF("syscall",   S_IRUSR, pid_syscall),
2804#endif
2805        INF("cmdline",   S_IRUGO, pid_cmdline),
2806        ONE("stat",      S_IRUGO, tid_stat),
2807        ONE("statm",     S_IRUGO, pid_statm),
2808        REG("maps",      S_IRUGO, maps),
2809#ifdef CONFIG_NUMA
2810        REG("numa_maps", S_IRUGO, numa_maps),
2811#endif
2812        REG("mem",       S_IRUSR|S_IWUSR, mem),
2813        LNK("cwd",       cwd),
2814        LNK("root",      root),
2815        LNK("exe",       exe),
2816        REG("mounts",    S_IRUGO, mounts),
2817        REG("mountinfo",  S_IRUGO, mountinfo),
2818#ifdef CONFIG_PROC_PAGE_MONITOR
2819        REG("clear_refs", S_IWUSR, clear_refs),
2820        REG("smaps",     S_IRUGO, smaps),
2821        REG("pagemap",    S_IRUSR, pagemap),
2822#endif
2823#ifdef CONFIG_SECURITY
2824        DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
2825#endif
2826#ifdef CONFIG_KALLSYMS
2827        INF("wchan",     S_IRUGO, pid_wchan),
2828#endif
2829#ifdef CONFIG_SCHEDSTATS
2830        INF("schedstat", S_IRUGO, pid_schedstat),
2831#endif
2832#ifdef CONFIG_LATENCYTOP
2833        REG("latency",  S_IRUGO, lstats),
2834#endif
2835#ifdef CONFIG_PROC_PID_CPUSET
2836        REG("cpuset",    S_IRUGO, cpuset),
2837#endif
2838#ifdef CONFIG_CGROUPS
2839        REG("cgroup",  S_IRUGO, cgroup),
2840#endif
2841        INF("oom_score", S_IRUGO, oom_score),
2842        REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
2843#ifdef CONFIG_AUDITSYSCALL
2844        REG("loginuid",  S_IWUSR|S_IRUGO, loginuid),
2845        REG("sessionid",  S_IRUSR, sessionid),
2846#endif
2847#ifdef CONFIG_FAULT_INJECTION
2848        REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2849#endif
2850#ifdef CONFIG_TASK_IO_ACCOUNTING
2851        INF("io",       S_IRUGO, tid_io_accounting),
2852#endif
2853};
2854
2855static int proc_tid_base_readdir(struct file * filp,
2856                             void * dirent, filldir_t filldir)
2857{
2858        return proc_pident_readdir(filp,dirent,filldir,
2859                                   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
2860}
2861
2862static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
2863        return proc_pident_lookup(dir, dentry,
2864                                  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
2865}
2866
2867static const struct file_operations proc_tid_base_operations = {
2868        .read           = generic_read_dir,
2869        .readdir        = proc_tid_base_readdir,
2870};
2871
2872static const struct inode_operations proc_tid_base_inode_operations = {
2873        .lookup         = proc_tid_base_lookup,
2874        .getattr        = pid_getattr,
2875        .setattr        = proc_setattr,
2876};
2877
2878static struct dentry *proc_task_instantiate(struct inode *dir,
2879        struct dentry *dentry, struct task_struct *task, const void *ptr)
2880{
2881        struct dentry *error = ERR_PTR(-ENOENT);
2882        struct inode *inode;
2883        inode = proc_pid_make_inode(dir->i_sb, task);
2884
2885        if (!inode)
2886                goto out;
2887        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2888        inode->i_op = &proc_tid_base_inode_operations;
2889        inode->i_fop = &proc_tid_base_operations;
2890        inode->i_flags|=S_IMMUTABLE;
2891
2892        inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
2893                ARRAY_SIZE(tid_base_stuff));
2894
2895        dentry->d_op = &pid_dentry_operations;
2896
2897        d_add(dentry, inode);
2898        /* Close the race of the process dying before we return the dentry */
2899        if (pid_revalidate(dentry, NULL))
2900                error = NULL;
2901out:
2902        return error;
2903}
2904
2905static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2906{
2907        struct dentry *result = ERR_PTR(-ENOENT);
2908        struct task_struct *task;
2909        struct task_struct *leader = get_proc_task(dir);
2910        unsigned tid;
2911        struct pid_namespace *ns;
2912
2913        if (!leader)
2914                goto out_no_task;
2915
2916        tid = name_to_int(dentry);
2917        if (tid == ~0U)
2918                goto out;
2919
2920        ns = dentry->d_sb->s_fs_info;
2921        rcu_read_lock();
2922        task = find_task_by_pid_ns(tid, ns);
2923        if (task)
2924                get_task_struct(task);
2925        rcu_read_unlock();
2926        if (!task)
2927                goto out;
2928        if (!same_thread_group(leader, task))
2929                goto out_drop_task;
2930
2931        result = proc_task_instantiate(dir, dentry, task, NULL);
2932out_drop_task:
2933        put_task_struct(task);
2934out:
2935        put_task_struct(leader);
2936out_no_task:
2937        return result;
2938}
2939
2940/*
2941 * Find the first tid of a thread group to return to user space.
2942 *
2943 * Usually this is just the thread group leader, but if the users
2944 * buffer was too small or there was a seek into the middle of the
2945 * directory we have more work todo.
2946 *
2947 * In the case of a short read we start with find_task_by_pid.
2948 *
2949 * In the case of a seek we start with the leader and walk nr
2950 * threads past it.
2951 */
2952static struct task_struct *first_tid(struct task_struct *leader,
2953                int tid, int nr, struct pid_namespace *ns)
2954{
2955        struct task_struct *pos;
2956
2957        rcu_read_lock();
2958        /* Attempt to start with the pid of a thread */
2959        if (tid && (nr > 0)) {
2960                pos = find_task_by_pid_ns(tid, ns);
2961                if (pos && (pos->group_leader == leader))
2962                        goto found;
2963        }
2964
2965        /* If nr exceeds the number of threads there is nothing todo */
2966        pos = NULL;
2967        if (nr && nr >= get_nr_threads(leader))
2968                goto out;
2969
2970        /* If we haven't found our starting place yet start
2971         * with the leader and walk nr threads forward.
2972         */
2973        for (pos = leader; nr > 0; --nr) {
2974                pos = next_thread(pos);
2975                if (pos == leader) {
2976                        pos = NULL;
2977                        goto out;
2978                }
2979        }
2980found:
2981        get_task_struct(pos);
2982out:
2983        rcu_read_unlock();
2984        return pos;
2985}
2986
2987/*
2988 * Find the next thread in the thread list.
2989 * Return NULL if there is an error or no next thread.
2990 *
2991 * The reference to the input task_struct is released.
2992 */
2993static struct task_struct *next_tid(struct task_struct *start)
2994{
2995        struct task_struct *pos = NULL;
2996        rcu_read_lock();
2997        if (pid_alive(start)) {
2998                pos = next_thread(start);
2999                if (thread_group_leader(pos))
3000                        pos = NULL;
3001                else
3002                        get_task_struct(pos);
3003        }
3004        rcu_read_unlock();
3005        put_task_struct(start);
3006        return pos;
3007}
3008
3009static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
3010        struct task_struct *task, int tid)
3011{
3012        char name[PROC_NUMBUF];
3013        int len = snprintf(name, sizeof(name), "%d", tid);
3014        return proc_fill_cache(filp, dirent, filldir, name, len,
3015                                proc_task_instantiate, task, NULL);
3016}
3017
3018/* for the /proc/TGID/task/ directories */
3019static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
3020{
3021        struct dentry *dentry = filp->f_path.dentry;
3022        struct inode *inode = dentry->d_inode;
3023        struct task_struct *leader = NULL;
3024        struct task_struct *task;
3025        int retval = -ENOENT;
3026        ino_t ino;
3027        int tid;
3028        unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
3029        struct pid_namespace *ns;
3030
3031        task = get_proc_task(inode);
3032        if (!task)
3033                goto out_no_task;
3034        rcu_read_lock();
3035        if (pid_alive(task)) {
3036                leader = task->group_leader;
3037                get_task_struct(leader);
3038        }
3039        rcu_read_unlock();
3040        put_task_struct(task);
3041        if (!leader)
3042                goto out_no_task;
3043        retval = 0;
3044
3045        switch (pos) {
3046        case 0:
3047                ino = inode->i_ino;
3048                if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
3049                        goto out;
3050                pos++;
3051                /* fall through */
3052        case 1:
3053                ino = parent_ino(dentry);
3054                if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
3055                        goto out;
3056                pos++;
3057                /* fall through */
3058        }
3059
3060        /* f_version caches the tgid value that the last readdir call couldn't
3061         * return. lseek aka telldir automagically resets f_version to 0.
3062         */
3063        ns = filp->f_dentry->d_sb->s_fs_info;
3064        tid = (int)filp->f_version;
3065        filp->f_version = 0;
3066        for (task = first_tid(leader, tid, pos - 2, ns);
3067             task;
3068             task = next_tid(task), pos++) {
3069                tid = task_pid_nr_ns(task, ns);
3070                if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
3071                        /* returning this tgid failed, save it as the first
3072                         * pid for the next readir call */
3073                        filp->f_version = (u64)tid;
3074                        put_task_struct(task);
3075                        break;
3076                }
3077        }
3078out:
3079        filp->f_pos = pos;
3080        put_task_struct(leader);
3081out_no_task:
3082        return retval;
3083}
3084
3085static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
3086{
3087        struct inode *inode = dentry->d_inode;
3088        struct task_struct *p = get_proc_task(inode);
3089        generic_fillattr(inode, stat);
3090
3091        if (p) {
3092                stat->nlink += get_nr_threads(p);
3093                put_task_struct(p);
3094        }
3095
3096        return 0;
3097}
3098
3099static const struct inode_operations proc_task_inode_operations = {
3100        .lookup         = proc_task_lookup,
3101        .getattr        = proc_task_getattr,
3102        .setattr        = proc_setattr,
3103};
3104
3105static const struct file_operations proc_task_operations = {
3106        .read           = generic_read_dir,
3107        .readdir        = proc_task_readdir,
3108};
3109
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.