linux/kernel/softirq.c
<<
>>
Prefs
   1/*
   2 *      linux/kernel/softirq.c
   3 *
   4 *      Copyright (C) 1992 Linus Torvalds
   5 *
   6 *      Distribute under GPLv2.
   7 *
   8 *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/kernel_stat.h>
  13#include <linux/interrupt.h>
  14#include <linux/init.h>
  15#include <linux/mm.h>
  16#include <linux/notifier.h>
  17#include <linux/percpu.h>
  18#include <linux/cpu.h>
  19#include <linux/freezer.h>
  20#include <linux/kthread.h>
  21#include <linux/rcupdate.h>
  22#include <linux/smp.h>
  23#include <linux/tick.h>
  24
  25#include <asm/irq.h>
  /*
     - No shared variables: all data is CPU-local.
     - If a softirq needs serialization, it must serialize itself
       with its own spinlocks.
     - Even when a softirq is serialized, only the local CPU is marked
       for execution, which gives a weak form of CPU binding.  Whether
       that actually results in better locality is still unclear.

     Examples:
     - NET RX softirq. It is multithreaded and does not require
       any global serialization.
     - NET TX softirq. It kicks software netdevice queues, hence
       it is logically serialized per device, but this serialization
       is invisible to common code.
     - Tasklets: each tasklet is serialized with respect to itself.
   */
  43
  44#ifndef __ARCH_IRQ_STAT
  45irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
  46EXPORT_SYMBOL(irq_stat);
  47#endif
  48
  49static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
  50
  51static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  52
  53/*
  54 * we cannot loop indefinitely here to avoid userspace starvation,
  55 * but we also don't want to introduce a worst case 1/HZ latency
  56 * to the pending events, so lets the scheduler to balance
  57 * the softirq load for us.
  58 */
  59static inline void wakeup_softirqd(void)
  60{
  61        /* Interrupts are disabled: no need to stop preemption */
  62        struct task_struct *tsk = __get_cpu_var(ksoftirqd);
  63
  64        if (tsk && tsk->state != TASK_RUNNING)
  65                wake_up_process(tsk);
  66}
  67
  68/*
  69 * This one is for softirq.c-internal use,
  70 * where hardirqs are disabled legitimately:
  71 */
  72#ifdef CONFIG_TRACE_IRQFLAGS
  73static void __local_bh_disable(unsigned long ip)
  74{
  75        unsigned long flags;
  76
  77        WARN_ON_ONCE(in_irq());
  78
  79        raw_local_irq_save(flags);
  80        add_preempt_count(SOFTIRQ_OFFSET);
  81        /*
  82         * Were softirqs turned off above:
  83         */
  84        if (softirq_count() == SOFTIRQ_OFFSET)
  85                trace_softirqs_off(ip);
  86        raw_local_irq_restore(flags);
  87}
  88#else /* !CONFIG_TRACE_IRQFLAGS */
  89static inline void __local_bh_disable(unsigned long ip)
  90{
  91        add_preempt_count(SOFTIRQ_OFFSET);
  92        barrier();
  93}
  94#endif /* CONFIG_TRACE_IRQFLAGS */
  95
  /*
   * Disable softirq processing on this CPU, recording our caller's
   * address as the location that turned softirqs off.
   */
  void local_bh_disable(void)
  {
          unsigned long caller = (unsigned long)__builtin_return_address(0);

          __local_bh_disable(caller);
  }
  EXPORT_SYMBOL(local_bh_disable);
 102
 103void __local_bh_enable(void)
 104{
 105        WARN_ON_ONCE(in_irq());
 106
 107        /*
 108         * softirqs should never be enabled by __local_bh_enable(),
 109         * it always nests inside local_bh_enable() sections:
 110         */
 111        WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
 112
 113        sub_preempt_count(SOFTIRQ_OFFSET);
 114}
 115EXPORT_SYMBOL_GPL(__local_bh_enable);
 116
 117/*
 118 * Special-case - softirqs can safely be enabled in
 119 * cond_resched_softirq(), or by __do_softirq(),
 120 * without processing still-pending softirqs:
 121 */
 122void _local_bh_enable(void)
 123{
 124        WARN_ON_ONCE(in_irq());
 125        WARN_ON_ONCE(!irqs_disabled());
 126
 127        if (softirq_count() == SOFTIRQ_OFFSET)
 128                trace_softirqs_on((unsigned long)__builtin_return_address(0));
 129        sub_preempt_count(SOFTIRQ_OFFSET);
 130}
 131
 132EXPORT_SYMBOL(_local_bh_enable);
 133
 134void local_bh_enable(void)
 135{
 136#ifdef CONFIG_TRACE_IRQFLAGS
 137        unsigned long flags;
 138
 139        WARN_ON_ONCE(in_irq());
 140#endif
 141        WARN_ON_ONCE(irqs_disabled());
 142
 143#ifdef CONFIG_TRACE_IRQFLAGS
 144        local_irq_save(flags);
 145#endif
 146        /*
 147         * Are softirqs going to be turned on now:
 148         */
 149        if (softirq_count() == SOFTIRQ_OFFSET)
 150                trace_softirqs_on((unsigned long)__builtin_return_address(0));
 151        /*
 152         * Keep preemption disabled until we are done with
 153         * softirq processing:
 154         */
 155        sub_preempt_count(SOFTIRQ_OFFSET - 1);
 156
 157        if (unlikely(!in_interrupt() && local_softirq_pending()))
 158                do_softirq();
 159
 160        dec_preempt_count();
 161#ifdef CONFIG_TRACE_IRQFLAGS
 162        local_irq_restore(flags);
 163#endif
 164        preempt_check_resched();
 165}
 166EXPORT_SYMBOL(local_bh_enable);
 167
 168void local_bh_enable_ip(unsigned long ip)
 169{
 170#ifdef CONFIG_TRACE_IRQFLAGS
 171        unsigned long flags;
 172
 173        WARN_ON_ONCE(in_irq());
 174
 175        local_irq_save(flags);
 176#endif
 177        /*
 178         * Are softirqs going to be turned on now:
 179         */
 180        if (softirq_count() == SOFTIRQ_OFFSET)
 181                trace_softirqs_on(ip);
 182        /*
 183         * Keep preemption disabled until we are done with
 184         * softirq processing:
 185         */
 186        sub_preempt_count(SOFTIRQ_OFFSET - 1);
 187
 188        if (unlikely(!in_interrupt() && local_softirq_pending()))
 189                do_softirq();
 190
 191        dec_preempt_count();
 192#ifdef CONFIG_TRACE_IRQFLAGS
 193        local_irq_restore(flags);
 194#endif
 195        preempt_check_resched();
 196}
 197EXPORT_SYMBOL(local_bh_enable_ip);
 198
 199/*
 200 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 201 * and we fall back to softirqd after that.
 202 *
 203 * This number has been established via experimentation.
 204 * The two things to balance is latency against fairness -
 205 * we want to handle softirqs as soon as possible, but they
 206 * should not be able to lock up the box.
 207 */
 208#define MAX_SOFTIRQ_RESTART 10
 209
 210asmlinkage void __do_softirq(void)
 211{
 212        struct softirq_action *h;
 213        __u32 pending;
 214        int max_restart = MAX_SOFTIRQ_RESTART;
 215        int cpu;
 216
 217        pending = local_softirq_pending();
 218        account_system_vtime(current);
 219
 220        __local_bh_disable((unsigned long)__builtin_return_address(0));
 221        trace_softirq_enter();
 222
 223        cpu = smp_processor_id();
 224restart:
 225        /* Reset the pending bitmask before enabling irqs */
 226        set_softirq_pending(0);
 227
 228        local_irq_enable();
 229
 230        h = softirq_vec;
 231
 232        do {
 233                if (pending & 1) {
 234                        h->action(h);
 235                        rcu_bh_qsctr_inc(cpu);
 236                }
 237                h++;
 238                pending >>= 1;
 239        } while (pending);
 240
 241        local_irq_disable();
 242
 243        pending = local_softirq_pending();
 244        if (pending && --max_restart)
 245                goto restart;
 246
 247        if (pending)
 248                wakeup_softirqd();
 249
 250        trace_softirq_exit();
 251
 252        account_system_vtime(current);
 253        _local_bh_enable();
 254}
 255
 256#ifndef __ARCH_HAS_DO_SOFTIRQ
 257
 258asmlinkage void do_softirq(void)
 259{
 260        __u32 pending;
 261        unsigned long flags;
 262
 263        if (in_interrupt())
 264                return;
 265
 266        local_irq_save(flags);
 267
 268        pending = local_softirq_pending();
 269
 270        if (pending)
 271                __do_softirq();
 272
 273        local_irq_restore(flags);
 274}
 275
 276#endif
 277
 /*
  * Enter an interrupt context.
  *
  * With CONFIG_NO_HZ, an idle CPU additionally gets its idle/tick
  * bookkeeping updated around the __irq_enter() call.
  */
 void irq_enter(void)
 {
 #ifdef CONFIG_NO_HZ
         int cpu = smp_processor_id();

         if (idle_cpu(cpu)) {
                 if (!in_interrupt())
                         tick_nohz_stop_idle(cpu);
         }
 #endif
         __irq_enter();
 #ifdef CONFIG_NO_HZ
         if (idle_cpu(cpu))
                 tick_nohz_update_jiffies();
 #endif
 }
 294
 295#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
 296# define invoke_softirq()       __do_softirq()
 297#else
 298# define invoke_softirq()       do_softirq()
 299#endif
 300
 301/*
 302 * Exit an interrupt context. Process softirqs if needed and possible:
 303 */
 304void irq_exit(void)
 305{
 306        account_system_vtime(current);
 307        trace_hardirq_exit();
 308        sub_preempt_count(IRQ_EXIT_OFFSET);
 309        if (!in_interrupt() && local_softirq_pending())
 310                invoke_softirq();
 311
 312#ifdef CONFIG_NO_HZ
 313        /* Make sure that timer wheel updates are propagated */
 314        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
 315                tick_nohz_stop_sched_tick();
 316        rcu_irq_exit();
 317#endif
 318        preempt_enable_no_resched();
 319}
 320
 /*
  * Mark softirq @nr pending on this CPU.
  *
  * This function must run with irqs disabled!
  */
 inline void raise_softirq_irqoff(unsigned int nr)
 {
         __raise_softirq_irqoff(nr);

         /*
          * In an interrupt or softirq (which also covers
          * softirq-disabled code) we are done: the softirq will
          * actually run once we return from the irq or softirq.
          */
         if (in_interrupt())
                 return;

         /*
          * Otherwise wake up ksoftirqd to make sure the softirq
          * gets scheduled soon.
          */
         wakeup_softirqd();
 }
 340
 /*
  * Mark softirq @nr pending; usable regardless of the caller's irq
  * state, since interrupts are disabled only around the raise itself.
  */
 void raise_softirq(unsigned int nr)
 {
         unsigned long irqflags;

         local_irq_save(irqflags);
         raise_softirq_irqoff(nr);
         local_irq_restore(irqflags);
 }
 349
 350void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
 351{
 352        softirq_vec[nr].data = data;
 353        softirq_vec[nr].action = action;
 354}
 355
 356/* Tasklets */
 357struct tasklet_head
 358{
 359        struct tasklet_struct *head;
 360        struct tasklet_struct **tail;
 361};
 362
 363/* Some compilers disobey section attribute on statics when not
 364   initialized -- RR */
 365static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
 366static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
 367
 368void __tasklet_schedule(struct tasklet_struct *t)
 369{
 370        unsigned long flags;
 371
 372        local_irq_save(flags);
 373        t->next = NULL;
 374        *__get_cpu_var(tasklet_vec).tail = t;
 375        __get_cpu_var(tasklet_vec).tail = &(t->next);
 376        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 377        local_irq_restore(flags);
 378}
 379
 380EXPORT_SYMBOL(__tasklet_schedule);
 381
 382void __tasklet_hi_schedule(struct tasklet_struct *t)
 383{
 384        unsigned long flags;
 385
 386        local_irq_save(flags);
 387        t->next = NULL;
 388        *__get_cpu_var(tasklet_hi_vec).tail = t;
 389        __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
 390        raise_softirq_irqoff(HI_SOFTIRQ);
 391        local_irq_restore(flags);
 392}
 393
 394EXPORT_SYMBOL(__tasklet_hi_schedule);
 395
 396static void tasklet_action(struct softirq_action *a)
 397{
 398        struct tasklet_struct *list;
 399
 400        local_irq_disable();
 401        list = __get_cpu_var(tasklet_vec).head;
 402        __get_cpu_var(tasklet_vec).head = NULL;
 403        __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
 404        local_irq_enable();
 405
 406        while (list) {
 407                struct tasklet_struct *t = list;
 408
 409                list = list->next;
 410
 411                if (tasklet_trylock(t)) {
 412                        if (!atomic_read(&t->count)) {
 413                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 414                                        BUG();
 415                                t->func(t->data);
 416                                tasklet_unlock(t);
 417                                continue;
 418                        }
 419                        tasklet_unlock(t);
 420                }
 421
 422                local_irq_disable();
 423                t->next = NULL;
 424                *__get_cpu_var(tasklet_vec).tail = t;
 425                __get_cpu_var(tasklet_vec).tail = &(t->next);
 426                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
 427                local_irq_enable();
 428        }
 429}
 430
 431static void tasklet_hi_action(struct softirq_action *a)
 432{
 433        struct tasklet_struct *list;
 434
 435        local_irq_disable();
 436        list = __get_cpu_var(tasklet_hi_vec).head;
 437        __get_cpu_var(tasklet_hi_vec).head = NULL;
 438        __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
 439        local_irq_enable();
 440
 441        while (list) {
 442                struct tasklet_struct *t = list;
 443
 444                list = list->next;
 445
 446                if (tasklet_trylock(t)) {
 447                        if (!atomic_read(&t->count)) {
 448                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 449                                        BUG();
 450                                t->func(t->data);
 451                                tasklet_unlock(t);
 452                                continue;
 453                        }
 454                        tasklet_unlock(t);
 455                }
 456
 457                local_irq_disable();
 458                t->next = NULL;
 459                *__get_cpu_var(tasklet_hi_vec).tail = t;
 460                __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
 461                __raise_softirq_irqoff(HI_SOFTIRQ);
 462                local_irq_enable();
 463        }
 464}
 465
 466
 467void tasklet_init(struct tasklet_struct *t,
 468                  void (*func)(unsigned long), unsigned long data)
 469{
 470        t->next = NULL;
 471        t->state = 0;
 472        atomic_set(&t->count, 0);
 473        t->func = func;
 474        t->data = data;
 475}
 476
 477EXPORT_SYMBOL(tasklet_init);
 478
 479void tasklet_kill(struct tasklet_struct *t)
 480{
 481        if (in_interrupt())
 482                printk("Attempt to kill tasklet from interrupt\n");
 483
 484        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 485                do
 486                        yield();
 487                while (test_bit(TASKLET_STATE_SCHED, &t->state));
 488        }
 489        tasklet_unlock_wait(t);
 490        clear_bit(TASKLET_STATE_SCHED, &t->state);
 491}
 492
 493EXPORT_SYMBOL(tasklet_kill);
 494
 495void __init softirq_init(void)
 496{
 497        int cpu;
 498
 499        for_each_possible_cpu(cpu) {
 500                per_cpu(tasklet_vec, cpu).tail =
 501                        &per_cpu(tasklet_vec, cpu).head;
 502                per_cpu(tasklet_hi_vec, cpu).tail =
 503                        &per_cpu(tasklet_hi_vec, cpu).head;
 504        }
 505
 506        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
 507        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
 508}
 509
 510static int ksoftirqd(void * __bind_cpu)
 511{
 512        set_current_state(TASK_INTERRUPTIBLE);
 513
 514        while (!kthread_should_stop()) {
 515                preempt_disable();
 516                if (!local_softirq_pending()) {
 517                        preempt_enable_no_resched();
 518                        schedule();
 519                        preempt_disable();
 520                }
 521
 522                __set_current_state(TASK_RUNNING);
 523
 524                while (local_softirq_pending()) {
 525                        /* Preempt disable stops cpu going offline.
 526                           If already offline, we'll be on wrong CPU:
 527                           don't process */
 528                        if (cpu_is_offline((long)__bind_cpu))
 529                                goto wait_to_die;
 530                        do_softirq();
 531                        preempt_enable_no_resched();
 532                        cond_resched();
 533                        preempt_disable();
 534                }
 535                preempt_enable();
 536                set_current_state(TASK_INTERRUPTIBLE);
 537        }
 538        __set_current_state(TASK_RUNNING);
 539        return 0;
 540
 541wait_to_die:
 542        preempt_enable();
 543        /* Wait for kthread_stop */
 544        set_current_state(TASK_INTERRUPTIBLE);
 545        while (!kthread_should_stop()) {
 546                schedule();
 547                set_current_state(TASK_INTERRUPTIBLE);
 548        }
 549        __set_current_state(TASK_RUNNING);
 550        return 0;
 551}
 552
 553#ifdef CONFIG_HOTPLUG_CPU
 554/*
 555 * tasklet_kill_immediate is called to remove a tasklet which can already be
 556 * scheduled for execution on @cpu.
 557 *
 558 * Unlike tasklet_kill, this function removes the tasklet
 559 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 560 *
 561 * When this function is called, @cpu must be in the CPU_DEAD state.
 562 */
 563void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
 564{
 565        struct tasklet_struct **i;
 566
 567        BUG_ON(cpu_online(cpu));
 568        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
 569
 570        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
 571                return;
 572
 573        /* CPU is dead, so no lock needed. */
 574        for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
 575                if (*i == t) {
 576                        *i = t->next;
 577                        /* If this was the tail element, move the tail ptr */
 578                        if (*i == NULL)
 579                                per_cpu(tasklet_vec, cpu).tail = i;
 580                        return;
 581                }
 582        }
 583        BUG();
 584}
 585
 586static void takeover_tasklets(unsigned int cpu)
 587{
 588        /* CPU is dead, so no lock needed. */
 589        local_irq_disable();
 590
 591        /* Find end, append list for that CPU. */
 592        if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
 593                *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
 594                __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
 595                per_cpu(tasklet_vec, cpu).head = NULL;
 596                per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
 597        }
 598        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 599
 600        if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
 601                *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
 602                __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
 603                per_cpu(tasklet_hi_vec, cpu).head = NULL;
 604                per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
 605        }
 606        raise_softirq_irqoff(HI_SOFTIRQ);
 607
 608        local_irq_enable();
 609}
 610#endif /* CONFIG_HOTPLUG_CPU */
 611
 612static int __cpuinit cpu_callback(struct notifier_block *nfb,
 613                                  unsigned long action,
 614                                  void *hcpu)
 615{
 616        int hotcpu = (unsigned long)hcpu;
 617        struct task_struct *p;
 618
 619        switch (action) {
 620        case CPU_UP_PREPARE:
 621        case CPU_UP_PREPARE_FROZEN:
 622                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
 623                if (IS_ERR(p)) {
 624                        printk("ksoftirqd for %i failed\n", hotcpu);
 625                        return NOTIFY_BAD;
 626                }
 627                kthread_bind(p, hotcpu);
 628                per_cpu(ksoftirqd, hotcpu) = p;
 629                break;
 630        case CPU_ONLINE:
 631        case CPU_ONLINE_FROZEN:
 632                wake_up_process(per_cpu(ksoftirqd, hotcpu));
 633                break;
 634#ifdef CONFIG_HOTPLUG_CPU
 635        case CPU_UP_CANCELED:
 636        case CPU_UP_CANCELED_FROZEN:
 637                if (!per_cpu(ksoftirqd, hotcpu))
 638                        break;
 639                /* Unbind so it can run.  Fall thru. */
 640                kthread_bind(per_cpu(ksoftirqd, hotcpu),
 641                             any_online_cpu(cpu_online_map));
 642        case CPU_DEAD:
 643        case CPU_DEAD_FROZEN: {
 644                struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 645
 646                p = per_cpu(ksoftirqd, hotcpu);
 647                per_cpu(ksoftirqd, hotcpu) = NULL;
 648                sched_setscheduler(p, SCHED_FIFO, &param);
 649                kthread_stop(p);
 650                takeover_tasklets(hotcpu);
 651                break;
 652        }
 653#endif /* CONFIG_HOTPLUG_CPU */
 654        }
 655        return NOTIFY_OK;
 656}
 657
 658static struct notifier_block __cpuinitdata cpu_nfb = {
 659        .notifier_call = cpu_callback
 660};
 661
 662__init int spawn_ksoftirqd(void)
 663{
 664        void *cpu = (void *)(long)smp_processor_id();
 665        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 666
 667        BUG_ON(err == NOTIFY_BAD);
 668        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 669        register_cpu_notifier(&cpu_nfb);
 670        return 0;
 671}
 672
 673#ifdef CONFIG_SMP
 /*
  * Call a function on all processors.
  *
  * @func:  function to run; it is handed to smp_call_function() for
  *         the other CPUs and then called directly on this one.
  * @info:  opaque argument passed to @func.
  * @retry: forwarded unchanged to smp_call_function().
  * @wait:  forwarded unchanged to smp_call_function().
  *
  * Returns the value returned by smp_call_function().
  *
  * The local call runs with interrupts disabled.  The previous interrupt
  * state is saved and restored around it rather than unconditionally
  * enabling interrupts afterwards, so a caller that entered with
  * interrupts off does not get them switched on behind its back.
  */
 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
 {
         unsigned long flags;
         int ret = 0;

         preempt_disable();
         ret = smp_call_function(func, info, retry, wait);
         local_irq_save(flags);
         func(info);
         local_irq_restore(flags);
         preempt_enable();
         return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
 690#endif
 691
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.