---
 kernel/sched/bfs.c |  271 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 186 insertions(+), 85 deletions(-)

Index: linux-3.3-ck1/kernel/sched/bfs.c
===================================================================
--- linux-3.3-ck1.orig/kernel/sched/bfs.c	2012-03-24 19:30:29.000000000 +1100
+++ linux-3.3-ck1/kernel/sched/bfs.c	2012-03-25 17:42:50.632014990 +1100
@@ -171,6 +171,7 @@ static inline int timeslice(void)
  */
 struct global_rq {
 	raw_spinlock_t lock;
+	rwlock_t rwlock;
 	unsigned long nr_running;
 	unsigned long nr_uninterruptible;
 	unsigned long long nr_switches;
@@ -461,90 +462,184 @@ static inline bool task_running(struct t
 	return p->on_cpu;
 }
 
-static inline void grq_lock(void)
+static inline void grq_rlock(void)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+	__releases(grq.lock)
+{
+	raw_spin_lock(&grq.lock);
+	do_raw_read_lock(&grq.rwlock);
+	raw_spin_unlock(&grq.lock);
+}
+
+static inline void grq_wlock(void)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
 	raw_spin_lock(&grq.lock);
+	do_raw_write_lock(&grq.rwlock);
 }
 
-static inline void grq_unlock(void)
+static inline void grq_runlock(void)
+	__releases(grq.rwlock)
+{
+	do_raw_read_unlock(&grq.rwlock);
+}
+
+static inline void grq_wunlock(void)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
+	do_raw_write_unlock(&grq.rwlock);
 	raw_spin_unlock(&grq.lock);
 }
 
-static inline void grq_lock_irq(void)
+static inline void grq_rlock_irq(void)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+	__releases(grq.lock)
 {
 	raw_spin_lock_irq(&grq.lock);
+	do_raw_read_lock(&grq.rwlock);
+	raw_spin_unlock(&grq.lock);
 }
 
-static inline void time_lock_grq(struct rq *rq)
+static inline void grq_wlock_irq(void)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	grq_lock();
+	raw_spin_lock_irq(&grq.lock);
+	do_raw_write_lock(&grq.rwlock);
+}
+
+static inline void time_wlock_grq(struct rq *rq)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+{
+	grq_wlock();
 	update_clocks(rq);
 }
 
-static inline void grq_unlock_irq(void)
+static inline void grq_runlock_irq(void)
+	__releases(grq.rwlock)
+{
+	do_raw_read_unlock(&grq.rwlock);
+	local_irq_enable();
+}
+
+static inline void grq_wunlock_irq(void)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
+	do_raw_write_unlock(&grq.rwlock);
 	raw_spin_unlock_irq(&grq.lock);
 }
 
-static inline void grq_lock_irqsave(unsigned long *flags)
+static inline void grq_rlock_irqsave(unsigned long *flags)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+	__releases(grq.lock)
+{
+	raw_spin_lock_irqsave(&grq.lock, *flags);
+	do_raw_read_lock(&grq.rwlock);
+	raw_spin_unlock(&grq.lock);
+}
+
+static inline void grq_wlock_irqsave(unsigned long *flags)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
 	raw_spin_lock_irqsave(&grq.lock, *flags);
+	do_raw_write_lock(&grq.rwlock);
 }
 
-static inline void grq_unlock_irqrestore(unsigned long *flags)
+static inline void grq_runlock_irqrestore(unsigned long *flags)
+	__releases(grq.rwlock)
+{
+	do_raw_read_unlock(&grq.rwlock);
+	local_irq_restore(*flags);
+}
+
+static inline void grq_wunlock_irqrestore(unsigned long *flags)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
+	do_raw_write_unlock(&grq.rwlock);
 	raw_spin_unlock_irqrestore(&grq.lock, *flags);
 }
 
 static inline struct rq
-*task_grq_lock(struct task_struct *p, unsigned long *flags)
+*task_grq_rlock(struct task_struct *p, unsigned long *flags)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+	__releases(grq.lock)
+{
+	grq_rlock_irqsave(flags);
+	return task_rq(p);
+}
+
+static inline struct rq
+*task_grq_wlock(struct task_struct *p, unsigned long *flags)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	grq_lock_irqsave(flags);
+	grq_wlock_irqsave(flags);
 	return task_rq(p);
 }
 
 static inline struct rq
-*time_task_grq_lock(struct task_struct *p, unsigned long *flags)
+*time_task_grq_wlock(struct task_struct *p, unsigned long *flags)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	struct rq *rq = task_grq_lock(p, flags);
+	struct rq *rq = task_grq_wlock(p, flags);
 	update_clocks(rq);
 	return rq;
 }
 
-static inline struct rq *task_grq_lock_irq(struct task_struct *p)
+static inline struct rq *task_grq_rlock_irq(struct task_struct *p)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+	__releases(grq.lock)
+{
+	grq_rlock_irq();
+	return task_rq(p);
+}
+
+static inline struct rq *task_grq_wlock_irq(struct task_struct *p)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	grq_lock_irq();
+	grq_wlock_irq();
 	return task_rq(p);
 }
 
-static inline void time_task_grq_lock_irq(struct task_struct *p)
+static inline void time_task_grq_wlock_irq(struct task_struct *p)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	struct rq *rq = task_grq_lock_irq(p);
+	struct rq *rq = task_grq_wlock_irq(p);
 	update_clocks(rq);
 }
 
-static inline void task_grq_unlock_irq(void)
+static inline void task_grq_wunlock_irq(void)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	grq_unlock_irq();
+	grq_wunlock_irq();
 }
 
-static inline void task_grq_unlock(unsigned long *flags)
+static inline void task_grq_runlock(unsigned long *flags)
+	__releases(grq.rwlock)
+{
+	grq_runlock_irqrestore(flags);
+}
+
+static inline void task_grq_wunlock(unsigned long *flags)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	grq_unlock_irqrestore(flags);
+	grq_wunlock_irqrestore(flags);
 }
 
 /**
@@ -559,31 +654,36 @@ bool grunqueue_is_locked(void)
 	return raw_spin_is_locked(&grq.lock);
 }
 
+#if 0
+/* Unused? */
 void grq_unlock_wait(void)
 	__releases(grq.lock)
 {
 	smp_mb(); /* spin-unlock-wait is not a full memory barrier */
 	raw_spin_unlock_wait(&grq.lock);
 }
-
-static inline void time_grq_lock(struct rq *rq, unsigned long *flags)
+#endif
+static inline void time_grq_wlock(struct rq *rq, unsigned long *flags)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
 	local_irq_save(*flags);
-	time_lock_grq(rq);
+	time_wlock_grq(rq);
 }
 
-static inline struct rq *__task_grq_lock(struct task_struct *p)
+static inline struct rq *__task_grq_wlock(struct task_struct *p)
 	__acquires(grq.lock)
+	__acquires(grq.rwlock)
 {
-	grq_lock();
+	grq_wlock();
 	return task_rq(p);
 }
 
-static inline void __task_grq_unlock(void)
+static inline void __task_grq_wunlock(void)
+	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	grq_unlock();
+	grq_wunlock();
 }
 
 /*
@@ -617,6 +717,7 @@ static inline void finish_lock_switch(st
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	grq.lock.owner = current;
+	grq.rwlock.owner = current;
 #endif
 	/*
 	 * If we are tracking spinlock dependencies then we have to
@@ -624,8 +725,9 @@ static inline void finish_lock_switch(st
 	 * prev into current:
 	 */
 	spin_acquire(&grq.lock.dep_map, 0, 0, _THIS_IP_);
+	rwlock_acquire(&grq.rwlock.dep_map, 0, 0, _THIS_IP_);
 
-	grq_unlock_irq();
+	grq_wunlock_irq();
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -633,9 +735,9 @@ static inline void finish_lock_switch(st
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-	grq_unlock_irq();
+	grq_wunlock_irq();
 #else
-	grq_unlock();
+	grq_wunlock();
 #endif
 }
 
@@ -1283,14 +1385,14 @@ retry_rq:
		 * lock now, to be *sure*. If we're wrong, we'll
		 * just go back and repeat.
		 */
-		rq = task_grq_lock(p, &flags);
+		rq = task_grq_rlock(p, &flags);
		trace_sched_wait_task(p);
		running = task_running(p);
		on_rq = task_queued(p);
		ncsw = 0;
		if (!match_state || p->state == match_state)
			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_grq_unlock(&flags);
+		task_grq_runlock(&flags);
 
		/*
		 * If it changed from the expected state, bail out now.
@@ -1583,7 +1685,7 @@ static bool try_to_wake_up(struct task_s
	 * No need to do time_lock_grq as we only need to update the rq clock
	 * if we activate the task
	 */
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
	cpu = task_cpu(p);
 
	/* state is a volatile long, どうして、分からない */
@@ -1599,7 +1701,7 @@ static bool try_to_wake_up(struct task_s
 out_running:
	ttwu_post_activation(p, rq, success);
 out_unlock:
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 
	ttwu_stat(p, cpu, wake_flags);
 
@@ -1737,7 +1839,7 @@ void sched_fork(struct task_struct *p)
	 * value. rq->rq_deadline is only modified within schedule() so it
	 * is always equal to current->deadline.
	 */
-	rq = task_grq_lock_irq(curr);
+	rq = task_grq_wlock_irq(curr);
	if (likely(rq->rq_time_slice >= RESCHED_US * 2)) {
		rq->rq_time_slice /= 2;
		p->time_slice = rq->rq_time_slice;
@@ -1753,7 +1855,7 @@ void sched_fork(struct task_struct *p)
		time_slice_expired(p);
	}
	p->last_ran = rq->rq_last_ran;
-	task_grq_unlock_irq();
+	task_grq_wunlock_irq();
 out:
	put_cpu();
 }
@@ -1771,7 +1873,7 @@ void wake_up_new_task(struct task_struct
	unsigned long flags;
	struct rq *rq;
 
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
	p->state = TASK_RUNNING;
	parent = p->parent;
	/* Unnecessary but small chance that the parent changed CPU */
@@ -1787,7 +1889,7 @@ void wake_up_new_task(struct task_struct
		resched_task(parent);
	} else
		try_preempt(p, rq);
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2536,9 +2638,9 @@ unsigned long long task_delta_exec(struc
	struct rq *rq;
	u64 ns;
 
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
	ns = do_task_delta_exec(p, rq);
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 
	return ns;
 }
@@ -2554,9 +2656,9 @@ unsigned long long task_sched_runtime(st
	struct rq *rq;
	u64 ns;
 
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
	ns = p->sched_time + do_task_delta_exec(p, rq);
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 
	return ns;
 }
@@ -2807,10 +2909,10 @@ static void task_running_tick(struct rq
 
	/* p->time_slice < RESCHED_US. We only modify task_struct under grq lock */
	p = rq->curr;
-	grq_lock();
+	grq_wlock();
	requeue_task(p);
	set_tsk_need_resched(p);
-	grq_unlock();
+	grq_wunlock();
 }
 
 void wake_up_idle_cpu(int cpu);
@@ -3177,7 +3279,7 @@ need_resched:
	deactivate = false;
	schedule_debug(prev);
 
-	grq_lock_irq();
+	grq_wlock_irq();
 
	switch_count = &prev->nivcsw;
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3212,7 +3314,7 @@ need_resched:
		 * sure to submit it to avoid deadlocks.
		 */
		if (unlikely(deactivate && blk_needs_flush_plug(prev))) {
-			grq_unlock_irq();
+			grq_wunlock_irq();
			preempt_enable_no_resched();
			blk_schedule_flush_plug(prev);
			goto need_resched;
@@ -3247,7 +3349,7 @@ need_resched:
			 * again.
			 */
			set_rq_task(rq, prev);
-			grq_unlock_irq();
+			grq_wunlock_irq();
			goto rerun_prev_unlocked;
		} else
			swap_sticky(rq, cpu, prev);
@@ -3296,7 +3398,7 @@ need_resched:
		rq = cpu_rq(cpu);
		idle = rq->idle;
	} else
-		grq_unlock_irq();
+		grq_wunlock_irq();
 
 rerun_prev_unlocked:
	preempt_enable_no_resched();
@@ -3841,7 +3943,7 @@ void rt_mutex_setprio(struct task_struct
 
	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
 
	trace_sched_pi_setprio(p, prio);
	oldprio = p->prio;
@@ -3856,7 +3958,7 @@ void rt_mutex_setprio(struct task_struct
		try_preempt(p, rq);
	}
 
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 }
 #endif
 
@@ -3883,7 +3985,7 @@ void set_user_nice(struct task_struct *p
	 * We have to be careful, if called from sys_setpriority(),
	 * the task might be in the middle of scheduling on another CPU.
	 */
-	rq = time_task_grq_lock(p, &flags);
+	rq = time_task_grq_wlock(p, &flags);
	/*
	 * The RT priorities are set via sched_setscheduler(), but we still
	 * allow the 'normal' nice value to be set - but as expected
@@ -3913,7 +4015,7 @@ void set_user_nice(struct task_struct *p
			resched_task(p);
	}
 out_unlock:
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -4198,13 +4300,13 @@ recheck:
	 * To be able to change p->policy safely, the grunqueue lock must be
	 * held.
	 */
-	rq = __task_grq_lock(p);
+	rq = __task_grq_wlock(p);
	/*
	 * Changing the policy of the stop threads its a very bad idea
	 */
	if (p == rq->stop) {
-		__task_grq_unlock();
+		__task_grq_wunlock();
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		return -EINVAL;
	}
@@ -4215,7 +4317,7 @@ recheck:
	if (unlikely(policy == p->policy && (!is_rt_policy(policy) ||
			param->sched_priority == p->rt_priority))) {
 
-		__task_grq_unlock();
+		__task_grq_wunlock();
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		return 0;
	}
@@ -4223,7 +4325,7 @@ recheck:
	/* recheck policy now with rq lock held */
	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
		policy = oldpolicy = -1;
-		__task_grq_unlock();
+		__task_grq_wunlock();
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		goto recheck;
	}
@@ -4238,7 +4340,7 @@ recheck:
		enqueue_task(p);
		try_preempt(p, rq);
	}
-	__task_grq_unlock();
+	__task_grq_wunlock();
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
	rt_mutex_adjust_pi(p);
@@ -4508,9 +4610,9 @@ long sched_getaffinity(pid_t pid, cpumas
	if (retval)
		goto out_unlock;
 
-	grq_lock_irqsave(&flags);
+	grq_rlock_irqsave(&flags);
	cpumask_and(mask, tsk_cpus_allowed(p), cpu_online_mask);
-	grq_unlock_irqrestore(&flags);
+	grq_runlock_irqrestore(&flags);
 
 out_unlock:
	rcu_read_unlock();
@@ -4565,7 +4667,7 @@ SYSCALL_DEFINE0(sched_yield)
	struct task_struct *p;
 
	p = current;
-	grq_lock_irq();
+	grq_wlock_irq();
	schedstat_inc(task_rq(p), yld_count);
	requeue_task(p);
 
@@ -4573,9 +4675,7 @@ SYSCALL_DEFINE0(sched_yield)
	 * Since we are going to call schedule() anyway, there's
	 * no need to preempt or enable interrupts:
	 */
-	__release(grq.lock);
-	spin_release(&grq.lock.dep_map, 1, _THIS_IP_);
-	do_raw_spin_unlock(&grq.lock);
+	grq_wunlock();
	preempt_enable_no_resched();
	schedule();
 
@@ -4683,7 +4783,7 @@ bool __sched yield_to(struct task_struct
	struct rq *rq;
 
	rq = this_rq();
-	grq_lock_irqsave(&flags);
+	grq_wlock_irqsave(&flags);
	if (task_running(p) || p->state)
		goto out_unlock;
	yielded = 1;
@@ -4695,7 +4795,7 @@ bool __sched yield_to(struct task_struct
	p->time_slice = timeslice();
	set_tsk_need_resched(rq->curr);
 out_unlock:
-	grq_unlock_irqrestore(&flags);
+	grq_wunlock_irqrestore(&flags);
 
	if (yielded)
		schedule();
@@ -4823,9 +4923,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
	if (retval)
		goto out_unlock;
 
-	grq_lock_irqsave(&flags);
+	grq_rlock_irqsave(&flags);
	time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p));
-	grq_unlock_irqrestore(&flags);
+	grq_runlock_irqrestore(&flags);
 
	rcu_read_unlock();
	t = ns_to_timespec(time_slice);
@@ -4920,7 +5020,7 @@ void init_idle(struct task_struct *idle,
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;
 
-	time_grq_lock(rq, &flags);
+	time_grq_wlock(rq, &flags);
	idle->last_ran = rq->clock;
	idle->state = TASK_RUNNING;
	/* Setting prio to illegal value shouldn't matter when never queued */
@@ -4933,7 +5033,7 @@ void init_idle(struct task_struct *idle,
	rcu_read_unlock();
	rq->curr = rq->idle = idle;
	idle->on_cpu = 1;
-	grq_unlock_irqrestore(&flags);
+	grq_wunlock_irqrestore(&flags);
 
	/* Set the preempt count _outside_ the spinlocks! */
	task_thread_info(idle)->preempt_count = 0;
@@ -4992,9 +5092,9 @@ static inline void resched_cpu(int cpu)
 {
	unsigned long flags;
 
-	grq_lock_irqsave(&flags);
+	grq_wlock_irqsave(&flags);
	resched_task(cpu_curr(cpu));
-	grq_unlock_irqrestore(&flags);
+	grq_wunlock_irqrestore(&flags);
 }
 
 /*
@@ -5087,7 +5187,7 @@ int set_cpus_allowed_ptr(struct task_str
	struct rq *rq;
	int ret = 0;
 
-	rq = task_grq_lock(p, &flags);
+	rq = task_grq_wlock(p, &flags);
 
	if (cpumask_equal(tsk_cpus_allowed(p), new_mask))
		goto out;
@@ -5123,7 +5223,7 @@ int set_cpus_allowed_ptr(struct task_str
 out:
	if (queued)
		try_preempt(p, rq);
-	task_grq_unlock(&flags);
+	task_grq_wunlock(&flags);
 
	if (running_wrong)
		_cond_resched();
@@ -5420,32 +5520,32 @@ migration_call(struct notifier_block *nf
 
	case CPU_ONLINE:
		/* Update our root-domain */
-		grq_lock_irqsave(&flags);
+		grq_wlock_irqsave(&flags);
		if (rq->rd) {
			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
			set_rq_online(rq);
		}
		grq.noc = num_online_cpus();
-		grq_unlock_irqrestore(&flags);
+		grq_wunlock_irqrestore(&flags);
		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
		/* Idle task back to normal (off runqueue, low prio) */
-		grq_lock_irq();
+		grq_wlock_irq();
		return_task(idle, true);
		idle->static_prio = MAX_PRIO;
		__setscheduler(idle, rq, SCHED_NORMAL, 0);
		idle->prio = PRIO_LIMIT;
		set_rq_task(rq, idle);
		update_clocks(rq);
-		grq_unlock_irq();
+		grq_wunlock_irq();
		break;
 
	case CPU_DYING:
		/* Update our root-domain */
-		grq_lock_irqsave(&flags);
+		grq_wlock_irqsave(&flags);
		sched_idle_next(rq, cpu, idle);
		if (rq->rd) {
			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -5453,7 +5553,7 @@ migration_call(struct notifier_block *nf
		}
		break_sole_affinity(cpu, idle);
		grq.noc = num_online_cpus();
-		grq_unlock_irqrestore(&flags);
+		grq_wunlock_irqrestore(&flags);
		break;
 #endif
	}
@@ -5706,7 +5806,7 @@ static void rq_attach_root(struct rq *rq
	struct root_domain *old_rd = NULL;
	unsigned long flags;
 
-	grq_lock_irqsave(&flags);
+	grq_wlock_irqsave(&flags);
 
	if (rq->rd) {
		old_rd = rq->rd;
@@ -5732,7 +5832,7 @@ static void rq_attach_root(struct rq *rq
	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
		set_rq_online(rq);
 
-	grq_unlock_irqrestore(&flags);
+	grq_wunlock_irqrestore(&flags);
 
	if (old_rd)
		call_rcu_sched(&old_rd->rcu, free_rootdomain);
@@ -6843,7 +6943,7 @@ void __init sched_init_smp(void)
		BUG();
	free_cpumask_var(non_isolated_cpus);
 
-	grq_lock_irq();
+	grq_wlock_irq();
	/*
	 * Set up the relative cache distance of each online cpu from each
	 * other in a simple array for quick lookup. Locality is determined
@@ -6898,7 +6998,7 @@ void __init sched_init_smp(void)
		rq->cache_idle = cache_cpu_idle;
 #endif
	}
-	grq_unlock_irq();
+	grq_wunlock_irq();
 }
 #else
 void __init sched_init_smp(void)
@@ -6925,6 +7025,7 @@ void __init sched_init(void)
		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
 
	raw_spin_lock_init(&grq.lock);
+	rwlock_init(&grq.rwlock);
	grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0;
	grq.niffies = 0;
	grq.last_jiffy = jiffies;
@@ -7074,7 +7175,7 @@ void normalize_rt_tasks(void)
			continue;
 
		raw_spin_lock_irqsave(&p->pi_lock, flags);
-		rq = __task_grq_lock(p);
+		rq = __task_grq_wlock(p);
 
		queued = task_queued(p);
		if (queued)
@@ -7085,7 +7186,7 @@ void normalize_rt_tasks(void)
			try_preempt(p, rq);
		}
 
-		__task_grq_unlock();
+		__task_grq_wunlock();
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
	} while_each_thread(g, p);
 
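
Note (not part of the patch): the wrappers introduced above layer an rwlock under the existing grq spinlock so that read-only paths such as wait_task_inactive(), sched_getaffinity() and sched_rr_get_interval() can run concurrently, while writers keep the old exclusive semantics. The stand-alone sketch below only illustrates that locking discipline; it is a userspace analogue written against pthreads rather than the kernel's raw_spinlock_t/rwlock_t primitives, the grq_rlock()/grq_wlock() names are reused purely for clarity, and the "global runqueue" is reduced to a single counter.

/*
 * Illustrative userspace analogue of the grq read/write locking pattern.
 * Assumptions: pthread mutex/rwlock stand in for the kernel's raw spinlock
 * and rwlock; this is not kernel code and not part of the patch above.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t grq_lock = PTHREAD_MUTEX_INITIALIZER;     /* ~ grq.lock */
static pthread_rwlock_t grq_rwlock = PTHREAD_RWLOCK_INITIALIZER; /* ~ grq.rwlock */
static unsigned long nr_running;

/* Readers: take the spinlock only long enough to acquire the read side,
 * then drop it, so several readers can overlap inside their critical
 * sections. */
static void grq_rlock(void)
{
	pthread_mutex_lock(&grq_lock);
	pthread_rwlock_rdlock(&grq_rwlock);
	pthread_mutex_unlock(&grq_lock);
}

static void grq_runlock(void)
{
	pthread_rwlock_unlock(&grq_rwlock);
}

/* Writers: hold both locks for the whole critical section; the spinlock
 * preserves the old single-lock semantics, the write lock excludes any
 * concurrent readers. */
static void grq_wlock(void)
{
	pthread_mutex_lock(&grq_lock);
	pthread_rwlock_wrlock(&grq_rwlock);
}

static void grq_wunlock(void)
{
	pthread_rwlock_unlock(&grq_rwlock);
	pthread_mutex_unlock(&grq_lock);
}

int main(void)
{
	grq_wlock();
	nr_running++;				/* modification under the write side */
	grq_wunlock();

	grq_rlock();
	printf("nr_running = %lu\n", nr_running);	/* read-only access */
	grq_runlock();
	return 0;
}

The reason a reader briefly takes the spinlock around the read-side acquisition, mirroring how grq_rlock() drops grq.lock straight after do_raw_read_lock() in the patch, is that readers then never hold the spinlock across their critical section: only a writer, which holds both locks, excludes everyone else.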