---
 kernel/sched/bfs.c | 230 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 162 insertions(+), 68 deletions(-)

Index: linux-3.3-ck1/kernel/sched/bfs.c
===================================================================
--- linux-3.3-ck1.orig/kernel/sched/bfs.c	2012-03-26 22:29:52.051689995 +1100
+++ linux-3.3-ck1/kernel/sched/bfs.c	2012-03-26 23:03:29.934501046 +1100
@@ -462,54 +462,80 @@ static inline bool task_running(struct t
 	return p->on_cpu;
 }
 
-static inline void grq_rlock(void)
+static inline void grq_write_lock(void)
+	__acquires(grq.rwlock)
+{
+	rwlock_acquire(&grq.rwlock.dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(&grq.rwlock, do_raw_write_trylock, do_raw_write_lock);
+}
+
+static inline void grq_write_unlock(void)
+	__releases(grq.rwlock)
+{
+	rwlock_release(&grq.rwlock.dep_map, 1, _RET_IP_);
+	do_raw_write_unlock(&grq.rwlock);
+}
+
+static inline void grq_read_lock(void)
+	__acquires(grq.rwlock)
+{
+	rwlock_acquire_read(&grq.rwlock.dep_map, 0, 0, _RET_IP_);
+	LOCK_CONTENDED(&grq.rwlock, do_raw_read_trylock, do_raw_read_lock);
+}
+
+static inline void grq_read_unlock(void)
+	__releases(grq.rwlock)
+{
+	rwlock_release(&grq.rwlock.dep_map, 1, _RET_IP_);
+	do_raw_read_unlock(&grq.rwlock);
+}
+
+static inline void grq_wlock(void)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
-	__releases(grq.lock)
 {
 	raw_spin_lock(&grq.lock);
-	do_raw_read_lock(&grq.rwlock);
-	raw_spin_unlock(&grq.lock);
+	grq_write_lock();
 }
 
-static inline void grq_wlock(void)
+static inline void grq_ulock(void)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
 {
 	raw_spin_lock(&grq.lock);
-	do_raw_write_lock(&grq.rwlock);
+	grq_read_lock();
 }
 
-static inline void grq_runlock(void)
+static inline void grq_wunlock(void)
 	__releases(grq.rwlock)
+	__releases(grq.lock)
 {
-	do_raw_read_unlock(&grq.rwlock);
+	grq_write_unlock();
+	raw_spin_unlock(&grq.lock);
 }
 
-static inline void grq_wunlock(void)
+static inline void grq_uunlock(void)
 	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	do_raw_write_unlock(&grq.rwlock);
+	grq_read_unlock();
 	raw_spin_unlock(&grq.lock);
 }
 
-static inline void grq_rlock_irq(void)
+static inline void grq_wlock_irq(void)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
-	__releases(grq.lock)
 {
 	raw_spin_lock_irq(&grq.lock);
-	do_raw_read_lock(&grq.rwlock);
-	raw_spin_unlock(&grq.lock);
+	grq_write_lock();
 }
 
-static inline void grq_wlock_irq(void)
+static inline void grq_ulock_irq(void)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
 {
 	raw_spin_lock_irq(&grq.lock);
-	do_raw_write_lock(&grq.rwlock);
+	grq_read_lock();
 }
 
 static inline void time_wlock_grq(struct rq *rq)
@@ -520,18 +546,19 @@ static inline void time_wlock_grq(struct
 	update_clocks(rq);
 }
 
-static inline void grq_runlock_irq(void)
+static inline void grq_wunlock_irq(void)
 	__releases(grq.rwlock)
+	__releases(grq.lock)
 {
-	do_raw_read_unlock(&grq.rwlock);
-	local_irq_enable();
+	grq_write_unlock();
+	raw_spin_unlock_irq(&grq.lock);
 }
 
-static inline void grq_wunlock_irq(void)
+static inline void grq_uunlock_irq(void)
 	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	do_raw_write_unlock(&grq.rwlock);
+	grq_read_unlock();
 	raw_spin_unlock_irq(&grq.lock);
 }
 
@@ -541,8 +568,9 @@ static inline void grq_rlock_irqsave(uns
 	__releases(grq.lock)
 {
 	raw_spin_lock_irqsave(&grq.lock, *flags);
-	do_raw_read_lock(&grq.rwlock);
-	raw_spin_unlock(&grq.lock);
+	grq_read_lock();
+	spin_release(&grq.lock.dep_map, 1, _RET_IP_);
+	do_raw_spin_unlock(&grq.lock);
 }
 
 static inline void grq_wlock_irqsave(unsigned long *flags)
@@ -550,21 +578,44 @@ static inline void grq_wlock_irqsave(uns
 	__acquires(grq.rwlock)
 {
 	raw_spin_lock_irqsave(&grq.lock, *flags);
-	do_raw_write_lock(&grq.rwlock);
+	grq_write_lock();
+}
+
+static inline void grq_ulock_irqsave(unsigned long *flags)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+{
+	raw_spin_lock_irqsave(&grq.lock, *flags);
+	grq_read_lock();
+}
+
+static inline void grq_upgrade_rwlock(void)
+	__releases(grq.rwlock)
+	__acquires(grq.rwlock)
+{
+	grq_read_unlock();
+	grq_write_lock();
 }
 
 static inline void grq_runlock_irqrestore(unsigned long *flags)
 	__releases(grq.rwlock)
 {
-	do_raw_read_unlock(&grq.rwlock);
-	local_irq_restore(*flags);
+	read_unlock_irqrestore(&grq.rwlock, *flags);
 }
 
 static inline void grq_wunlock_irqrestore(unsigned long *flags)
 	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	do_raw_write_unlock(&grq.rwlock);
+	grq_write_unlock();
+	raw_spin_unlock_irqrestore(&grq.lock, *flags);
+}
+
+static inline void grq_uunlock_irqrestore(unsigned long *flags)
+	__releases(grq.rwlock)
+	__releases(grq.lock)
+{
+	grq_read_unlock();
 	raw_spin_unlock_irqrestore(&grq.lock, *flags);
 }
 
@@ -588,45 +639,44 @@ static inline struct rq
 }
 
 static inline struct rq
-*time_task_grq_wlock(struct task_struct *p, unsigned long *flags)
+*task_grq_ulock(struct task_struct *p, unsigned long *flags)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
 {
-	struct rq *rq = task_grq_wlock(p, flags);
-	update_clocks(rq);
-	return rq;
+	grq_ulock_irqsave(flags);
+	return task_rq(p);
 }
 
-static inline struct rq *task_grq_rlock_irq(struct task_struct *p)
+static inline struct rq
+*time_task_grq_wlock(struct task_struct *p, unsigned long *flags)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
-	__releases(grq.lock)
 {
-	grq_rlock_irq();
-	return task_rq(p);
+	struct rq *rq = task_grq_wlock(p, flags);
+	update_clocks(rq);
+	return rq;
 }
 
-static inline struct rq *task_grq_wlock_irq(struct task_struct *p)
+static inline struct rq *task_grq_ulock_irq(struct task_struct *p)
 	__acquires(grq.lock)
 	__acquires(grq.rwlock)
 {
-	grq_wlock_irq();
+	grq_ulock_irq();
 	return task_rq(p);
 }
 
-static inline void time_task_grq_wlock_irq(struct task_struct *p)
-	__acquires(grq.lock)
-	__acquires(grq.rwlock)
+static inline void task_grq_wunlock_irq(void)
+	__releases(grq.rwlock)
+	__releases(grq.lock)
 {
-	struct rq *rq = task_grq_wlock_irq(p);
-	update_clocks(rq);
+	grq_wunlock_irq();
 }
 
-static inline void task_grq_wunlock_irq(void)
+static inline void task_grq_uunlock_irq(void)
 	__releases(grq.rwlock)
 	__releases(grq.lock)
 {
-	grq_wunlock_irq();
+	grq_uunlock_irq();
 }
 
 static inline void task_grq_runlock(unsigned long *flags)
@@ -642,6 +692,13 @@ static inline void task_grq_wunlock(unsi
 	grq_wunlock_irqrestore(flags);
 }
 
+static inline void task_grq_uunlock(unsigned long *flags)
+	__releases(grq.rwlock)
+	__releases(grq.lock)
+{
+	grq_uunlock_irqrestore(flags);
+}
+
 /**
  * grunqueue_is_locked
 *
@@ -679,6 +736,14 @@ static inline struct rq *__task_grq_wloc
 	return task_rq(p);
 }
 
+static inline struct rq *__task_grq_ulock(struct task_struct *p)
+	__acquires(grq.lock)
+	__acquires(grq.rwlock)
+{
+	grq_ulock();
+	return task_rq(p);
+}
+
 static inline void __task_grq_wunlock(void)
 	__releases(grq.rwlock)
 	__releases(grq.lock)
@@ -686,6 +751,13 @@ static inline void __task_grq_wunlock(vo
 	grq_wunlock();
 }
 
+static inline void __task_grq_uunlock(void)
+	__releases(grq.rwlock)
+	__releases(grq.lock)
+{
+	grq_uunlock();
+}
+
 /*
  * Look for any tasks *anywhere* that are running nice 0 or better. We do
  * this lockless for overhead reasons since the occasional wrong result
@@ -1145,12 +1217,6 @@ static inline void deactivate_task(struc
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
-#ifdef CONFIG_LOCKDEP
-	/*
-	 * The caller should hold grq lock.
-	 */
-	WARN_ON_ONCE(debug_locks && !lockdep_is_held(&grq.lock));
-#endif
 	trace_sched_migrate_task(p, cpu);
 	if (task_cpu(p) != cpu)
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
@@ -1671,7 +1737,7 @@ void scheduler_ipi(void)
 static bool try_to_wake_up(struct task_struct *p, unsigned int state,
 			  int wake_flags)
 {
-	bool success = false;
+	bool success = false, rwupgrade = false;
 	unsigned long flags;
 	struct rq *rq;
 	int cpu;
@@ -1685,13 +1751,15 @@ static bool try_to_wake_up(struct task_s
 	 * No need to do time_lock_grq as we only need to update the rq clock
 	 * if we activate the task
 	 */
-	rq = task_grq_wlock(p, &flags);
+	rq = task_grq_ulock(p, &flags);
 	cpu = task_cpu(p);
 
 	/* state is a volatile long, どうして、分からない */
 	if (!((unsigned int)p->state & state))
 		goto out_unlock;
 
+	grq_upgrade_rwlock();
+	rwupgrade = true;
 	if (task_queued(p) || task_running(p))
 		goto out_running;
 
@@ -1701,7 +1769,10 @@ static bool try_to_wake_up(struct task_s
 out_running:
 	ttwu_post_activation(p, rq, success);
 out_unlock:
-	task_grq_wunlock(&flags);
+	if (rwupgrade)
+		task_grq_wunlock(&flags);
+	else
+		task_grq_uunlock(&flags);
 
 	ttwu_stat(p, cpu, wake_flags);
 
@@ -1771,6 +1842,7 @@ static void time_slice_expired(struct ta
 void sched_fork(struct task_struct *p)
 {
 	struct task_struct *curr;
+	bool rwupgrade = false;
 	int cpu = get_cpu();
 	struct rq *rq;
 
@@ -1839,7 +1911,7 @@ void sched_fork(struct task_struct *p)
 	 * value. rq->rq_deadline is only modified within schedule() so it
 	 * is always equal to current->deadline.
 	 */
-	rq = task_grq_wlock_irq(curr);
+	rq = task_grq_ulock_irq(curr);
 	if (likely(rq->rq_time_slice >= RESCHED_US * 2)) {
 		rq->rq_time_slice /= 2;
 		p->time_slice = rq->rq_time_slice;
@@ -1851,11 +1923,16 @@ void sched_fork(struct task_struct *p)
 		 * be slightly earlier.
 		 */
 		rq->rq_time_slice = 0;
-		set_tsk_need_resched(curr);
 		time_slice_expired(p);
+		rwupgrade = true;
+		grq_upgrade_rwlock();
+		set_tsk_need_resched(curr);
 	}
 	p->last_ran = rq->rq_last_ran;
-	task_grq_wunlock_irq();
+	if (unlikely(rwupgrade))
+		task_grq_wunlock_irq();
+	else
+		task_grq_uunlock_irq();
 out:
 	put_cpu();
 }
@@ -2090,6 +2167,7 @@ context_switch(struct rq *rq, struct tas
 	 */
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
 	spin_release(&grq.lock.dep_map, 1, _THIS_IP_);
+	rwlock_release(&grq.rwlock.dep_map, 1, _THIS_IP_);
 #endif
 
 	/* Here we just switch the register state and the stack. */
@@ -4300,13 +4378,13 @@ recheck:
 	 * To be able to change p->policy safely, the grunqueue lock must be
 	 * held.
 	 */
-	rq = __task_grq_wlock(p);
+	rq = __task_grq_ulock(p);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		__task_grq_wunlock();
+		__task_grq_uunlock();
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return -EINVAL;
 	}
@@ -4317,7 +4395,7 @@ recheck:
 	if (unlikely(policy == p->policy && (!is_rt_policy(policy) ||
 			param->sched_priority == p->rt_priority))) {
 
-		__task_grq_wunlock();
+		__task_grq_uunlock();
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return 0;
 	}
@@ -4325,10 +4403,11 @@ recheck:
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		__task_grq_wunlock();
+		__task_grq_uunlock();
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
+	grq_upgrade_rwlock();
 	update_clocks(rq);
 	p->sched_reset_on_fork = reset_on_fork;
 
@@ -4675,7 +4754,10 @@ SYSCALL_DEFINE0(sched_yield)
 	 * Since we are going to call schedule() anyway, there's
 	 * no need to preempt or enable interrupts:
 	 */
-	grq_wunlock();
+	grq_write_unlock();
+	__release(grq.lock);
+	spin_release(&grq.lock.dep_map, 1, _THIS_IP_);
+	do_raw_spin_unlock(&grq.lock);
 	preempt_enable_no_resched();
 
 	schedule();
@@ -4778,15 +4860,17 @@ EXPORT_SYMBOL(yield);
  */
 bool __sched yield_to(struct task_struct *p, bool preempt)
 {
+	bool yielded = 0, rwupgrade = false;
 	unsigned long flags;
-	bool yielded = 0;
 	struct rq *rq;
 
 	rq = this_rq();
-	grq_wlock_irqsave(&flags);
+	grq_ulock_irqsave(&flags);
 	if (task_running(p) || p->state)
 		goto out_unlock;
 	yielded = 1;
+	rwupgrade = true;
+	grq_upgrade_rwlock();
 	if (p->deadline > rq->rq_deadline)
 		p->deadline = rq->rq_deadline;
 	p->time_slice += rq->rq_time_slice;
@@ -4795,7 +4879,10 @@ bool __sched yield_to(struct task_struct
 		p->time_slice = timeslice();
 	set_tsk_need_resched(rq->curr);
 out_unlock:
-	grq_wunlock_irqrestore(&flags);
+	if (rwupgrade)
+		grq_wunlock_irqrestore(&flags);
+	else
+		grq_uunlock_irqrestore(&flags);
 
 	if (yielded)
 		schedule();
@@ -4923,9 +5010,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
 	if (retval)
 		goto out_unlock;
 
-	grq_rlock_irqsave(&flags);
+	grq_wlock_irqsave(&flags);
 	time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p));
-	grq_runlock_irqrestore(&flags);
+	grq_wunlock_irqrestore(&flags);
 
 	rcu_read_unlock();
 	t = ns_to_timespec(time_slice);
@@ -5182,12 +5269,13 @@ void wake_up_idle_cpu(int cpu)
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	bool running_wrong = false;
+	bool rwupgrade = false;
 	bool queued = false;
 	unsigned long flags;
 	struct rq *rq;
 	int ret = 0;
 
-	rq = task_grq_wlock(p, &flags);
+	rq = task_grq_ulock(p, &flags);
 
 	if (cpumask_equal(tsk_cpus_allowed(p), new_mask))
 		goto out;
@@ -5202,6 +5290,9 @@ int set_cpus_allowed_ptr(struct task_str
 		goto out;
 	}
 
+	rwupgrade = true;
+	grq_upgrade_rwlock();
+
 	queued = task_queued(p);
 
 	do_set_cpus_allowed(p, new_mask);
@@ -5223,7 +5314,10 @@ int set_cpus_allowed_ptr(struct task_str
 out:
 	if (queued)
 		try_preempt(p, rq);
-	task_grq_wunlock(&flags);
+	if (rwupgrade)
+		task_grq_wunlock(&flags);
+	else
+		task_grq_uunlock(&flags);
 
 	if (running_wrong)
 		_cond_resched();
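
Aside, not part of the patch: the sketch below is a minimal user-space analogue, assuming only POSIX threads, of the locking discipline the new grq_ulock()/grq_upgrade_rwlock()/grq_wunlock() helpers implement. A pthread mutex stands in for grq.lock and a pthread rwlock for grq.rwlock; the names ulock/upgrade/wunlock/uunlock are illustrative stand-ins, not the kernel API. It shows why the read-to-write upgrade cannot deadlock: every writer and every upgrader takes the (spin)lock first, so an upgrader that drops the read side and takes the write side can only ever wait for plain readers to drain, never race another upgrader or writer.

/* Illustrative sketch only; compile with: gcc -pthread upgrade_sketch.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t glock = PTHREAD_MUTEX_INITIALIZER;    /* ~ grq.lock   */
static pthread_rwlock_t grwlock = PTHREAD_RWLOCK_INITIALIZER; /* ~ grq.rwlock */
static int shared_state;

/* ~ grq_ulock(): serialise against writers/upgraders, then take the read side. */
static void ulock(void)
{
	pthread_mutex_lock(&glock);
	pthread_rwlock_rdlock(&grwlock);
}

/* ~ grq_upgrade_rwlock(): drop read, take write; safe only while glock is held. */
static void upgrade(void)
{
	pthread_rwlock_unlock(&grwlock);
	pthread_rwlock_wrlock(&grwlock);
}

/* ~ grq_wunlock(): release the write side, then the outer lock. */
static void wunlock(void)
{
	pthread_rwlock_unlock(&grwlock);
	pthread_mutex_unlock(&glock);
}

/* ~ grq_uunlock(): release the read side when no upgrade was needed. */
static void uunlock(void)
{
	pthread_rwlock_unlock(&grwlock);
	pthread_mutex_unlock(&glock);
}

/* Mirrors the try_to_wake_up() pattern: inspect first, upgrade only if needed. */
static void *worker(void *arg)
{
	int want_write = *(int *)arg;

	ulock();
	if (!want_write) {
		printf("reader sees %d\n", shared_state);
		uunlock();
		return NULL;
	}
	upgrade();
	shared_state++;
	wunlock();
	return NULL;
}

int main(void)
{
	pthread_t t[2];
	int flags[2] = { 0, 1 };

	pthread_create(&t[0], NULL, worker, &flags[0]);
	pthread_create(&t[1], NULL, worker, &flags[1]);
	pthread_join(t[0], NULL);
	pthread_join(t[1], NULL);
	printf("shared_state = %d\n", shared_state);
	return 0;
}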