From 560a7c9d3ccf0114ef139dc1c1d4f247f9f96ed3 Mon Sep 17 00:00:00 2001
From: Con Kolivas
Date: Sun, 9 Oct 2016 17:47:39 +1100
Subject: [PATCH 10/80] MuQSS version 0.106

---
 kernel/sched/MuQSS.c | 223 ++++++++++++++++++++++-----------------------------
 kernel/sched/MuQSS.h |   1 +
 2 files changed, 95 insertions(+), 129 deletions(-)

diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index 484c362..267ea40 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -139,7 +139,7 @@
 
 void print_scheduler_version(void)
 {
-	printk(KERN_INFO "MuQSS CPU scheduler v0.105 by Con Kolivas.\n");
+	printk(KERN_INFO "MuQSS CPU scheduler v0.106 by Con Kolivas.\n");
 }
 
 /*
@@ -469,43 +469,20 @@ static inline void unlock_all_rqs(void)
 	preempt_enable();
 }
 
-/*
- * Lock this_rq and as many rqs as we can grab with trylock, returning which
- * rqs are locked in a bitmask.
- */
-static inline void lock_rqs(struct rq *this_rq, cpumask_t *mask)
+/* Specially nest trylock an rq */
+static inline bool trylock_rq(struct rq *rq)
 {
-	int cpu;
-
-	cpumask_clear(mask);
-
-	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
-
-		if (rq != this_rq) {
-			if (rq_idle(rq))
-				continue;
-			if (!do_raw_spin_trylock(&rq->lock))
-				continue;
-			spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_);
-		}
-		cpumask_set_cpu(cpu, mask);
-	}
+	if (unlikely(!do_raw_spin_trylock(&rq->lock)))
+		return false;
+	spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_);
+	return true;
 }
 
-/* Unlock all rqs in a CPU bitmask */
-static inline void unlock_rqs(struct rq *this_rq, cpumask_t *mask)
+/* Unlock a specially nested trylocked rq */
+static inline void unlock_rq(struct rq *rq)
 {
-	int cpu;
-
-	cpumask_clear_cpu(this_rq->cpu, mask);
-
-	for_each_cpu(cpu, mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		spin_release(&rq->lock.dep_map, 1, _RET_IP_);
-		do_raw_spin_unlock(&rq->lock);
-	}
+	spin_release(&rq->lock.dep_map, 1, _RET_IP_);
+	do_raw_spin_unlock(&rq->lock);
 }
 
 static inline void rq_lock_irq(struct rq *rq)
@@ -601,6 +578,8 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 	next->on_cpu = 1;
 }
 
+static void enqueue_task(struct task_struct *p, struct rq *rq);
+
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 	prev->on_cpu = 0;
@@ -615,7 +594,26 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
 
-	raw_spin_unlock_irq(&rq->lock);
+#ifdef CONFIG_SMP
+	/*
+	 * If prev was marked as migrating to another CPU in return_task, drop
+	 * the local runqueue lock but leave interrupts disabled and grab the
+	 * remote lock we're migrating it to before enabling them.
+	 */
+	if (unlikely(rq->migrate)) {
+		struct rq *rq2 = task_rq(prev);
+
+		rq->migrate = false;
+		raw_spin_unlock(&rq->lock);
+
+		rq_lock(rq2);
+		enqueue_task(prev, rq2);
+		rq_unlock(rq2);
+
+		local_irq_enable();
+	} else
+#endif
+		raw_spin_unlock_irq(&rq->lock);
 }
 
 static inline bool deadline_before(u64 deadline, u64 time)
@@ -783,9 +781,8 @@ static void enqueue_task(struct task_struct *p, struct rq *rq)
 	else {
 		sl_id = p->deadline;
 		if (idleprio_task(p)) {
-			/* Set it to cope with 4 left shifts with locality_diff */
 			if (p->prio == IDLE_PRIO)
-				sl_id |= 0x00FF000000000000;
+				sl_id |= 0xF000000000000000;
 			else
 				sl_id += longest_deadline_diff();
 		}
@@ -1077,11 +1074,6 @@ static inline void resched_suitable_idle(struct task_struct *p)
 	if (suitable_idle_cpus(p))
 		resched_best_idle(p);
 }
-
-static inline int locality_diff(int cpu, struct rq *rq)
-{
-	return rq->cpu_locality[cpu];
-}
 #else /* CONFIG_SMP */
 static inline void set_cpuidle_map(int cpu)
 {
@@ -1100,11 +1092,6 @@ static inline void resched_suitable_idle(struct task_struct *p)
 {
 }
 
-static inline int locality_diff(int cpu, struct rq *rq)
-{
-	return 0;
-}
-
 static void resched_task(struct task_struct *p);
 
 static inline void resched_curr(struct rq *rq)
@@ -1250,18 +1237,17 @@ static inline void return_task(struct task_struct *p, struct rq *rq,
 		deactivate_task(p, rq);
 	else {
 		inc_qnr();
-		/* This is ugly, set_task_cpu was called on the running task
-		 * that doesn't want to deactivate so it has to be enqueued
-		 * to a different CPU and we need its lock as well. Hopefully
-		 * we can safely unlock here given where we are in __schedule
+#ifdef CONFIG_SMP
+		/*
+		 * set_task_cpu was called on the running task that doesn't
+		 * want to deactivate so it has to be enqueued to a different
+		 * CPU and we need its lock. Tag it to be moved as the
+		 * lock is dropped in finish_lock_switch.
 		 */
-		if (unlikely(task_cpu(p) != cpu)) {
-			struct rq *rq2 = task_rq(p);
-
-			lock_second_rq(rq, rq2);
-			enqueue_task(p, rq2);
-			rq_unlock(rq2);
-		} else
+		if (unlikely(task_cpu(p) != cpu))
+			rq->migrate = true;
+		else
+#endif
 			enqueue_task(p, rq);
 	}
 }
@@ -3335,100 +3321,76 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq)
 #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
 
 /*
- * Scheduler queue bitmap specific find next bit.
- */
-static inline unsigned long
-next_sched_bit(const unsigned long *addr, unsigned long offset)
-{
-	const unsigned long *p;
-	unsigned long result;
-	unsigned long size;
-	unsigned long tmp;
-
-	size = PRIO_LIMIT;
-	if (offset >= size)
-		return size;
-
-	p = addr + BITOP_WORD(offset);
-	result = offset & ~(BITS_PER_LONG-1);
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-
-/*
  * Task selection with skiplists is a simple matter of picking off the first
- * task in the sorted list, an O(1) operation. The only time it takes longer
- * is if tasks do not have suitable affinity and then we iterate over entries
- * till we find the first that does. Worst case here is no tasks with suitable
- * affinity and taking O(k) where k is number of processors.
+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1),
+ * bounded by the number of processors.
  *
- * As many runqueues as can be locked without contention are grabbed via
- * lock_rqs and only those runqueues are examined. All balancing between CPUs
+ * Runqueues are selectively locked based on their unlocked data and then
+ * unlocked if not needed. At most 3 locks will be held at any time and are
+ * released as soon as they're no longer needed. All balancing between CPUs
  * is thus done here in an extremely simple first come best fit manner.
  *
  * This iterates over runqueues in cache locality order. In interactive mode
  * it iterates over all CPUs and finds the task with the earliest deadline.
  * In non-interactive mode it will only take a task if it's from the current
- * runqueue or a runqueue with more tasks than the current one.
+ * runqueue, or from a runqueue with more tasks than the current one and a
+ * better deadline.
  */
 static inline struct task_struct
 *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
 {
 	struct task_struct *edt = idle;
 	u64 earliest_deadline = ~0ULL;
+	struct rq *locked = NULL;
 	int i, best_entries = 0;
-	cpumask_t locked;
-
-	lock_rqs(rq, &locked);
 
 	for (i = 0; i < num_possible_cpus(); i++) {
 		struct rq *other_rq = rq->rq_order[i];
 		int entries = other_rq->sl->entries;
 		struct task_struct *p;
 
-		if (!entries)
-			continue;
-		if (!cpumask_test_cpu(other_rq->cpu, &locked))
+		if (!sched_interactive) {
+			if (entries <= best_entries)
+				continue;
+		} else if (!entries)
 			continue;
+
+		/* if (i) implies other_rq != rq */
+		if (i) {
+			if (unlikely(!trylock_rq(other_rq)))
+				continue;
+			/* Need to reevaluate entries after locking */
+			entries = other_rq->sl->entries;
+			if (unlikely(!entries)) {
+				unlock_rq(other_rq);
+				continue;
+			}
+		}
 
 		p = other_rq->node.next[0]->value;
 
-		if (!smt_schedule(p, rq))
+		if (!deadline_before(p->deadline, earliest_deadline)) {
+			if (i)
+				unlock_rq(other_rq);
 			continue;
+		}
 
-		/* Make sure affinity is ok */
-		if (rq != other_rq && needs_other_cpu(p, cpu))
+		if (!smt_schedule(p, rq)) {
+			if (i)
+				unlock_rq(other_rq);
 			continue;
+		}
+
+		/* Make sure affinity is ok */
+		if (i) {
+			if (needs_other_cpu(p, cpu)) {
+				unlock_rq(other_rq);
+				continue;
+			}
+			if (locked)
+				unlock_rq(locked);
+			locked = other_rq;
+		}
 
-		if (!sched_interactive && entries <= best_entries)
-			continue;
-		if (!deadline_before(p->deadline, earliest_deadline))
-			continue;
 		best_entries = entries;
 		earliest_deadline = p->deadline;
 		edt = p;
@@ -3436,7 +3398,9 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *
 
 	if (likely(edt != idle))
 		take_task(rq, cpu, edt);
-	unlock_rqs(rq, &locked);
+
+	if (locked)
+		unlock_rq(locked);
 
 	return edt;
 }
@@ -5630,7 +5594,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 		struct rq *dest_rq = cpu_rq(dest_cpu);
 
 		lock_second_rq(rq, dest_rq);
-		set_task_cpu(p, cpumask_any_and(cpu_valid_mask, new_mask));
+		set_task_cpu(p, dest_cpu);
 		rq_unlock(dest_rq);
 	}
 out:
@@ -7296,10 +7260,10 @@ void __init sched_init_smp(void)
 #endif
 	}
 	for_each_possible_cpu(cpu) {
-		int total_cpus = 0, locality;
+		int total_cpus = 1, locality;
 
 		rq = cpu_rq(cpu);
-		for (locality = 0; locality <= 4; locality++) {
+		for (locality = 1; locality <= 4; locality++) {
 			for_each_possible_cpu(other_cpu) {
 				if (rq->cpu_locality[other_cpu] == locality)
 					rq->rq_order[total_cpus++] = cpu_rq(other_cpu);
@@ -7413,6 +7377,7 @@ void __init sched_init(void)
 		rq->dither = false;
 		set_rq_task(rq, &init_task);
 #ifdef CONFIG_SMP
+		rq->migrate = false;
 		rq->sd = NULL;
 		rq->rd = NULL;
 		rq->online = false;
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
index 7bde3b7..0fbdf74 100644
--- a/kernel/sched/MuQSS.h
+++ b/kernel/sched/MuQSS.h
@@ -40,6 +40,7 @@ struct rq {
 #ifdef CONFIG_SMP
 	int cpu;		/* cpu of this runqueue */
 	bool online;
+	bool migrate;
 
 	struct root_domain *rd;
 	struct sched_domain *sd;
-- 
2.7.4
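
Note for readers following the locking change above: the sketch below is a stand-alone, user-space analogue of the new task-selection strategy, not kernel code. It models runqueue spinlocks with pthread mutexes and the per-CPU skiplist with a single head pointer; the names (pick_earliest, rq_trylock, struct runqueue, struct task) and the simple round-robin CPU ordering are illustrative assumptions rather than anything in MuQSS itself. It only demonstrates the pattern earliest_deadline_task() now follows: remote queues are examined only if their lock can be taken without contention, losing candidates are unlocked immediately, and at most one remote lock is retained at a time.

/*
 * Minimal user-space sketch of trylock-based earliest-deadline selection.
 * Build with: cc -o pick pick.c -pthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct task {
	uint64_t deadline;	/* earlier value = runs sooner */
	int id;
};

struct runqueue {
	pthread_mutex_t lock;
	struct task *head;	/* earliest-deadline task, or NULL if idle */
	int entries;
};

static struct runqueue rqs[NR_CPUS];

/* Analogue of trylock_rq(): only proceed if the lock is uncontended. */
static bool rq_trylock(struct runqueue *rq)
{
	return pthread_mutex_trylock(&rq->lock) == 0;
}

static void rq_unlock(struct runqueue *rq)
{
	pthread_mutex_unlock(&rq->lock);
}

/*
 * Pick the runnable task with the earliest deadline across all runqueues,
 * starting with the local one (whose lock the caller is assumed to hold
 * already, so it is not taken here). Remote queues are only trylocked, and
 * only the current best candidate's lock is kept.
 */
static struct task *pick_earliest(int this_cpu)
{
	struct runqueue *locked = NULL;
	struct task *best = NULL;
	uint64_t best_deadline = UINT64_MAX;

	for (int i = 0; i < NR_CPUS; i++) {
		int cpu = (this_cpu + i) % NR_CPUS;	/* local queue first */
		struct runqueue *rq = &rqs[cpu];

		if (i && !rq_trylock(rq))
			continue;	/* skip contended remote queues */

		if (rq->entries && rq->head->deadline < best_deadline) {
			best = rq->head;
			best_deadline = rq->head->deadline;
			if (i) {
				if (locked)
					rq_unlock(locked);
				locked = rq;	/* keep the winner's lock */
			}
		} else if (i) {
			rq_unlock(rq);	/* not a better candidate, release */
		}
	}

	/* A real scheduler would dequeue 'best' here before unlocking. */
	if (locked)
		rq_unlock(locked);
	return best;
}

int main(void)
{
	struct task t[NR_CPUS] = { {40, 0}, {10, 1}, {30, 2}, {20, 3} };

	for (int i = 0; i < NR_CPUS; i++) {
		pthread_mutex_init(&rqs[i].lock, NULL);
		rqs[i].head = &t[i];
		rqs[i].entries = 1;
	}

	struct task *p = pick_earliest(0);
	printf("picked task %d with deadline %llu\n",
	       p ? p->id : -1, p ? (unsigned long long)p->deadline : 0);
	return 0;
}

The design point the sketch tries to capture is the same one the patch comment states: by trylocking remote queues and holding at most a handful of locks briefly, a busy queue never blocks selection, at the cost of occasionally missing a better task on a contended runqueue.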