diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt index 2bdcc98..c0841e2 100644 --- a/Documentation/scheduler/sched-MuQSS.txt +++ b/Documentation/scheduler/sched-MuQSS.txt @@ -4,4 +4,75 @@ See sched-BFS.txt for basic design; MuQSS is a per-cpu runqueue variant with one 8 level skiplist per runqueue, and fine grained locking for much more scalability. +Goals. + +The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from +here on (pronounced mux), is to completely do away with the complex designs of +the past for the cpu process scheduler and instead implement one that is very +simple in basic design. The main focus of MuQSS is to achieve excellent desktop +interactivity and responsiveness without heuristics and tuning knobs that are +difficult to understand, impossible to model and predict the effect of, and +which, when tuned for one workload, cause massive detriment to another, while +still being scalable to many CPUs and processes. + + +Design summary. + +MuQSS is best described as a per-cpu multiple runqueue, O(log n) insertion, O(1) +lookup, earliest effective virtual deadline first design, loosely based on EEVDF +(earliest eligible virtual deadline first) and my previous Staircase Deadline +scheduler, and evolved from the single runqueue O(n) BFS scheduler. Each +component shall be described in turn to explain its significance and the +reasoning behind it. + + +Design reasoning. + +In BFS, the use of a single runqueue across all CPUs meant that each CPU would +need to scan the entire runqueue looking for the process with the earliest +deadline and schedule that next, regardless of which CPU it originally came +from. This made BFS deterministic with respect to latency and provided +guaranteed latencies dependent on the number of processes and CPUs. The single +runqueue, however, meant that all CPUs would compete for the single lock +protecting it, which would lead to increasing lock contention as the number of +CPUs rose and appeared to limit scalability of common workloads beyond 16 +logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously +increased overhead proportional to the number of queued processes and led to +cache thrashing while iterating over the linked list. + +MuQSS is an evolution of BFS, designed to maintain the same scheduling +decision mechanism and be virtually deterministic without relying on the +constrained design of the single runqueue, by splitting out the single runqueue +to be per-CPU and using skiplists instead of linked lists. + +The original reason for going back to a single runqueue design for BFS was that +once multiple runqueues are introduced, per-CPU or otherwise, there will be +complex interactions as each runqueue will be responsible for the scheduling +latency and fairness of the tasks only on its own runqueue, and to achieve +fairness and low latency across multiple CPUs, any advantage in throughput of +having CPU local tasks causes other disadvantages. This is because a very +complex balancing system is required to achieve, at best, some semblance of +fairness across CPUs, and it can only maintain relatively low latency for tasks +bound to the same CPUs, not across them. To improve said fairness and latency +across CPUs, the advantage of local runqueue locking, which makes for better +scalability, is lost due to having to grab multiple locks.
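+
+As a simplified illustration only (not the actual kernel code, and the helper
+name below is made up for this document), the per-runqueue skip list keeps its
+tasks sorted by a key derived from priority and virtual deadline, so the best
+candidate is always the first node and can be found without walking the list:
+
+	/* Sketch: peek at the best queued task on a runqueue, O(1). */
+	static inline struct task_struct *best_queued_task(struct rq *rq)
+	{
+		/* The first node after the skip list header has the lowest key. */
+		return rq->node.next[0]->value;
+	}
+
+Insertion into the skip list is O(log n), and the lowest key is also cached in
+the runqueue (best_key) so that other CPUs can glance at it locklessly and only
+take the remote runqueue lock when that key looks better than the best one they
+have already found.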
+ +MuQSS works around the problems inherent in multiple runqueue designs by +making its skip lists priority ordered and, through novel use of lockless +examination of every other runqueue, deciding whether it should take the earliest +deadline task from another runqueue for latency reasons, or for CPU balancing +reasons. It still does not have a balancing system, instead letting the +next task scheduling decision and the task wakeup CPU choice provide the +balancing by virtue of those selections. + + +Design: + +MuQSS is an 8 level skip list per runqueue variant of BFS. + +See sched-BFS.txt for some of the shared design details. + +Documentation yet to be completed. + + Con Kolivas Sun, 2nd October 2016 diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c index 58d8c54..1700be5 100644 --- a/kernel/sched/MuQSS.c +++ b/kernel/sched/MuQSS.c @@ -98,7 +98,6 @@ #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) #define rt_task(p) rt_prio((p)->prio) -#define rt_queue(rq) rt_prio((rq)->rq_prio) #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) #define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ (policy) == SCHED_RR) @@ -107,19 +106,15 @@ #define is_idle_policy(policy) ((policy) == SCHED_IDLEPRIO) #define idleprio_task(p) unlikely(is_idle_policy((p)->policy)) #define task_running_idle(p) unlikely((p)->prio == IDLE_PRIO) -#define idle_queue(rq) (unlikely(is_idle_policy((rq)->rq_policy))) #define is_iso_policy(policy) ((policy) == SCHED_ISO) #define iso_task(p) unlikely(is_iso_policy((p)->policy)) -#define iso_queue(rq) unlikely(is_iso_policy((rq)->rq_policy)) #define task_running_iso(p) unlikely((p)->prio == ISO_PRIO) -#define rq_running_iso(rq) ((rq)->rq_prio == ISO_PRIO) #define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) #define ISO_PERIOD (5 * HZ) -#define SCHED_PRIO(p) ((p) + MAX_RT_PRIO) #define STOP_PRIO (MAX_RT_PRIO - 1) /* @@ -139,7 +134,7 @@ void print_scheduler_version(void) { - printk(KERN_INFO "MuQSS CPU scheduler v0.111 by Con Kolivas.\n"); + printk(KERN_INFO "MuQSS CPU scheduler v0.112 by Con Kolivas.\n"); } /* @@ -670,6 +665,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * remote lock we're migrating it to before enabling them. */ if (unlikely(task_on_rq_migrating(prev))) { + sched_info_dequeued(rq, prev); /* * We move the ownership of prev to the new cpu now.
ttwu can't * activate prev to the wrong cpu since it has to grab this @@ -780,6 +776,7 @@ static void update_load_avg(struct rq *rq) static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) { skiplist_delete(rq->sl, &p->node); + rq->best_key = rq->node.next[0]->key; update_clocks(rq); if (!(flags & DEQUEUE_SAVE)) sched_info_dequeued(task_rq(p), p); @@ -862,6 +859,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) sched_info_queued(rq, p); randseed = (rq->niffies >> 10) & 0xFFFFFFFF; skiplist_insert(rq->sl, &p->node, sl_id, p, randseed); + rq->best_key = rq->node.next[0]->key; update_load_avg(rq); } @@ -1036,6 +1034,8 @@ static void resched_curr(struct rq *rq) if (test_tsk_need_resched(rq->curr)) return; + rq->preempt = rq->curr; + /* We're doing this without holding the rq lock if it's not task_rq */ set_tsk_need_resched(rq->curr); @@ -1122,6 +1122,24 @@ bool cpus_share_cache(int this_cpu, int that_cpu) return (this_rq->cpu_locality[that_cpu] < 3); } +/* As per resched_curr but only will resched idle task */ +static inline void resched_idle(struct rq *rq) +{ + if (test_tsk_need_resched(rq->idle)) + return; + + rq->preempt = rq->idle; + + set_tsk_need_resched(rq->idle); + + if (rq_local(rq)) { + set_preempt_need_resched(); + return; + } + + smp_send_reschedule(rq->cpu); +} + static struct rq *resched_best_idle(struct task_struct *p, int cpu) { cpumask_t tmpmask; @@ -1133,13 +1151,7 @@ static struct rq *resched_best_idle(struct task_struct *p, int cpu) rq = cpu_rq(best_cpu); if (!smt_schedule(p, rq)) return NULL; - /* - * Given we do this lockless, do one last check that the rq is still - * idle by the time we get here - */ - if (unlikely(!rq_idle(rq))) - return NULL; - resched_curr(rq); + resched_idle(rq); return rq; } @@ -1265,6 +1277,7 @@ static inline void deactivate_task(struct task_struct *p, struct rq *rq) p->on_rq = 0; atomic_dec(&grq.nr_running); + sched_info_dequeued(rq, p); } #ifdef CONFIG_SMP @@ -1284,7 +1297,7 @@ void set_task_cpu(struct task_struct *p, unsigned int cpu) WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || lockdep_is_held(&task_rq(p)->lock))); #endif - if (task_cpu(p) == cpu) + if (p->wake_cpu == cpu) return; trace_sched_migrate_task(p, cpu); perf_event_task_migrate(p); @@ -1296,7 +1309,7 @@ void set_task_cpu(struct task_struct *p, unsigned int cpu) */ smp_wmb(); - if (task_running(rq, p) && rq->online) { + if (task_running(rq, p)) { /* * We should only be calling this on a running task if we're * holding rq lock. @@ -1327,7 +1340,13 @@ void set_task_cpu(struct task_struct *p, unsigned int cpu) */ static inline void take_task(struct rq *rq, int cpu, struct task_struct *p) { - dequeue_task(task_rq(p), p, 0); + struct rq *p_rq = task_rq(p); + + dequeue_task(p_rq, p, DEQUEUE_SAVE); + if (p_rq != rq) { + sched_info_dequeued(p_rq, p); + sched_info_queued(rq, p); + } set_task_cpu(p, cpu); dec_qnr(); } @@ -1354,7 +1373,7 @@ static inline void return_task(struct task_struct *p, struct rq *rq, p->on_rq = TASK_ON_RQ_MIGRATING; else #endif - enqueue_task(rq, p, 0); + enqueue_task(rq, p, ENQUEUE_RESTORE); } } @@ -1539,23 +1558,6 @@ static inline bool needs_other_cpu(struct task_struct *p, int cpu) return false; } #define cpu_online_map (*(cpumask_t *)cpu_online_mask) -#ifdef CONFIG_HOTPLUG_CPU -/* - * Check to see if there is a task that is affined only to offline CPUs but - * still wants runtime. This happens to kernel threads during suspend/halt and - * disabling of CPUs. 
- */ -static inline bool online_cpus(struct task_struct *p) -{ - return (likely(cpumask_intersects(&cpu_online_map, &p->cpus_allowed))); -} -#else /* CONFIG_HOTPLUG_CPU */ -/* All available CPUs are always online without hotplug. */ -static inline bool online_cpus(struct task_struct *p) -{ - return true; -} -#endif static void try_preempt(struct task_struct *p, struct rq *this_rq) { @@ -1741,6 +1743,12 @@ void scheduler_ipi(void) * this IPI. */ preempt_fold_need_resched(); + + if (!idle_cpu(smp_processor_id()) || need_resched()) + return; + + irq_enter(); + irq_exit(); } static int valid_task_cpu(struct task_struct *p) @@ -2169,16 +2177,21 @@ static inline void init_schedstats(void) {} void wake_up_new_task(struct task_struct *p) { struct task_struct *parent, *rq_curr; + struct rq *rq, *new_rq; unsigned long flags; - struct rq *rq; parent = p->parent; raw_spin_lock_irqsave(&p->pi_lock, flags); p->state = TASK_RUNNING; - if (unlikely(needs_other_cpu(p, task_cpu(p)))) + /* Task_rq can't change yet on a new task */ + new_rq = rq = task_rq(p); + if (unlikely(needs_other_cpu(p, task_cpu(p)))) { set_task_cpu(p, valid_task_cpu(p)); - rq = __task_rq_lock(p); + new_rq = task_rq(p); + } + + double_rq_lock(rq, new_rq); update_clocks(rq); rq_curr = rq->curr; @@ -2194,29 +2207,30 @@ void wake_up_new_task(struct task_struct *p) * Share the timeslice between parent and child, thus the * total amount of pending timeslices in the system doesn't change, * resulting in more scheduling fairness. If it's negative, it won't - * matter since that's the same as being 0. current's time_slice is - * actually in rq_time_slice when it's running, as is its last_ran - * value. rq->rq_deadline is only modified within schedule() so it - * is always equal to current->deadline. + * matter since that's the same as being 0. rq->rq_deadline is only + * modified within schedule() so it is always equal to + * current->deadline. */ - p->last_ran = rq->rq_last_ran; + p->last_ran = rq_curr->last_ran; if (likely(rq_curr->policy != SCHED_FIFO)) { - rq->rq_time_slice /= 2; - if (unlikely(rq->rq_time_slice < RESCHED_US)) { + rq_curr->time_slice /= 2; + if (unlikely(rq_curr->time_slice < RESCHED_US)) { /* * Forking task has run out of timeslice. Reschedule it and * start its child with a new time slice and deadline. The * child will end up running first because its deadline will * be slightly earlier. */ - rq->rq_time_slice = 0; + rq_curr->time_slice = 0; __set_tsk_resched(rq_curr); - time_slice_expired(p, rq); + time_slice_expired(p, new_rq); if (suitable_idle_cpus(p)) resched_best_idle(p, task_cpu(p)); + else if (unlikely(rq != new_rq)) + try_preempt(p, new_rq); } else { - p->time_slice = rq->rq_time_slice; - if (rq_curr == parent && !suitable_idle_cpus(p)) { + p->time_slice = rq_curr->time_slice; + if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) { /* * The VM isn't cloned, so we're in a good position to * do child-runs-first in anticipation of an exec. 
This @@ -2224,13 +2238,14 @@ void wake_up_new_task(struct task_struct *p) */ __set_tsk_resched(rq_curr); } else - try_preempt(p, rq); + try_preempt(p, new_rq); } } else { - time_slice_expired(p, rq); - try_preempt(p, rq); + time_slice_expired(p, new_rq); + try_preempt(p, new_rq); } - task_rq_unlock(rq, p, &flags); + double_rq_unlock(rq, new_rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); } #ifdef CONFIG_PREEMPT_NOTIFIERS @@ -3006,7 +3021,7 @@ static void pc_user_time(struct rq *rq, struct task_struct *p, static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p) { - long account_ns = rq->clock_task - rq->rq_last_ran; + long account_ns = rq->clock_task - p->last_ran; struct task_struct *idle = rq->idle; unsigned long account_pc; @@ -3029,14 +3044,14 @@ update_cpu_clock_tick(struct rq *rq, struct task_struct *p) ts_account: /* time_slice accounting is done in usecs to avoid overflow on 32bit */ - if (rq->rq_policy != SCHED_FIFO && p != idle) { + if (p->policy != SCHED_FIFO && p != idle) { s64 time_diff = rq->clock - rq->timekeep_clock; niffy_diff(&time_diff, 1); - rq->rq_time_slice -= NS_TO_US(time_diff); + p->time_slice -= NS_TO_US(time_diff); } - rq->rq_last_ran = rq->clock_task; + p->last_ran = rq->clock_task; rq->timekeep_clock = rq->clock; } @@ -3048,7 +3063,7 @@ ts_account: static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p) { - long account_ns = rq->clock_task - rq->rq_last_ran; + long account_ns = rq->clock_task - p->last_ran; struct task_struct *idle = rq->idle; unsigned long account_pc; @@ -3066,14 +3081,14 @@ update_cpu_clock_switch(struct rq *rq, struct task_struct *p) ts_account: /* time_slice accounting is done in usecs to avoid overflow on 32bit */ - if (rq->rq_policy != SCHED_FIFO && p != idle) { + if (p->policy != SCHED_FIFO && p != idle) { s64 time_diff = rq->clock - rq->timekeep_clock; niffy_diff(&time_diff, 1); - rq->rq_time_slice -= NS_TO_US(time_diff); + p->time_slice -= NS_TO_US(time_diff); } - rq->rq_last_ran = rq->clock_task; + p->last_ran = rq->clock_task; rq->timekeep_clock = rq->clock; } @@ -3094,7 +3109,7 @@ static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) */ if (p == rq->curr && task_on_rq_queued(p)) { update_rq_clock(rq); - ns = rq->clock_task - rq->rq_last_ran; + ns = rq->clock_task - p->last_ran; if (unlikely((s64)ns < 0)) ns = 0; } @@ -3305,31 +3320,31 @@ static inline void no_iso_tick(struct rq *rq, int ticks) /* This manages tasks that have run out of timeslice during a scheduler_tick */ static void task_running_tick(struct rq *rq) { - struct task_struct *p; + struct task_struct *p = rq->curr; /* * If a SCHED_ISO task is running we increment the iso_ticks. In * order to prevent SCHED_ISO tasks from causing starvation in the * presence of true RT tasks we account those as iso_ticks as well. */ - if (rt_queue(rq) || rq_running_iso(rq)) + if (rt_task(p) || task_running_iso(p)) iso_tick(rq); else no_iso_tick(rq, 1); /* SCHED_FIFO tasks never run out of timeslice. */ - if (rq->rq_policy == SCHED_FIFO) + if (p->policy == SCHED_FIFO) return; - if (iso_queue(rq)) { - if (rq_running_iso(rq)) { + if (iso_task(p)) { + if (task_running_iso(p)) { if (rq->iso_refractory) { - /* - * SCHED_ISO task is running as RT and limit - * has been hit. Force it to reschedule as - * SCHED_NORMAL by zeroing its time_slice - */ - rq->rq_time_slice = 0; + /* + * SCHED_ISO task is running as RT and limit + * has been hit. 
Force it to reschedule as + * SCHED_NORMAL by zeroing its time_slice + */ + p->time_slice = 0; } } else if (!rq->iso_refractory) { /* Can now run again ISO. Reschedule to pick up prio */ @@ -3343,16 +3358,9 @@ static void task_running_tick(struct rq *rq) * run out of time slice in the interim. Otherwise, if they have * less than RESCHED_US μs of time slice left they will be rescheduled. */ - if (rq->dither) { - if (rq->rq_time_slice > HALF_JIFFY_US) - return; - else - rq->rq_time_slice = 0; - } else if (rq->rq_time_slice >= RESCHED_US) - return; + if (p->time_slice - rq->dither >= RESCHED_US) + return; out_resched: - p = rq->curr; - rq_lock(rq); __set_tsk_resched(p); rq_unlock(rq); @@ -3510,24 +3518,29 @@ static inline void check_deadline(struct task_struct *p, struct rq *rq) * is thus done here in an extremely simple first come best fit manner. * * This iterates over runqueues in cache locality order. In interactive mode - * it iterates over all CPUs and finds the task with the earliest deadline. + * it iterates over all CPUs and finds the task with the best key/deadline. * In non-interactive mode it will only take a task if it's from the current * runqueue or a runqueue with more tasks than the current one with a better - * deadline. + * key/deadline. */ static inline struct task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) { struct task_struct *edt = idle; - u64 earliest_deadline = ~0ULL; struct rq *locked = NULL; int i, best_entries = 0; + u64 best_key = ~0ULL; for (i = 0; i < num_possible_cpus(); i++) { struct rq *other_rq = rq_order(rq, i); int entries = other_rq->sl->entries; struct task_struct *p; + u64 key; + /* + * Check for queued entries lockless first. The local runqueue + * is locked so entries will always be accurate.
+ */ if (!sched_interactive) { if (entries <= best_entries) continue; @@ -3536,8 +3549,13 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct * /* if (i) implies other_rq != rq */ if (i) { + /* Check for best id queued lockless first */ + if (other_rq->best_key >= best_key) + continue; + if (unlikely(!trylock_rq(rq, other_rq))) continue; + /* Need to reevaluate entries after locking */ entries = other_rq->sl->entries; if (unlikely(!entries)) { @@ -3545,14 +3563,15 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct * continue; } } - p = other_rq->node.next[0]->value; - - if (!deadline_before(p->deadline, earliest_deadline)) { + key = other_rq->node.next[0]->key; + /* Reevaluate key after locking */ + if (unlikely(key >= best_key)) { if (i) unlock_rq(other_rq); continue; } + p = other_rq->node.next[0]->value; if (!smt_schedule(p, rq)) { if (i) unlock_rq(other_rq); @@ -3571,7 +3590,7 @@ task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct * } best_entries = entries; - earliest_deadline = p->deadline; + best_key = key; edt = p; } @@ -3637,10 +3656,8 @@ static inline void schedule_debug(struct task_struct *prev) */ static inline void set_rq_task(struct rq *rq, struct task_struct *p) { - rq->rq_time_slice = p->time_slice; rq->rq_deadline = p->deadline; - rq->rq_last_ran = p->last_ran = rq->clock_task; - rq->rq_policy = p->policy; + p->last_ran = rq->clock_task; rq->rq_prio = p->prio; #ifdef CONFIG_SMT_NICE rq->rq_mm = p->mm; @@ -3651,7 +3668,6 @@ static inline void set_rq_task(struct rq *rq, struct task_struct *p) static void reset_rq_task(struct rq *rq, struct task_struct *p) { rq->rq_deadline = p->deadline; - rq->rq_policy = p->policy; rq->rq_prio = p->prio; #ifdef CONFIG_SMT_NICE rq->rq_smt_bias = p->smt_bias; @@ -3763,6 +3779,7 @@ static void __sched notrace __schedule(bool preempt) cpu = smp_processor_id(); rq = cpu_rq(cpu); prev = rq->curr; + idle = rq->idle; /* * do_exit() calls schedule() with preemption disabled as an exception; @@ -3787,6 +3804,22 @@ static void __sched notrace __schedule(bool preempt) */ smp_mb__before_spinlock(); rq_lock(rq); +#ifdef CONFIG_SMP + if (rq->preempt) { + /* + * Make sure resched_curr hasn't triggered a preemption + * locklessly on a task that has since scheduled away. Spurious + * wakeup of idle is okay though. 
+ */ + if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) { + rq->preempt = NULL; + clear_preempt_need_resched(); + rq_unlock_irq(rq); + return; + } + rq->preempt = NULL; + } +#endif switch_count = &prev->nivcsw; if (!preempt && prev->state) { @@ -3821,20 +3854,15 @@ static void __sched notrace __schedule(bool preempt) update_clocks(rq); update_cpu_clock_switch(rq, prev); if (rq->clock - rq->last_tick > HALF_JIFFY_NS) - rq->dither = false; + rq->dither = 0; else - rq->dither = true; + rq->dither = HALF_JIFFY_US; clear_tsk_need_resched(prev); clear_preempt_need_resched(); - idle = rq->idle; if (idle != prev) { - /* Update all the information stored on struct rq */ - prev->time_slice = rq->rq_time_slice; - prev->deadline = rq->rq_deadline; check_deadline(prev, rq); - prev->last_ran = rq->clock_task; return_task(prev, rq, cpu, deactivate); } @@ -5059,6 +5087,7 @@ SYSCALL_DEFINE0(sched_yield) p = current; rq = this_rq_lock(); + time_slice_expired(p, rq); schedstat_inc(task_rq(p), yld_count); /* @@ -5173,6 +5202,7 @@ EXPORT_SYMBOL(yield); */ int __sched yield_to(struct task_struct *p, bool preempt) { + struct task_struct *rq_p; struct rq *rq, *p_rq; unsigned long flags; int yielded = 0; @@ -5192,18 +5222,19 @@ again: } double_rq_lock(rq, p_rq); - if (task_rq(p) != p_rq) { + if (unlikely(task_rq(p) != p_rq)) { double_rq_unlock(rq, p_rq); goto again; } yielded = 1; - if (p->deadline > rq->rq_deadline) - p->deadline = rq->rq_deadline; - p->time_slice += rq->rq_time_slice; - rq->rq_time_slice = 0; + rq_p = rq->curr; + if (p->deadline > rq_p->deadline) + p->deadline = rq_p->deadline; + p->time_slice += rq_p->time_slice; if (p->time_slice > timeslice()) p->time_slice = timeslice(); + time_slice_expired(rq_p, rq); if (preempt && rq != p_rq) resched_task(p_rq->curr); double_rq_unlock(rq, p_rq); @@ -5818,15 +5849,17 @@ static void bind_zero(int src_cpu) do_each_thread(t, p) { if (cpumask_test_cpu(src_cpu, tsk_cpus_allowed(p))) { - cpumask_clear_cpu(src_cpu, tsk_cpus_allowed(p)); - cpumask_set_cpu(0, tsk_cpus_allowed(p)); + bool local = (task_cpu(p) == src_cpu); + + /* task_running is the cpu stopper thread */ + if (local && task_running(task_rq(p), p)) + continue; + atomic_clear_cpu(src_cpu, tsk_cpus_allowed(p)); + atomic_set_cpu(0, tsk_cpus_allowed(p)); p->zerobound = true; bound++; - if (task_cpu(p) == src_cpu) { + if (local) set_task_cpu(p, 0); - if (task_running(task_rq(p), p)) - resched_task(p); - } } } while_each_thread(t, p); @@ -7552,7 +7585,7 @@ void __init sched_init(void) rq->last_jiffy = jiffies; rq->user_pc = rq->nice_pc = rq->softirq_pc = rq->system_pc = rq->iowait_pc = rq->idle_pc = 0; - rq->dither = false; + rq->dither = 0; set_rq_task(rq, &init_task); rq->iso_ticks = 0; rq->iso_refractory = false; @@ -7800,21 +7833,7 @@ void vtime_account_system_irqsafe(struct task_struct *tsk) local_irq_restore(flags); } EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); - -#ifndef __ARCH_HAS_VTIME_TASK_SWITCH -void vtime_task_switch(struct task_struct *prev) -{ - if (is_idle_task(prev)) - vtime_account_idle(prev); - else - vtime_account_system(prev); - - vtime_account_user(prev); - arch_vtime_task_switch(prev); -} -#endif - -#else +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Perform (stime * rtime) / total, but avoid multiplication overflow by * losing precision when the numbers are big. 
@@ -7936,7 +7955,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void init_idle_bootup_task(struct task_struct *idle) {} diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h index d2d6696..f8d0d58 100644 --- a/kernel/sched/MuQSS.h +++ b/kernel/sched/MuQSS.h @@ -22,11 +22,11 @@ struct rq { /* Stored data about rq->curr to work outside rq lock */ u64 rq_deadline; - unsigned int rq_policy; - int rq_time_slice; - u64 rq_last_ran; int rq_prio; + /* Best queued id for use outside lock */ + u64 best_key; + unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */ unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */ u64 niffies; /* Last time this RQ updated rq clock */ @@ -47,6 +47,8 @@ struct rq { skiplist_node node; skiplist *sl; #ifdef CONFIG_SMP + struct task_struct *preempt; /* Preempt triggered on this task */ + int cpu; /* cpu of this runqueue */ bool online; @@ -78,7 +80,7 @@ struct rq { u64 clock, old_clock, last_tick; u64 clock_task; - bool dither; + int dither; int iso_ticks; bool iso_refractory;