Remove tasks' affinity for CPUs that are being taken offline and enable their
affinity for CPU0 instead, setting a zerobound bool which is cleared only once
all the CPUs are back online. Add messages reporting how many tasks are
bound/unbound.

-ck

Index: linux-3.12-ck1/kernel/sched/bfs.c
===================================================================
--- linux-3.12-ck1.orig/kernel/sched/bfs.c	2013-11-18 21:01:17.869438647 +1100
+++ linux-3.12-ck1/kernel/sched/bfs.c	2013-11-29 15:16:43.641941448 +1100
@@ -5385,29 +5385,63 @@
 #ifdef CONFIG_HOTPLUG_CPU
 extern struct task_struct *cpu_stopper_task;
 
-/* Run through task list and find tasks affined to just the dead cpu, then
- * allocate a new affinity */
-static void break_sole_affinity(int src_cpu, struct task_struct *idle)
+/* Run through task list and find tasks affined to the dead cpu, then remove
+ * that cpu from the list, enable cpu0 and set the zerobound flag. */
+static void bind_zero(int src_cpu)
 {
 	struct task_struct *p, *t, *stopper;
+	int bound = 0;
+
+	if (src_cpu == 0)
+		return;
 
 	stopper = per_cpu(cpu_stopper_task, src_cpu);
 	do_each_thread(t, p) {
-		if (p != stopper && p != idle && !online_cpus(p)) {
-			cpumask_copy(tsk_cpus_allowed(p), cpu_possible_mask);
-			/*
-			 * Don't tell them about moving exiting tasks or
-			 * kernel threads (both mm NULL), since they never
-			 * leave kernel.
-			 */
-			if (p->mm && printk_ratelimit()) {
-				printk(KERN_INFO "process %d (%s) no "
-				       "longer affine to cpu %d\n",
-				       task_pid_nr(p), p->comm, src_cpu);
-			}
+		if (p != stopper && cpu_isset(src_cpu, *tsk_cpus_allowed(p))) {
+			cpumask_clear_cpu(src_cpu, tsk_cpus_allowed(p));
+			cpumask_set_cpu(0, tsk_cpus_allowed(p));
+			p->zerobound = true;
+			bound++;
 		}
 		clear_sticky(p);
 	} while_each_thread(t, p);
+	if (bound) {
+		printk(KERN_INFO "Removed affinity for %d processes to cpu %d\n",
+		       bound, src_cpu);
+	}
+}
+
+/* Find processes with the zerobound flag and reenable their affinity for the
+ * CPU coming alive. */
+static void unbind_zero(int src_cpu)
+{
+	int unbound = 0, zerobound = 0;
+	struct task_struct *p, *t;
+
+	if (src_cpu == 0)
+		return;
+
+	do_each_thread(t, p) {
+		if (p->mm && p->zerobound) {
+			unbound++;
+			cpumask_set_cpu(src_cpu, tsk_cpus_allowed(p));
+			/* Once every CPU affinity has been re-enabled, remove
+			 * the zerobound flag */
+			if (cpumask_subset(cpu_possible_mask, tsk_cpus_allowed(p))) {
+				p->zerobound = false;
+				zerobound++;
+			}
+		}
+	} while_each_thread(t, p);
+
+	if (unbound) {
+		printk(KERN_INFO "Added affinity for %d processes to cpu %d\n",
+		       unbound, src_cpu);
+	}
+	if (zerobound) {
+		printk(KERN_INFO "Released forced binding to cpu0 for %d processes\n",
+		       zerobound);
+	}
 }
 
 /*
@@ -5424,7 +5458,10 @@
 		switch_mm(mm, &init_mm, current);
 		mmdrop(mm);
 	}
+#else /* CONFIG_HOTPLUG_CPU */
+static void unbind_zero(int src_cpu) {}
 #endif /* CONFIG_HOTPLUG_CPU */
+
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
 	struct sched_param stop_param = { .sched_priority = STOP_PRIO };
@@ -5663,6 +5700,7 @@
 
 			set_rq_online(rq);
 		}
+		unbind_zero(cpu);
 		grq.noc = num_online_cpus();
 		grq_unlock_irqrestore(&flags);
 		break;
@@ -5683,7 +5721,7 @@
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
-		break_sole_affinity(cpu, idle);
+		bind_zero(cpu);
 		grq.noc = num_online_cpus();
 		grq_unlock_irqrestore(&flags);
 		break;
@@ -5708,7 +5746,7 @@
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_STARTING:
 	case CPU_DOWN_FAILED:
-		set_cpu_active((long)hcpu, true);
+		set_cpu_active((long)hcpu, false);
 		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
Index: linux-3.12-ck1/include/linux/sched.h
===================================================================
--- linux-3.12-ck1.orig/include/linux/sched.h	2013-11-18 21:01:17.861438647 +1100
+++ linux-3.12-ck1/include/linux/sched.h	2013-11-29 13:59:31.977757249 +1100
@@ -1044,6 +1044,9 @@
 	u64 sched_time;	/* sched_clock time spent running */
 #ifdef CONFIG_SMP
 	bool sticky;	/* Soft affined flag */
+#ifdef CONFIG_HOTPLUG_CPU
+	bool zerobound;	/* Bound to CPU0 for hotplug */
+#endif
 #endif
 	unsigned long rt_timeout;
 #else	/* CONFIG_SCHED_BFS */
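
For anyone who wants to watch the rebinding from userspace, below is a minimal
sketch (not part of the patch; the file name and build line are only
illustrative) that prints which CPUs the calling task is allowed on via
sched_getaffinity(). Pin a shell to one CPU (e.g. taskset -pc 1 $$), offline
that CPU through /sys/devices/system/cpu/cpu1/online, and the helper run from
that shell should report cpu0 in its mask while dmesg shows the "Removed
affinity ..." message; onlining the CPU again re-adds it to the mask, and once
every possible CPU is allowed again the "Released forced binding to cpu0"
message appears.

/* zerobound-check.c - hypothetical helper, not part of the patch.
 * Prints which CPUs the calling task may run on, so the effect of
 * bind_zero()/unbind_zero() can be observed after offlining and
 * re-onlining a CPU.  Build with: gcc -o zerobound-check zerobound-check.c
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;
	int cpu;

	/* pid 0 means the calling task */
	if (sched_getaffinity(0, sizeof(mask), &mask) == -1) {
		perror("sched_getaffinity");
		return 1;
	}
	for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
		if (CPU_ISSET(cpu, &mask))
			printf("allowed on cpu %d\n", cpu);
	return 0;
}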