Fix missing cpuset code from suspend/resume sequences. -ck Index: linux-3.12-ck1/kernel/sched/bfs.c =================================================================== --- linux-3.12-ck1.orig/kernel/sched/bfs.c 2013-12-02 09:56:58.000000000 +1100 +++ linux-3.12-ck1/kernel/sched/bfs.c 2013-12-02 10:39:35.133023202 +1100 @@ -5390,24 +5390,21 @@ extern struct task_struct *cpu_stopper_t static void break_sole_affinity(int src_cpu, struct task_struct *idle) { struct task_struct *p, *t, *stopper; + int unbound = 0; stopper = per_cpu(cpu_stopper_task, src_cpu); do_each_thread(t, p) { - if (p != stopper && p != idle && !online_cpus(p)) { - cpumask_copy(tsk_cpus_allowed(p), cpu_possible_mask); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu %d\n", - task_pid_nr(p), p->comm, src_cpu); - } + if (p != stopper && !online_cpus(p)) { + cpuset_cpus_allowed_fallback(p); + unbound++; } clear_sticky(p); } while_each_thread(t, p); + + if (unbound) { + printk(KERN_INFO "Broke affinity for %d processes to cpu %d\n", + unbound, src_cpu); + } } /* @@ -6822,34 +6819,66 @@ match2: mutex_unlock(&sched_domains_mutex); } +static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ + /* * Update cpusets according to cpu_active mask. If cpusets are * disabled, cpuset_update_active_cpus() becomes a simple wrapper * around partition_sched_domains(). + * + * If we come here as part of a suspend/resume, don't touch cpusets because we + * want to restore it back to its original state upon resume anyway. */ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { + case CPU_ONLINE_FROZEN: + case CPU_DOWN_FAILED_FROZEN: + + /* + * num_cpus_frozen tracks how many CPUs are involved in suspend + * resume sequence. As long as this is not the last online + * operation in the resume sequence, just build a single sched + * domain, ignoring cpusets. + */ + num_cpus_frozen--; + if (likely(num_cpus_frozen)) { + partition_sched_domains(1, NULL, NULL); + break; + } + + /* + * This is the last CPU online operation. So fall through and + * restore the original sched domains by considering the + * cpuset configurations. + */ + case CPU_ONLINE: case CPU_DOWN_FAILED: cpuset_update_active_cpus(true); - return NOTIFY_OK; + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: cpuset_update_active_cpus(false); - return NOTIFY_OK; + break; + case CPU_DOWN_PREPARE_FROZEN: + num_cpus_frozen++; + partition_sched_domains(1, NULL, NULL); + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)