I may have a modest enhancement suggestion concerning the update_cpu_clock()
function. Note that I haven't taken any measurements and I have no idea whether
it is good or bad; I am just sharing the idea.

This started with the realization that this function is not called in a 50/50
proportion but quite often with an alternating tick param value (assuming many
more context switches than ticks). This could induce processor branch
mispredictions and pipeline flushes (at least that is how Intel and PowerPC
processors used to work...). So I tried to see how it would look if I split the
outermost if/else blocks into 2 different functions. As a bonus, this has
allowed me to slightly simplify the logic around the (p == idle) evaluation.

pros:
1 param passing overhead removed
5-6 logic evaluations + conditional jumps removed

cons:
small code duplication

Olivier Langlois

Update version number

Con Kolivas
---
 kernel/sched/bfs.c |   84 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 28 deletions(-)

Index: linux-3.8-bfs/kernel/sched/bfs.c
===================================================================
--- linux-3.8-bfs.orig/kernel/sched/bfs.c	2013-03-03 21:20:50.957613110 +1100
+++ linux-3.8-bfs/kernel/sched/bfs.c	2013-03-03 21:23:38.531518148 +1100
@@ -137,7 +137,7 @@
 
 void print_scheduler_version(void)
 {
-	printk(KERN_INFO "BFS CPU scheduler v0.427 by Con Kolivas.\n");
+	printk(KERN_INFO "BFS CPU scheduler v0.428 by Con Kolivas.\n");
 }
 
 /*
@@ -2508,44 +2508,34 @@ static void pc_user_time(struct rq *rq,
 #define NS_TO_PC(NS)	(NS * 128 / JIFFY_NS)
 
 /*
- * This is called on clock ticks and on context switches.
+ * This is called on clock ticks.
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
  * CPU scheduler quota accounting is also performed here in microseconds.
+ * It is inlined because it is invoked unconditionally from only 1 location.
  */
-static void
-update_cpu_clock(struct rq *rq, struct task_struct *p, bool tick)
+static inline void
+update_cpu_clock_tick(struct rq *rq, struct task_struct *p)
 {
 	long account_ns = rq->clock - rq->timekeep_clock;
 	struct task_struct *idle = rq->idle;
 	unsigned long account_pc;
 
-	if (unlikely(account_ns < 0))
+	if (unlikely(account_ns < 0) || steal_account_process_tick())
 		goto ts_account;
 
 	account_pc = NS_TO_PC(account_ns);
 
-	if (tick) {
-		/* Accurate tick timekeeping */
-		if (steal_account_process_tick())
-			goto ts_account;
-
-		if (user_mode(get_irq_regs()))
-			pc_user_time(rq, p, account_pc, account_ns);
-		else if (p != idle || (irq_count() != HARDIRQ_OFFSET))
-			pc_system_time(rq, p, HARDIRQ_OFFSET,
-				       account_pc, account_ns);
-		else
-			pc_idle_time(rq, idle, account_pc);
+	/* Accurate tick timekeeping */
+	if (user_mode(get_irq_regs()))
+		pc_user_time(rq, p, account_pc, account_ns);
+	else if (p != idle || (irq_count() != HARDIRQ_OFFSET))
+		pc_system_time(rq, p, HARDIRQ_OFFSET,
+			       account_pc, account_ns);
+	else
+		pc_idle_time(rq, idle, account_pc);
 
-		if (sched_clock_irqtime)
-			irqtime_account_hi_si();
-	} else {
-		/* Accurate subtick timekeeping */
-		if (p == idle)
-			pc_idle_time(rq, idle, account_pc);
-		else
-			pc_user_time(rq, p, account_pc, account_ns);
-	}
+	if (sched_clock_irqtime)
+		irqtime_account_hi_si();
 
 	if (p != idle)
 		account_group_exec_runtime(p, account_ns);
@@ -2563,6 +2553,44 @@ ts_account:
 }
 
 /*
+ * This is called on context switches.
+ * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ * CPU scheduler quota accounting is also performed here in microseconds.
+ * It is inlined because it is invoked unconditionally from only 1 location.
+ */
+static inline void
+update_cpu_clock_switch(struct rq *rq, struct task_struct *p)
+{
+	long account_ns = rq->clock - rq->timekeep_clock;
+	struct task_struct *idle = rq->idle;
+	unsigned long account_pc;
+
+	if (unlikely(account_ns < 0))
+		goto ts_account;
+
+	account_pc = NS_TO_PC(account_ns);
+
+	/* Accurate subtick timekeeping */
+	if (p != idle) {
+		pc_user_time(rq, p, account_pc, account_ns);
+		account_group_exec_runtime(p, account_ns);
+	}
+	else
+		pc_idle_time(rq, idle, account_pc);
+
+ts_account:
+	/* time_slice accounting is done in usecs to avoid overflow on 32bit */
+	if (rq->rq_policy != SCHED_FIFO && p != idle) {
+		s64 time_diff = rq->clock - rq->rq_last_ran;
+
+		niffy_diff(&time_diff, 1);
+		rq->rq_time_slice -= NS_TO_US(time_diff);
+	}
+
+	rq->rq_last_ran = rq->timekeep_clock = rq->clock;
+}
+
+/*
  * Return any ns on the sched_clock that have not yet been accounted in
  * @p in case that task is currently running.
  *
@@ -2894,7 +2922,7 @@ void scheduler_tick(void)
 	sched_clock_tick();
 	/* grq lock not grabbed, so only update rq clock */
 	update_rq_clock(rq);
-	update_cpu_clock(rq, rq->curr, true);
+	update_cpu_clock_tick(rq, rq->curr);
 	if (!rq_idle(rq))
 		task_running_tick(rq);
 	else
@@ -3315,7 +3343,7 @@ need_resched:
 	}
 
 	update_clocks(rq);
-	update_cpu_clock(rq, prev, false);
+	update_cpu_clock_switch(rq, prev);
 	if (rq->clock - rq->last_tick > HALF_JIFFY_NS)
 		rq->dither = false;
 	else