
Commit b724d4dc authored by aumgn

[build/linux-deb] Add wastedcores fixes for 4.9

parent 2851e89b
4.9.x
\ No newline at end of file
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c242944f5cbd..60ed5e11bd05 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6731,6 +6731,8 @@ struct sg_lb_stats {
unsigned int sum_nr_running; /* Nr tasks running in the group */
unsigned int idle_cpus;
unsigned int group_weight;
+ unsigned long min_load;
+ unsigned long max_load;
enum group_type group_type;
int group_no_capacity;
#ifdef CONFIG_NUMA_BALANCING
@@ -7030,6 +7032,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
bool *overload)
{
unsigned long load;
+ unsigned long min_load = ULONG_MAX;
+ unsigned long max_load = 0UL;
int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
@@ -7043,6 +7047,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
else
load = source_load(i, load_idx);
+ if (load < min_load)
+ min_load = load;
+ if (load > max_load)
+ max_load = load;
+
sgs->group_load += load;
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
@@ -7067,6 +7076,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
+ sgs->min_load = min_load;
+ sgs->max_load = max_load;
+
if (sgs->sum_nr_running)
sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -7096,14 +7108,15 @@ static bool update_sd_pick_busiest(struct lb_env *env,
{
struct sg_lb_stats *busiest = &sds->busiest_stat;
- if (sgs->group_type > busiest->group_type)
- return true;
+ /* Not sure we want to keep that or not */
+ /*if (sgs->group_type > busiest->group_type)
+ return true;*/
- if (sgs->group_type < busiest->group_type)
- return false;
+ if (sgs->min_load <= busiest->min_load)
+ return true;
- if (sgs->avg_load <= busiest->avg_load)
- return false;
+ if (sgs->group_type == group_imbalanced)
+ return true;
/* This is the busiest node in its class. */
if (!(env->sd->flags & SD_ASYM_PACKING))
@@ -7369,64 +7382,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
local = &sds->local_stat;
busiest = &sds->busiest_stat;
- if (busiest->group_type == group_imbalanced) {
- /*
- * In the group_imb case we cannot rely on group-wide averages
- * to ensure cpu-load equilibrium, look at wider averages. XXX
- */
- busiest->load_per_task =
- min(busiest->load_per_task, sds->avg_load);
- }
-
- /*
- * Avg load of busiest sg can be less and avg load of local sg can
- * be greater than avg load across all sgs of sd because avg load
- * factors in sg capacity and sgs with smaller group_type are
- * skipped when updating the busiest sg:
- */
- if (busiest->avg_load <= sds->avg_load ||
- local->avg_load >= sds->avg_load) {
+ if (local->min_load >= busiest->max_load) {
env->imbalance = 0;
- return fix_small_imbalance(env, sds);
}
-
- /*
- * If there aren't any idle cpus, avoid creating some.
- */
- if (busiest->group_type == group_overloaded &&
- local->group_type == group_overloaded) {
- load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
- if (load_above_capacity > busiest->group_capacity) {
- load_above_capacity -= busiest->group_capacity;
- load_above_capacity *= scale_load_down(NICE_0_LOAD);
- load_above_capacity /= busiest->group_capacity;
- } else
- load_above_capacity = ~0UL;
+ else {
+ env->imbalance = (busiest->max_load - local->min_load) / 2;
}
- /*
- * We're trying to get all the cpus to the average_load, so we don't
- * want to push ourselves above the average load, nor do we wish to
- * reduce the max loaded cpu below the average load. At the same time,
- * we also don't want to reduce the group load below the group
- * capacity. Thus we look for the minimum possible imbalance.
- */
- max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
-
- /* How much load to actually move to equalise the imbalance */
- env->imbalance = min(
- max_pull * busiest->group_capacity,
- (sds->avg_load - local->avg_load) * local->group_capacity
- ) / SCHED_CAPACITY_SCALE;
-
- /*
- * if *imbalance is less than the average load per runnable task
- * there is no guarantee that any tasks will be moved so we'll have
- * a think about bumping its value to force at least one task to be
- * moved
- */
- if (env->imbalance < busiest->load_per_task)
- return fix_small_imbalance(env, sds);
+ return;
}
/******* find_busiest_group() helpers end here *********************/
@@ -7481,41 +7444,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
busiest->group_no_capacity)
goto force_balance;
- /*
- * If the local group is busier than the selected busiest group
- * don't try and pull any tasks.
- */
- if (local->avg_load >= busiest->avg_load)
- goto out_balanced;
-
- /*
- * Don't pull any tasks if this group is already above the domain
- * average load.
- */
- if (local->avg_load >= sds.avg_load)
+ if (local->min_load < busiest->min_load)
+ goto force_balance;
+ else
goto out_balanced;
- if (env->idle == CPU_IDLE) {
- /*
- * This cpu is idle. If the busiest group is not overloaded
- * and there is no imbalance between this and busiest group
- * wrt idle cpus, it is balanced. The imbalance becomes
- * significant if the diff is greater than 1 otherwise we
- * might end up to just move the imbalance on another group
- */
- if ((busiest->group_type != group_overloaded) &&
- (local->idle_cpus <= (busiest->idle_cpus + 1)))
- goto out_balanced;
- } else {
- /*
- * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
- * imbalance_pct to be conservative.
- */
- if (100 * busiest->avg_load <=
- env->sd->imbalance_pct * local->avg_load)
- goto out_balanced;
- }
-
force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(env, &sds);
Seems to be fixed by mainline commit 8f37961cf22304fb286c7604d3a7f6104dcc1283.
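
In short, the patch above drops the avg_load-based heuristics: update_sg_lb_stats() additionally records the least- and most-loaded CPU of every group, update_sd_pick_busiest() prefers groups by min_load, and calculate_imbalance() moves half of the gap between the busiest group's max_load and the local group's min_load (nothing at all once the local minimum already reaches the busiest maximum). Below is a minimal userspace sketch of that arithmetic; the names (group_stats, scan_group, compute_imbalance) are illustrative and not kernel APIs.

/* Standalone sketch of the min/max-load imbalance rule used in the patch.
 * Illustrative names only; this is not kernel code. */
#include <limits.h>
#include <stdio.h>

struct group_stats {
	unsigned long min_load;   /* load of the least-loaded CPU in the group */
	unsigned long max_load;   /* load of the most-loaded CPU in the group */
};

/* Scan per-CPU loads once and record the extremes, as the patched
 * update_sg_lb_stats() does. */
static void scan_group(const unsigned long *loads, int nr_cpus,
		       struct group_stats *st)
{
	st->min_load = ULONG_MAX;
	st->max_load = 0UL;
	for (int i = 0; i < nr_cpus; i++) {
		if (loads[i] < st->min_load)
			st->min_load = loads[i];
		if (loads[i] > st->max_load)
			st->max_load = loads[i];
	}
}

/* Imbalance rule from the patched calculate_imbalance(): move half of the
 * gap between the busiest group's hottest CPU and the local group's coolest
 * CPU; move nothing if the local minimum is already at least as high. */
static unsigned long compute_imbalance(const struct group_stats *local,
				       const struct group_stats *busiest)
{
	if (local->min_load >= busiest->max_load)
		return 0;
	return (busiest->max_load - local->min_load) / 2;
}

int main(void)
{
	unsigned long local_loads[]   = { 100, 400 };
	unsigned long busiest_loads[] = { 900, 700 };
	struct group_stats local, busiest;

	scan_group(local_loads, 2, &local);
	scan_group(busiest_loads, 2, &busiest);
	printf("imbalance = %lu\n", compute_imbalance(&local, &busiest));
	return 0;
}

Built with any C99 compiler this prints imbalance = 400 for the sample loads, i.e. half of 900 - 100.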
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c242944f5cbd..5c3c30664790 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5631,6 +5631,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int want_affine = 0;
int sync = wake_flags & WF_SYNC;
+ int _cpu;
+ u64 oldest_idle_stamp = 0xfffffffffffffff;
+ int oldest_idle_stamp_cpu;
+
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
@@ -5638,6 +5642,28 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
}
rcu_read_lock();
+
+ if (!cpu_rq(prev_cpu)->nr_running) {
+ new_cpu = prev_cpu;
+ goto unlock;
+ }
+
+ for_each_online_cpu(_cpu) {
+ if (!cpumask_test_cpu(_cpu, tsk_cpus_allowed(p)) ||
+ cpu_rq(_cpu)->nr_running)
+ continue;
+
+ if (cpu_rq(_cpu)->idle_stamp < oldest_idle_stamp) {
+ oldest_idle_stamp = cpu_rq(_cpu)->idle_stamp;
+ oldest_idle_stamp_cpu = _cpu;
+ }
+ }
+
+ if (oldest_idle_stamp != 0xfffffffffffffff) {
+ new_cpu = oldest_idle_stamp_cpu;
+ goto unlock;
+ }
+
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
break;
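
The select_task_rq_fair() hunk short-circuits the normal wakeup path: if the task's previous CPU has nothing running, wake it there; otherwise scan the online CPUs the task is allowed on and, if any of them are idle, pick the one with the oldest idle_stamp, i.e. the core that has been idle the longest. Only when no allowed idle CPU exists does the code fall through to the usual sched-domain walk. Below is a standalone sketch of that selection rule; the cpu_state struct is an illustrative stand-in for the kernel's runqueue fields.

/* Sketch of the "wake on the longest-idle allowed CPU" rule from the patch.
 * Illustrative types only; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct cpu_state {
	int nr_running;        /* 0 means the CPU is idle */
	uint64_t idle_stamp;   /* when the CPU went idle; smaller == idle longer */
	int allowed;           /* task is allowed to run on this CPU */
};

/* Return the allowed idle CPU with the oldest idle_stamp, or -1 if none. */
static int pick_longest_idle_cpu(const struct cpu_state *cpus, int nr_cpus)
{
	uint64_t oldest = UINT64_MAX;
	int pick = -1;

	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		if (!cpus[cpu].allowed || cpus[cpu].nr_running)
			continue;
		if (cpus[cpu].idle_stamp < oldest) {
			oldest = cpus[cpu].idle_stamp;
			pick = cpu;
		}
	}
	return pick;
}

int main(void)
{
	struct cpu_state cpus[] = {
		{ .nr_running = 2, .idle_stamp = 0,   .allowed = 1 },
		{ .nr_running = 0, .idle_stamp = 500, .allowed = 1 },
		{ .nr_running = 0, .idle_stamp = 120, .allowed = 1 },
		{ .nr_running = 0, .idle_stamp = 90,  .allowed = 0 },
	};

	printf("wake on CPU %d\n", pick_longest_idle_cpu(cpus, 4));
	return 0;
}

For the sample data this picks CPU 2: CPU 3 went idle even earlier, but the task is not allowed to run there.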
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 154fd689fe02..a6d9719e012e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6145,62 +6145,68 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
struct sd_data *sdd = sd->private;
struct sched_domain *sibling;
int i;
+ int tries;
cpumask_clear(covered);
- for_each_cpu(i, span) {
- struct cpumask *sg_span;
-
- if (cpumask_test_cpu(i, covered))
- continue;
+ for(tries = 0; tries < 2; tries++) {
+ for_each_cpu(i, span) {
+ struct cpumask *sg_span;
+ if(tries == 0 && i != cpu)
+ continue;
- sibling = *per_cpu_ptr(sdd->sd, i);
+ if (cpumask_test_cpu(i, covered))
+ continue;
- /* See the comment near build_group_mask(). */
- if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
- continue;
+ sibling = *per_cpu_ptr(sdd->sd, i);
- sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
- GFP_KERNEL, cpu_to_node(cpu));
+ /* See the comment near build_group_mask(). */
+ if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+ continue;
- if (!sg)
- goto fail;
+ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ GFP_KERNEL, cpu_to_node(cpu));
- sg_span = sched_group_cpus(sg);
- if (sibling->child)
- cpumask_copy(sg_span, sched_domain_span(sibling->child));
- else
- cpumask_set_cpu(i, sg_span);
+ if (!sg)
+ goto fail;
- cpumask_or(covered, covered, sg_span);
+ sg_span = sched_group_cpus(sg);
+ if (sibling->child)
+ cpumask_copy(sg_span, sched_domain_span(sibling->child));
+ else
+ cpumask_set_cpu(i, sg_span);
- sg->sgc = *per_cpu_ptr(sdd->sgc, i);
- if (atomic_inc_return(&sg->sgc->ref) == 1)
- build_group_mask(sd, sg);
+ cpumask_or(covered, covered, sg_span);
- /*
- * Initialize sgc->capacity such that even if we mess up the
- * domains and no possible iteration will get us here, we won't
- * die on a /0 trap.
- */
- sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+ sg->sgc = *per_cpu_ptr(sdd->sgc, i);
+ if (atomic_inc_return(&sg->sgc->ref) == 1)
+ build_group_mask(sd, sg);
- /*
- * Make sure the first group of this domain contains the
- * canonical balance cpu. Otherwise the sched_domain iteration
- * breaks. See update_sg_lb_stats().
- */
- if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
- group_balance_cpu(sg) == cpu)
- groups = sg;
+ /*
+ * Initialize sgc->capacity such that even if we mess up the
+ * domains and no possible iteration will get us here, we won't
+ * die on a /0 trap.
+ */
+ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
- if (!first)
- first = sg;
- if (last)
- last->next = sg;
- last = sg;
- last->next = first;
+ /*
+ * Make sure the first group of this domain contains the
+ * canonical balance cpu. Otherwise the sched_domain iteration
+ * breaks. See update_sg_lb_stats().
+ */
+ if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
+ group_balance_cpu(sg) == cpu)
+ groups = sg;
+
+ if (!first)
+ first = sg;
+ if (last)
+ last->next = sg;
+ last = sg;
+ last->next = first;
+ }
}
+
sd->groups = groups;
return 0;
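
The build_overlap_sched_groups() change keeps the loop body as it was but wraps it in two passes over the domain span: the first pass only handles i == cpu, so the group containing the balance CPU is always constructed and linked first; the second pass then builds the remaining groups, with the covered mask preventing duplicates. The sketch below shows just that iteration order; it treats every span entry as its own group and ignores the sg_span/sgc bookkeeping of the real function.

/* Sketch of the two-pass ordering used in the patched
 * build_overlap_sched_groups(): handle the anchor entry first, then the
 * rest, skipping anything already covered. Illustrative code only. */
#include <stdbool.h>
#include <stdio.h>

#define SPAN 4

static void build_groups(int anchor_cpu)
{
	bool covered[SPAN] = { false };
	int tries, i;

	for (tries = 0; tries < 2; tries++) {
		for (i = 0; i < SPAN; i++) {
			/* First pass: only the anchor, so its group comes first. */
			if (tries == 0 && i != anchor_cpu)
				continue;
			if (covered[i])
				continue;
			covered[i] = true;
			printf("building group starting at cpu %d\n", i);
		}
	}
}

int main(void)
{
	build_groups(2);	/* prints 2 first, then 0, 1, 3 */
	return 0;
}

With anchor_cpu = 2 the group for CPU 2 is built first, followed by CPUs 0, 1 and 3, which is the ordering guarantee the comment about the canonical balance CPU asks for.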
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c242944f5cbd..9b9c9823a103 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7640,36 +7640,7 @@ static int active_load_balance_cpu_stop(void *data);
static int should_we_balance(struct lb_env *env)
{
- struct sched_group *sg = env->sd->groups;
- struct cpumask *sg_cpus, *sg_mask;
- int cpu, balance_cpu = -1;
-
- /*
- * In the newly idle case, we will allow all the cpu's
- * to do the newly idle load balance.
- */
- if (env->idle == CPU_NEWLY_IDLE)
- return 1;
-
- sg_cpus = sched_group_cpus(sg);
- sg_mask = sched_group_mask(sg);
- /* Try to find first idle cpu */
- for_each_cpu_and(cpu, sg_cpus, env->cpus) {
- if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
- continue;
-
- balance_cpu = cpu;
- break;
- }
-
- if (balance_cpu == -1)
- balance_cpu = group_balance_cpu(sg);
-
- /*
- * First idle cpu or the first cpu(busiest) in this sched group
- * is eligible for doing load balancing at this and above domains.
- */
- return balance_cpu == env->dst_cpu;
+ return 1;
}
/*