Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes are:

   - Migrate CPU-intense 'misfit' tasks on asymmetric capacity systems,
     to better utilize (much) faster 'big core' CPUs. (Morten Rasmussen,
     Valentin Schneider)

   - Topology handling improvements, in particular when CPU capacity
     changes, and related load-balancing fixes/improvements (Morten
     Rasmussen)

   - ... plus misc other improvements, fixes and updates"
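
A rough model of the "misfit" logic from the first item: a task is a misfit when its estimated utilization, inflated by a headroom margin, no longer fits the capacity of the CPU it is running on; the runqueue is then flagged (rq->misfit_task_load) so load balancing can pull the task to a bigger CPU. A self-contained C sketch under those assumptions — the 1280/1024 margin and the helper name mirror the series, but the program itself is illustrative, not kernel code:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024   /* capacity of the biggest CPU */
#define CAPACITY_MARGIN      1280   /* ~25% headroom, as in the series */

/* A task "fits" a CPU if its utilization, inflated by the margin,
 * still stays below that CPU's capacity. */
static int task_fits_capacity(unsigned long util, unsigned long capacity)
{
        return capacity * 1024 > util * CAPACITY_MARGIN;
}

int main(void)
{
        unsigned long little_cpu = 446;                 /* e.g. a LITTLE core */
        unsigned long big_cpu = SCHED_CAPACITY_SCALE;   /* a big core */
        unsigned long task_util = 400;                  /* CPU-intensive task */

        /* Misfit: too big for the little CPU, fine on the big one. */
        printf("fits little: %d\n", task_fits_capacity(task_util, little_cpu));
        printf("fits big:    %d\n", task_fits_capacity(task_util, big_cpu));
        return 0;
}

Running it reports that the utilization-400 task fits the 1024-capacity big core but not the 446-capacity little core.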

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (28 commits)
  sched/completions/Documentation: Add recommendation for dynamic and ONSTACK completions
  sched/completions/Documentation: Clean up the document some more
  sched/completions/Documentation: Fix a couple of punctuation nits
  cpu/SMT: State SMT is disabled even with nosmt and without "=force"
  sched/core: Fix comment regarding nr_iowait_cpu() and get_iowait_load()
  sched/fair: Remove setting task's se->runnable_weight during PELT update
  sched/fair: Disable LB_BIAS by default
  sched/pelt: Fix warning and clean up IRQ PELT config
  sched/topology: Make local variables static
  sched/debug: Use symbolic names for task state constants
  sched/numa: Remove unused numa_stats::nr_running field
  sched/numa: Remove unused code from update_numa_stats()
  sched/debug: Explicitly cast sched_feat() to bool
  sched/core: Disable SD_PREFER_SIBLING on asymmetric CPU capacity domains
  sched/fair: Don't move tasks to lower capacity CPUs unless necessary
  sched/fair: Set rq->rd->overload when misfit
  sched/fair: Wrap rq->rd->overload accesses with READ/WRITE_ONCE()
  sched/core: Change root_domain->overload type to int
  sched/fair: Change 'prefer_sibling' type to bool
  sched/fair: Kick nohz balance if rq->misfit_task_load
  ...
torvalds committed Oct 23, 2018
2 parents 0d1b82c + 11e1369 commit 42f52e1
Showing 16 changed files with 463 additions and 199 deletions.
261 changes: 152 additions & 109 deletions Documentation/scheduler/completion.txt

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions arch/arm/include/asm/topology.h
@@ -33,6 +33,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
 #else
 
 static inline void init_cpu_topology(void) { }
3 changes: 3 additions & 0 deletions arch/arm64/include/asm/topology.h
@@ -45,6 +45,9 @@ int pcibus_to_node(struct pci_bus *bus);
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
26 changes: 26 additions & 0 deletions drivers/base/arch_topology.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sched/topology.h>
+#include <linux/cpuset.h>
 
 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
 
@@ -47,6 +48,9 @@ static ssize_t cpu_capacity_show(struct device *dev,
 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
 }
 
+static void update_topology_flags_workfn(struct work_struct *work);
+static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
+
 static ssize_t cpu_capacity_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf,
@@ -72,6 +76,8 @@ static ssize_t cpu_capacity_store(struct device *dev,
 	topology_set_cpu_scale(i, new_capacity);
 	mutex_unlock(&cpu_scale_mutex);
 
+	schedule_work(&update_topology_flags_work);
+
 	return count;
 }
 
@@ -96,6 +102,25 @@ static int register_cpu_capacity_sysctl(void)
 }
 subsys_initcall(register_cpu_capacity_sysctl);
 
+static int update_topology;
+
+int topology_update_cpu_topology(void)
+{
+	return update_topology;
+}
+
+/*
+ * Updating the sched_domains can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void update_topology_flags_workfn(struct work_struct *work)
+{
+	update_topology = 1;
+	rebuild_sched_domains();
+	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
+	update_topology = 0;
+}
+
 static u32 capacity_scale;
 static u32 *raw_capacity;
 
@@ -201,6 +226,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
 
 	if (cpumask_empty(cpus_to_visit)) {
 		topology_normalize_cpu_scale();
+		schedule_work(&update_topology_flags_work);
 		free_raw_capacity();
 		pr_debug("cpu_capacity: parsing done\n");
 		schedule_work(&parsing_done_work);
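
The new update_topology_flags_work above uses the kernel's standard deferred-work idiom: a context that cannot take the locks rebuild_sched_domains() needs (here, cpufreq callbacks) just queues work to run later in process context. A minimal module-style sketch of the same idiom, with placeholder names — not the driver code itself:

#include <linux/module.h>
#include <linux/workqueue.h>

static void my_deferred_workfn(struct work_struct *work);
static DECLARE_WORK(my_deferred_work, my_deferred_workfn);

/* Runs later in process context, where sleeping locks are allowed. */
static void my_deferred_workfn(struct work_struct *work)
{
        pr_debug("deferred work ran\n");
}

static int __init my_init(void)
{
        /* Safe from contexts that can't take the locks themselves. */
        schedule_work(&my_deferred_work);
        return 0;
}

static void __exit my_exit(void)
{
        flush_work(&my_deferred_work); /* don't unload with work pending */
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

flush_work() on the exit path is the usual guard against unloading while the work item is still queued.
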
1 change: 1 addition & 0 deletions include/linux/arch_topology.h
@@ -9,6 +9,7 @@
 #include <linux/percpu.h>
 
 void topology_normalize_cpu_scale(void);
+int topology_update_cpu_topology(void);
 
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
6 changes: 3 additions & 3 deletions include/linux/sched/topology.h
@@ -23,10 +23,10 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010	/* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY	0x0040	/* Groups have different max cpu capacities */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
+#define SD_ASYM_CPUCAPACITY	0x0040	/* Domain members have different CPU capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share CPU capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
+#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share CPU pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
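
These SD_* values are single bits ORed into a sched_domain's flags word and tested with plain mask operations; the "Disable SD_PREFER_SIBLING on asymmetric CPU capacity domains" commit in this pull is essentially one such test-and-clear. A standalone toy sketch of the idiom — the real logic runs during sched-domain setup, not in a main():

#include <stdio.h>

/* Values from the header above. */
#define SD_ASYM_CPUCAPACITY	0x0040
#define SD_PREFER_SIBLING	0x1000

int main(void)
{
        /* A domain's properties are ORed into one flags word. */
        int flags = SD_ASYM_CPUCAPACITY | SD_PREFER_SIBLING;

        /* The series drops SD_PREFER_SIBLING where capacities differ:
         * spreading to a sibling could land a heavy task on a slow CPU. */
        if (flags & SD_ASYM_CPUCAPACITY)
                flags &= ~SD_PREFER_SIBLING;

        printf("prefer_sibling: %s\n",
               (flags & SD_PREFER_SIBLING) ? "yes" : "no");
        return 0;
}
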
11 changes: 8 additions & 3 deletions include/trace/events/sched.h
@@ -159,9 +159,14 @@ TRACE_EVENT(sched_switch,
 
 		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
 		__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
-				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
-				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
-				{ 0x40, "P" }, { 0x80, "I" }) :
+				{ TASK_INTERRUPTIBLE, "S" },
+				{ TASK_UNINTERRUPTIBLE, "D" },
+				{ __TASK_STOPPED, "T" },
+				{ __TASK_TRACED, "t" },
+				{ EXIT_DEAD, "X" },
+				{ EXIT_ZOMBIE, "Z" },
+				{ TASK_PARKED, "P" },
+				{ TASK_DEAD, "I" }) :
 		"R",
 
 		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
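
The hunk only swaps magic numbers for the symbolic state constants; the mapping to the one-letter codes that sched_switch prints is unchanged. A standalone C sketch of that decoding, with the constants copied from the old hex values shown above:

#include <stdio.h>

/* Old hex values, now spelled TASK_INTERRUPTIBLE etc. in the kernel. */
static const struct { unsigned int mask; const char *name; } states[] = {
        { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, { 0x08, "t" },
        { 0x10, "X" }, { 0x20, "Z" }, { 0x40, "P" }, { 0x80, "I" },
};

static void print_prev_state(unsigned int prev_state)
{
        int printed = 0;
        size_t i;

        for (i = 0; i < sizeof(states) / sizeof(states[0]); i++) {
                if (prev_state & states[i].mask)
                        printf("%s%s", printed++ ? "|" : "", states[i].name);
        }
        if (!printed)
                printf("R");    /* no bits set: task was runnable */
        printf("\n");
}

int main(void)
{
        print_prev_state(0x00); /* R */
        print_prev_state(0x02); /* D: uninterruptible sleep */
        print_prev_state(0x24); /* T|Z: multiple bits decode joined by | */
        return 0;
}

A zero prev_state falls through to "R", matching the ternary in the TP_printk() above.
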
5 changes: 5 additions & 0 deletions init/Kconfig
@@ -415,6 +415,11 @@ config IRQ_TIME_ACCOUNTING
 
 	  If in doubt, say N here.
 
+config HAVE_SCHED_AVG_IRQ
+	def_bool y
+	depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
+	depends on SMP
+
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	depends on MULTIUSER
1 change: 1 addition & 0 deletions kernel/cpu.c
@@ -383,6 +383,7 @@ void __init cpu_smt_disable(bool force)
 		pr_info("SMT: Force disabled\n");
 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
 	} else {
+		pr_info("SMT: disabled\n");
 		cpu_smt_control = CPU_SMT_DISABLED;
 	}
 }
17 changes: 9 additions & 8 deletions kernel/sched/core.c
@@ -135,9 +135,8 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	 * In theory, the compile should just see 0 here, and optimize out the call
 	 * to sched_rt_avg_update. But I don't trust it...
 	 */
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	s64 steal = 0, irq_delta = 0;
-#endif
+	s64 __maybe_unused steal = 0, irq_delta = 0;
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
@@ -177,7 +176,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 
 	rq->clock_task += delta;
 
-#ifdef HAVE_SCHED_AVG_IRQ
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		update_irq_load_avg(rq, irq_delta + steal);
 #endif
@@ -701,6 +700,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
 	if (idle_policy(p->policy)) {
 		load->weight = scale_load(WEIGHT_IDLEPRIO);
 		load->inv_weight = WMULT_IDLEPRIO;
+		p->se.runnable_weight = load->weight;
 		return;
 	}
 
@@ -713,6 +713,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
 	} else {
 		load->weight = scale_load(sched_prio_to_weight[prio]);
 		load->inv_weight = sched_prio_to_wmult[prio];
+		p->se.runnable_weight = load->weight;
 	}
 }
 
@@ -2915,10 +2916,10 @@ unsigned long nr_iowait(void)
 }
 
 /*
- * Consumers of these two interfaces, like for example the cpufreq menu
- * governor are using nonsensical data. Boosting frequency for a CPU that has
- * IO-wait which might not even end up running the task when it does become
- * runnable.
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data. Preferring shallow idle state selection
+ * for a CPU that has IO-wait which might not even end up running the task when
+ * it does become runnable.
 */
 
 unsigned long nr_iowait_cpu(int cpu)
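
The first core.c hunk above shows the pattern worth copying: instead of wrapping a declaration in the same #if as its only users, mark it __maybe_unused and declare it unconditionally, which keeps -Wunused-variable quiet in every configuration. A standalone sketch — the macro expansion below is the attribute the kernel's macro boils down to, and the SIMULATE_* define is made up for the demo:

#include <stdio.h>

/* The kernel's __maybe_unused is this attribute behind a macro. */
#define __maybe_unused __attribute__((unused))

int main(void)
{
        /* Declared unconditionally; no warning even when the #ifdef
         * block below is compiled out. */
        long __maybe_unused steal = 0, irq_delta = 0;

#ifdef SIMULATE_IRQ_TIME_ACCOUNTING
        irq_delta = 5;  /* only user of the variables */
        printf("irq_delta = %ld\n", irq_delta + steal);
#endif

        return 0;
}

Building with and without -DSIMULATE_IRQ_TIME_ACCOUNTING under -Wall warns in neither case.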