diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch new file mode 100644 index 000000000000..8f2418c9efce --- /dev/null +++ b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch @@ -0,0 +1,407 @@ +commit e7cae741f6d645ac68fe8823ca6ef45dbbf6891b +Author: Tejun Heo +Date: Fri Mar 11 07:31:23 2016 -0500 + + sched: Misc preps for cgroup unified hierarchy interface + + Make the following changes in preparation for the cpu controller + interface implementation for the unified hierarchy. This patch + doesn't cause any functional differences. + + * s/cpu_stats_show()/cpu_cfs_stats_show()/ + + * s/cpu_files/cpu_legacy_files/ + + * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While + at it, remove pointless cpuacct_stat_desc[] array. + + Signed-off-by: Tejun Heo + Cc: Ingo Molnar + Cc: Peter Zijlstra + Cc: Li Zefan + Cc: Johannes Weiner + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 732e993..77f3ddd 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8512,7 +8512,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) + return ret; + } + +-static int cpu_stats_show(struct seq_file *sf, void *v) ++static int cpu_cfs_stats_show(struct seq_file *sf, void *v) + { + struct task_group *tg = css_tg(seq_css(sf)); + struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; +@@ -8552,7 +8552,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, + } + #endif /* CONFIG_RT_GROUP_SCHED */ + +-static struct cftype cpu_files[] = { ++static struct cftype cpu_legacy_files[] = { + #ifdef CONFIG_FAIR_GROUP_SCHED + { + .name = "shares", +@@ -8573,7 +8573,7 @@ static struct cftype cpu_files[] = { + }, + { + .name = "stat", +- .seq_show = cpu_stats_show, ++ .seq_show = cpu_cfs_stats_show, + }, + #endif + #ifdef CONFIG_RT_GROUP_SCHED +@@ -8599,7 +8599,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { + .fork = cpu_cgroup_fork, + .can_attach = cpu_cgroup_can_attach, + .attach = cpu_cgroup_attach, +- .legacy_cftypes = cpu_files, ++ .legacy_cftypes = cpu_legacy_files, + .early_init = 1, + }; + +diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c +index dd7cbb5..42b2dd5 100644 +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -177,36 +177,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) + return 0; + } + +-static const char * const cpuacct_stat_desc[] = { +- [CPUACCT_STAT_USER] = "user", +- [CPUACCT_STAT_SYSTEM] = "system", +-}; +- +-static int cpuacct_stats_show(struct seq_file *sf, void *v) ++static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp) + { +- struct cpuacct *ca = css_ca(seq_css(sf)); + int cpu; +- s64 val = 0; + ++ *userp = 0; + for_each_online_cpu(cpu) { + struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); +- val += kcpustat->cpustat[CPUTIME_USER]; +- val += kcpustat->cpustat[CPUTIME_NICE]; ++ *userp += kcpustat->cpustat[CPUTIME_USER]; ++ *userp += kcpustat->cpustat[CPUTIME_NICE]; + } +- val = cputime64_to_clock_t(val); +- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); + +- val = 0; ++ *sysp = 0; + for_each_online_cpu(cpu) { + struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); +- val += kcpustat->cpustat[CPUTIME_SYSTEM]; +- val += kcpustat->cpustat[CPUTIME_IRQ]; +- val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; ++ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM]; ++ *sysp += kcpustat->cpustat[CPUTIME_IRQ]; ++ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ]; + } ++} + +- val = cputime64_to_clock_t(val); +- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); ++static int cpuacct_stats_show(struct seq_file *sf, void *v) ++{ ++ cputime64_t user, sys; + ++ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys); ++ seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user)); ++ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys)); + return 0; + } + + +commit 1bb33e8a69f089f2d3f58a0e681d4ff352e11c97 +Author: Tejun Heo +Date: Fri Mar 11 07:31:23 2016 -0500 + + sched: Implement interface for cgroup unified hierarchy + + While the cpu controller doesn't have any functional problems, there + are a couple interface issues which can be addressed in the v2 + interface. + + * cpuacct being a separate controller. This separation is artificial + and rather pointless as demonstrated by most use cases co-mounting + the two controllers. It also forces certain information to be + accounted twice. + + * Use of different time units. Writable control knobs use + microseconds, some stat fields use nanoseconds while other cpuacct + stat fields use centiseconds. + + * Control knobs which can't be used in the root cgroup still show up + in the root. + + * Control knob names and semantics aren't consistent with other + controllers. + + This patchset implements cpu controller's interface on the unified + hierarchy which adheres to the controller file conventions described + in Documentation/cgroups/unified-hierarchy.txt. Overall, the + following changes are made. + + * cpuacct is implictly enabled and disabled by cpu and its information + is reported through "cpu.stat" which now uses microseconds for all + time durations. All time duration fields now have "_usec" appended + to them for clarity. While this doesn't solve the double accounting + immediately, once majority of users switch to v2, cpu can directly + account and report the relevant stats and cpuacct can be disabled on + the unified hierarchy. + + Note that cpuacct.usage_percpu is currently not included in + "cpu.stat". If this information is actually called for, it can be + added later. + + * "cpu.shares" is replaced with "cpu.weight" and operates on the + standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). + The weight is scaled to scheduler weight so that 100 maps to 1024 + and the ratio relationship is preserved - if weight is W and its + scaled value is S, W / 100 == S / 1024. While the mapped range is a + bit smaller than the orignal scheduler weight range, the dead zones + on both sides are relatively small and covers wider range than the + nice value mappings. This file doesn't make sense in the root + cgroup and isn't create on root. + + * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" + which contains both quota and period. + + * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by + "cpu.rt.max" which contains both runtime and period. + + v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for + CFS bandwidth stats and also using raw division for u64. Use + CONFIG_CFS_BANDWITH and do_div() instead. + + The semantics of "cpu.rt.max" is not fully decided yet. Dropped + for now. + + Signed-off-by: Tejun Heo + Cc: Ingo Molnar + Cc: Peter Zijlstra + Cc: Li Zefan + Cc: Johannes Weiner + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 77f3ddd..7aafe63 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8591,6 +8591,139 @@ static struct cftype cpu_legacy_files[] = { + { } /* terminate */ + }; + ++static int cpu_stats_show(struct seq_file *sf, void *v) ++{ ++ cpuacct_cpu_stats_show(sf); ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ struct task_group *tg = css_tg(seq_css(sf)); ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ u64 throttled_usec; ++ ++ throttled_usec = cfs_b->throttled_time; ++ do_div(throttled_usec, NSEC_PER_USEC); ++ ++ seq_printf(sf, "nr_periods %d\n" ++ "nr_throttled %d\n" ++ "throttled_usec %llu\n", ++ cfs_b->nr_periods, cfs_b->nr_throttled, ++ throttled_usec); ++ } ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ u64 weight = scale_load_down(tg->shares); ++ ++ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); ++} ++ ++static int cpu_weight_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 weight) ++{ ++ /* ++ * cgroup weight knobs should use the common MIN, DFL and MAX ++ * values which are 1, 100 and 10000 respectively. While it loses ++ * a bit of range on both ends, it maps pretty well onto the shares ++ * value used by scheduler and the round-trip conversions preserve ++ * the original value over the entire range. ++ */ ++ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) ++ return -ERANGE; ++ ++ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); ++ ++ return sched_group_set_shares(css_tg(css), scale_load(weight)); ++} ++#endif ++ ++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, ++ long period, long quota) ++{ ++ if (quota < 0) ++ seq_puts(sf, "max"); ++ else ++ seq_printf(sf, "%ld", quota); ++ ++ seq_printf(sf, " %ld\n", period); ++} ++ ++/* caller should put the current value in *@periodp before calling */ ++static int __maybe_unused cpu_period_quota_parse(char *buf, ++ u64 *periodp, u64 *quotap) ++{ ++ char tok[21]; /* U64_MAX */ ++ ++ if (!sscanf(buf, "%s %llu", tok, periodp)) ++ return -EINVAL; ++ ++ *periodp *= NSEC_PER_USEC; ++ ++ if (sscanf(tok, "%llu", quotap)) ++ *quotap *= NSEC_PER_USEC; ++ else if (!strcmp(tok, "max")) ++ *quotap = RUNTIME_INF; ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++static int cpu_max_show(struct seq_file *sf, void *v) ++{ ++ struct task_group *tg = css_tg(seq_css(sf)); ++ ++ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); ++ return 0; ++} ++ ++static ssize_t cpu_max_write(struct kernfs_open_file *of, ++ char *buf, size_t nbytes, loff_t off) ++{ ++ struct task_group *tg = css_tg(of_css(of)); ++ u64 period = tg_get_cfs_period(tg); ++ u64 quota; ++ int ret; ++ ++ ret = cpu_period_quota_parse(buf, &period, "a); ++ if (!ret) ++ ret = tg_set_cfs_bandwidth(tg, period, quota); ++ return ret ?: nbytes; ++} ++#endif ++ ++static struct cftype cpu_files[] = { ++ { ++ .name = "stat", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .seq_show = cpu_stats_show, ++ }, ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "weight", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .read_u64 = cpu_weight_read_u64, ++ .write_u64 = cpu_weight_write_u64, ++ }, ++#endif ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ .name = "max", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .seq_show = cpu_max_show, ++ .write = cpu_max_write, ++ }, ++#endif ++ { } /* terminate */ ++}; ++ + struct cgroup_subsys cpu_cgrp_subsys = { + .css_alloc = cpu_cgroup_css_alloc, + .css_free = cpu_cgroup_css_free, +@@ -8600,7 +8733,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { + .can_attach = cpu_cgroup_can_attach, + .attach = cpu_cgroup_attach, + .legacy_cftypes = cpu_legacy_files, ++ .dfl_cftypes = cpu_files, + .early_init = 1, ++#ifdef CONFIG_CGROUP_CPUACCT ++ /* ++ * cpuacct is enabled together with cpu on the unified hierarchy ++ * and its stats are reported through "cpu.stat". ++ */ ++ .depends_on = 1 << cpuacct_cgrp_id, ++#endif + }; + + #endif /* CONFIG_CGROUP_SCHED */ +diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c +index 42b2dd5..b4d32a6 100644 +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -224,6 +224,30 @@ static struct cftype files[] = { + { } /* terminate */ + }; + ++/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ ++void cpuacct_cpu_stats_show(struct seq_file *sf) ++{ ++ struct cgroup_subsys_state *css; ++ u64 usage, user, sys; ++ ++ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); ++ ++ usage = cpuusage_read(css, seq_cft(sf)); ++ cpuacct_stats_read(css_ca(css), &user, &sys); ++ ++ user *= TICK_NSEC; ++ sys *= TICK_NSEC; ++ do_div(usage, NSEC_PER_USEC); ++ do_div(user, NSEC_PER_USEC); ++ do_div(sys, NSEC_PER_USEC); ++ ++ seq_printf(sf, "usage_usec %llu\n" ++ "user_usec %llu\n" ++ "system_usec %llu\n", usage, user, sys); ++ ++ css_put(css); ++} ++ + /* + * charge this task's execution time to its accounting group. + * +diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h +index ed60562..44eace9 100644 +--- a/kernel/sched/cpuacct.h ++++ b/kernel/sched/cpuacct.h +@@ -2,6 +2,7 @@ + + extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); + extern void cpuacct_account_field(struct task_struct *p, int index, u64 val); ++extern void cpuacct_cpu_stats_show(struct seq_file *sf); + + #else + +@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *p, int index, u64 val) + { + } + ++static inline void cpuacct_cpu_stats_show(struct seq_file *sf) ++{ ++} ++ + #endif diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 6c0f6726450b..04e35a02d81c 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -12440,6 +12440,7 @@ with pkgs; [ kernelPatches.bridge_stp_helper kernelPatches.p9_fixes kernelPatches.cpu-cgroup-v2."4.4" + kernelPatches.modinst_arg_list_too_long ] ++ lib.optionals ((platform.kernelArch or null) == "mips") [ kernelPatches.mips_fpureg_emu