diff --git a/pkgs/development/compilers/gcc/9/avoid-cycling-subreg-reloads.patch b/pkgs/development/compilers/gcc/9/avoid-cycling-subreg-reloads.patch new file mode 100644 index 000000000000..17a4e0a2447b --- /dev/null +++ b/pkgs/development/compilers/gcc/9/avoid-cycling-subreg-reloads.patch @@ -0,0 +1,261 @@ +From 6001db79c477b03eacc7e7049560921fb54b7845 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Mon, 7 Sep 2020 20:15:36 +0100 +Subject: [PATCH] lra: Avoid cycling on certain subreg reloads [PR96796] + +This PR is about LRA cycling for a reload of the form: + +---------------------------------------------------------------------------- +Changing pseudo 196 in operand 1 of insn 103 on equiv [r105:DI*0x8+r140:DI] + Creating newreg=287, assigning class ALL_REGS to slow/invalid mem r287 + Creating newreg=288, assigning class ALL_REGS to slow/invalid mem r288 + 103: r203:SI=r288:SI<<0x1+r196:DI#0 + REG_DEAD r196:DI + Inserting slow/invalid mem reload before: + 316: r287:DI=[r105:DI*0x8+r140:DI] + 317: r288:SI=r287:DI#0 +---------------------------------------------------------------------------- + +The problem is with r287. We rightly give it a broad starting class of +POINTER_AND_FP_REGS (reduced from ALL_REGS by preferred_reload_class). +However, we never make forward progress towards narrowing it down to +a specific choice of class (POINTER_REGS or FP_REGS). + +I think in practice we rely on two things to narrow a reload pseudo's +class down to a specific choice: + +(1) a restricted class is specified when the pseudo is created + + This happens for input address reloads, where the class is taken + from the target's chosen base register class. It also happens + for simple REG reloads, where the class is taken from the chosen + alternative's constraints. + +(2) uses of the reload pseudo as a direct input operand + + In this case get_reload_reg tries to reuse the existing register + and narrow its class, instead of creating a new reload pseudo. + +However, neither occurs here. As described above, r287 rightly +starts out with a wide choice of class, ultimately derived from +ALL_REGS, so we don't get (1). And as the comments in the PR +explain, r287 is never used as an input reload, only the subreg is, +so we don't get (2): + +---------------------------------------------------------------------------- + Choosing alt 13 in insn 317: (0) r (1) w {*movsi_aarch64} + Creating newreg=291, assigning class FP_REGS to r291 + 317: r288:SI=r291:SI + Inserting insn reload before: + 320: r291:SI=r287:DI#0 +---------------------------------------------------------------------------- + +IMO, in this case we should rely on the reload of r316 to narrow +down the class of r278. Currently we do: + +---------------------------------------------------------------------------- + Choosing alt 7 in insn 316: (0) r (1) m {*movdi_aarch64} + Creating newreg=289 from oldreg=287, assigning class GENERAL_REGS to r289 + 316: r289:DI=[r105:DI*0x8+r140:DI] + Inserting insn reload after: + 318: r287:DI=r289:DI +--------------------------------------------------- + +i.e. we create a new pseudo register r289 and give *that* pseudo +GENERAL_REGS instead. This is because get_reload_reg only narrows +down the existing class for OP_IN and OP_INOUT, not OP_OUT. + +But if we have a reload pseudo in a reload instruction and have chosen +a specific class for the reload pseudo, I think we should simply install +it for OP_OUT reloads too, if the class is a subset of the existing class. +We will need to pick such a register whatever happens (for r289 in the +example above). And as explained in the PR, doing this actually avoids +an unnecessary move via the FP registers too. + +The patch is quite aggressive in that it does this for all reload +pseudos in all reload instructions. I wondered about reusing the +condition for a reload move in in_class_p: + + INSN_UID (curr_insn) >= new_insn_uid_start + && curr_insn_set != NULL + && ((OBJECT_P (SET_SRC (curr_insn_set)) + && ! CONSTANT_P (SET_SRC (curr_insn_set))) + || (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG + && OBJECT_P (SUBREG_REG (SET_SRC (curr_insn_set))) + && ! CONSTANT_P (SUBREG_REG (SET_SRC (curr_insn_set))))))) + +but I can't really justify that on first principles. I think we +should apply the rule consistently until we have a specific reason +for doing otherwise. + +gcc/ + PR rtl-optimization/96796 + * lra-constraints.c (in_class_p): Add a default-false + allow_all_reload_class_changes_p parameter. Do not treat + reload moves specially when the parameter is true. + (get_reload_reg): Try to narrow the class of an existing OP_OUT + reload if we're reloading a reload pseudo in a reload instruction. + +gcc/testsuite/ + PR rtl-optimization/96796 + * gcc.c-torture/compile/pr96796.c: New test. +--- + gcc/lra-constraints.c | 54 ++++++++++++++---- + gcc/testsuite/gcc.c-torture/compile/pr96796.c | 55 +++++++++++++++++++ + 2 files changed, 99 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr96796.c + +diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c +index 580da9c3ed6..161b721efb1 100644 +--- a/gcc/lra-constraints.c ++++ b/gcc/lra-constraints.c +@@ -236,12 +236,17 @@ get_reg_class (int regno) + CL. Use elimination first if REG is a hard register. If REG is a + reload pseudo created by this constraints pass, assume that it will + be allocated a hard register from its allocno class, but allow that +- class to be narrowed to CL if it is currently a superset of CL. ++ class to be narrowed to CL if it is currently a superset of CL and ++ if either: ++ ++ - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or ++ - the instruction we're processing is not a reload move. + + If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of + REGNO (reg), or NO_REGS if no change in its class was needed. */ + static bool +-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class) ++in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class, ++ bool allow_all_reload_class_changes_p = false) + { + enum reg_class rclass, common_class; + machine_mode reg_mode; +@@ -266,7 +271,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class) + typically moves that have many alternatives, and restricting + reload pseudos for one alternative may lead to situations + where other reload pseudos are no longer allocatable. */ +- || (INSN_UID (curr_insn) >= new_insn_uid_start ++ || (!allow_all_reload_class_changes_p ++ && INSN_UID (curr_insn) >= new_insn_uid_start + && curr_insn_set != NULL + && ((OBJECT_P (SET_SRC (curr_insn_set)) + && ! CONSTANT_P (SET_SRC (curr_insn_set))) +@@ -598,13 +604,12 @@ canonicalize_reload_addr (rtx addr) + return addr; + } + +-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already +- created input reload pseudo (only if TYPE is not OP_OUT). Don't +- reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be +- wrapped up in SUBREG. The result pseudo is returned through +- RESULT_REG. Return TRUE if we created a new pseudo, FALSE if we +- reused the already created input reload pseudo. Use TITLE to +- describe new registers for debug purposes. */ ++/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing ++ reload pseudo. Don't reuse an existing reload pseudo if IN_SUBREG_P ++ is true and the reused pseudo should be wrapped up in a SUBREG. ++ The result pseudo is returned through RESULT_REG. Return TRUE if we ++ created a new pseudo, FALSE if we reused an existing reload pseudo. ++ Use TITLE to describe new registers for debug purposes. */ + static bool + get_reload_reg (enum op_type type, machine_mode mode, rtx original, + enum reg_class rclass, bool in_subreg_p, +@@ -616,6 +621,35 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, + + if (type == OP_OUT) + { ++ /* Output reload registers tend to start out with a conservative ++ choice of register class. Usually this is ALL_REGS, although ++ a target might narrow it (for performance reasons) through ++ targetm.preferred_reload_class. It's therefore quite common ++ for a reload instruction to require a more restrictive class ++ than the class that was originally assigned to the reload register. ++ ++ In these situations, it's more efficient to refine the choice ++ of register class rather than create a second reload register. ++ This also helps to avoid cycling for registers that are only ++ used by reload instructions. */ ++ if (REG_P (original) ++ && (int) REGNO (original) >= new_regno_start ++ && INSN_UID (curr_insn) >= new_insn_uid_start ++ && in_class_p (original, rclass, &new_class, true)) ++ { ++ unsigned int regno = REGNO (original); ++ if (lra_dump_file != NULL) ++ { ++ fprintf (lra_dump_file, " Reuse r%d for output ", regno); ++ dump_value_slim (lra_dump_file, original, 1); ++ } ++ if (new_class != lra_get_allocno_class (regno)) ++ lra_change_class (regno, new_class, ", change to", false); ++ if (lra_dump_file != NULL) ++ fprintf (lra_dump_file, "\n"); ++ *result_reg = original; ++ return false; ++ } + *result_reg + = lra_create_new_reg_with_unique_value (mode, original, rclass, title); + return true; +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c +new file mode 100644 +index 00000000000..8808e62fe77 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c +@@ -0,0 +1,55 @@ ++/* { dg-additional-options "-fcommon" } */ ++ ++struct S0 { ++ signed f0 : 8; ++ unsigned f1; ++ unsigned f4; ++}; ++struct S1 { ++ long f3; ++ char f4; ++} g_3_4; ++ ++int g_5, func_1_l_32, func_50___trans_tmp_31; ++static struct S0 g_144, g_834, g_1255, g_1261; ++ ++int g_273[120] = {}; ++int *g_555; ++char **g_979; ++static int g_1092_0; ++static int g_1193; ++int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; } ++static struct S0 *func_50(); ++int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); } ++void safe_div_func_int64_t_s_s(int *); ++void safe_mod_func_uint32_t_u_u(struct S0); ++struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54, ++ int p_55) { ++ int __trans_tmp_30; ++ char __trans_tmp_22; ++ short __trans_tmp_19; ++ long l_985_1; ++ long l_1191[8]; ++ safe_div_func_int64_t_s_s(g_273); ++ __builtin_printf((char*)g_1261.f4); ++ safe_mod_func_uint32_t_u_u(g_834); ++ g_144.f0 += 1; ++ for (;;) { ++ struct S1 l_1350 = {&l_1350}; ++ for (; p_53.f3; p_53.f3 -= 1) ++ for (; g_1193 <= 2; g_1193 += 1) { ++ __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3], ++ p_55 % (**g_979 = 10)); ++ __trans_tmp_22 = g_1255.f1 * p_53.f4; ++ __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22; ++ if (__trans_tmp_30) ++ g_1261.f0 = p_51; ++ else { ++ g_1255.f0 = p_53.f3; ++ int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51; ++ g_555 = ~0; ++ g_1092_0 |= func_50___trans_tmp_31; ++ } ++ } ++ } ++} +-- +2.18.4 + diff --git a/pkgs/development/compilers/gcc/9/default.nix b/pkgs/development/compilers/gcc/9/default.nix index 305ed56df78c..c64a9dcc3f57 100644 --- a/pkgs/development/compilers/gcc/9/default.nix +++ b/pkgs/development/compilers/gcc/9/default.nix @@ -58,7 +58,13 @@ let majorVersion = "9"; inherit (stdenv) buildPlatform hostPlatform targetPlatform; patches = - optional (targetPlatform != hostPlatform) ../libstdc++-target.patch + # Fix ICE: Max. number of generated reload insns per insn is achieved (90) + # + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96796 + # + # This patch can most likely be removed by a post 9.3.0-release. + [ ./avoid-cycling-subreg-reloads.patch ] + ++ optional (targetPlatform != hostPlatform) ../libstdc++-target.patch ++ optional noSysDirs ../no-sys-dirs.patch /* ++ optional (hostPlatform != buildPlatform) (fetchpatch { # XXX: Refine when this should be applied url = "https://git.busybox.net/buildroot/plain/package/gcc/${version}/0900-remove-selftests.patch?id=11271540bfe6adafbc133caf6b5b902a816f5f02";