logo

oasis

Own branch of Oasis Linux (upstream: <https://git.sr.ht/~mcf/oasis/>)
git clone https://anongit.hacktivis.me/git/oasis.git
commit: b0461b1f32266265262e7031b7930616fe8ac8c3
parent 3603c6e1c0c3a5e641752bada3073d0face5390e
Author: Michael Forney <mforney@mforney.org>
Date:   Tue,  7 Sep 2021 00:02:51 -0700

qbe: Update to latest git

Diffstat:

M pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch | 8 ++++----
R pkg/qbe/patch/0005-Increase-NString-to-96.patch -> pkg/qbe/patch/0004-Increase-NString-to-96.patch | 0
D pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch | 73 -------------------------------------------------------------------------
R pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch -> pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch | 0
A pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch | 34 ++++++++++++++++++++++++++++++++++
M pkg/qbe/ver | 2 +-
7 files changed, 115 insertions(+), 78 deletions(-)

diff --git a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch b/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch @@ -1,4 +1,4 @@ -From 86ce810ec5463f41d001b543288bd43dda79eebd Mon Sep 17 00:00:00 2001 +From d9d890583d93f1bfdc38e4aa890350d4111b848a Mon Sep 17 00:00:00 2001 From: Michael Forney <mforney@mforney.org> Date: Thu, 9 May 2019 23:32:15 -0700 Subject: [PATCH] arm64: Handle slots in Ocopy operands @@ -8,7 +8,7 @@ Subject: [PATCH] arm64: Handle slots in Ocopy operands 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arm64/emit.c b/arm64/emit.c -index e7effef..adae233 100644 +index 9ebcfcd..5a3fe55 100644 --- a/arm64/emit.c +++ b/arm64/emit.c @@ -218,8 +218,8 @@ emitf(char *s, Ins *i, E *e) @@ -22,7 +22,7 @@ index e7effef..adae233 100644 switch (rtype(r)) { default: die("TODO emit non reg addresses"); -@@ -305,9 +305,26 @@ emitins(Ins *i, E *e) +@@ -307,9 +307,26 @@ emitins(Ins *i, E *e) case Ocopy: if (req(i->to, i->arg[0])) break; @@ -52,5 +52,5 @@ index e7effef..adae233 100644 case Oaddr: assert(rtype(i->arg[0]) == RSlot); -- -2.21.0 +2.32.0 diff --git a/pkg/qbe/patch/0005-Increase-NString-to-96.patch b/pkg/qbe/patch/0004-Increase-NString-to-96.patch diff --git a/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch b/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch @@ -1,73 +0,0 @@ -From d4d5e44e1064cff4f6b3c25b174ec53d294c6e09 Mon Sep 17 00:00:00 2001 -From: Michael Forney <mforney@mforney.org> -Date: Fri, 24 May 2019 18:56:12 -0700 -Subject: [PATCH] amd64: Fix floating-point equality check with NaN - -arm64 does not have the same issue, and the added test passes there -as well. 
---- - amd64/isel.c | 15 +++++++++++++++ - test/isel2.ssa | 25 +++++++++++++++++++++++++ - 2 files changed, 40 insertions(+) - create mode 100644 test/isel2.ssa - -diff --git a/amd64/isel.c b/amd64/isel.c -index 56e4cf3..9bf5c74 100644 ---- a/amd64/isel.c -+++ b/amd64/isel.c -@@ -327,6 +327,21 @@ Emit: - if (isload(i.op)) - goto case_Oload; - if (iscmp(i.op, &kc, &x)) { -+ /* ZF is set when operands are unordered, so we -+ * may have to check PF as well. -+ */ -+ switch (x) { -+ case NCmpI+Cfeq: -+ r0 = newtmp("isel", Kw, fn); -+ emit(Oand, Kw, i.to, i.to, r0); -+ emit(Oflagfo, k, r0, R, R); -+ break; -+ case NCmpI+Cfne: -+ r0 = newtmp("isel", Kw, fn); -+ emit(Oor, Kw, i.to, i.to, r0); -+ emit(Oflagfuo, k, r0, R, R); -+ break; -+ } - emit(Oflag+x, k, i.to, R, R); - i1 = curi; - if (selcmp(i.arg, kc, fn)) -diff --git a/test/isel2.ssa b/test/isel2.ssa -new file mode 100644 -index 0000000..d6e009c ---- /dev/null -+++ b/test/isel2.ssa -@@ -0,0 +1,25 @@ -+# tests that floating point equality works -+# on amd64, which requires additional -+# instructions to check that the operands -+# are ordered. 
-+ -+export function w $eq(s %x, s %y) { -+@start -+ %r =w ceqs %x, %y -+ ret %r -+} -+ -+export function w $ne(s %x, s %y) { -+@start -+ %r =w cnes %x, %y -+ ret %r -+} -+ -+# >>> driver -+# #include <math.h> -+# extern int eq(float, float); -+# extern int ne(float, float); -+# int main() { -+# return !(eq(NAN, NAN) == 0 && ne(NAN, NAN) == 1); -+# } -+# <<< --- -2.22.0 - diff --git a/pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch diff --git a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch b/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch @@ -0,0 +1,76 @@ +From 1e0c08a288a5f7993dd8565ace35f1ecfc614544 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com> +Date: Sun, 11 Jul 2021 19:19:12 -0300 +Subject: [PATCH] amd64: optimize loading +0 into floating point registers + +Loading +0 into a floating point register can be done using pxor or +xorps instructions. Per [1], we went with pxor because it can run on all +vector ALU ports, even if it's one byte longer. + +To implement it, we special case fixarg to emit Ocopy when the value is ++0, and emitins to treat Ocopy for floating point args specially. Since +0. == -0., we can't check if bits.d or bits.f are equal to 0. To avoid +requiring signbit(), we inspect bits.i directly; this assumes the bits +union is always zero-initialized. 
+ +[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976 +--- + amd64/emit.c | 8 ++++++++ + amd64/isel.c | 12 +++++++----- + 2 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/amd64/emit.c b/amd64/emit.c +index 09b90d5..311b8c6 100644 +--- a/amd64/emit.c ++++ b/amd64/emit.c +@@ -443,6 +443,14 @@ emitins(Ins i, Fn *fn, FILE *f) + if (req(i.to, i.arg[0])) + break; + t0 = rtype(i.arg[0]); ++ if (isreg(i.to) ++ && KBASE(i.cls) == 1 ++ && t0 == RCon ++ && fn->con[i.arg[0].val].type == CBits) { ++ assert(fn->con[i.arg[0].val].bits.i == 0); ++ emitf("pxor %D=, %D=", &i, fn, f); ++ break; ++ } + if (i.cls == Kl + && t0 == RCon + && fn->con[i.arg[0].val].type == CBits) { +diff --git a/amd64/isel.c b/amd64/isel.c +index 607c176..1c902f5 100644 +--- a/amd64/isel.c ++++ b/amd64/isel.c +@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) + r1 = r0 = *r; + s = rslot(r0, fn); + op = i ? i->op : Ocopy; +- if (KBASE(k) == 1 && rtype(r0) == RCon) { ++ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) { + /* load floating points from memory + * slots, they can't be used as + * immediates +@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) + a.offset.label = intern(buf); + fn->mem[fn->nmem-1] = a; + } +- else if (op != Ocopy && k == Kl && noimm(r0, fn)) { ++ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) { + /* load constants that do not fit in + * a 32bit signed integer into a +- * long temporary ++ * long temporary OR ++ * load positive zero into a floating ++ * point register + */ +- r1 = newtmp("isel", Kl, fn); +- emit(Ocopy, Kl, r1, r0, R); ++ r1 = newtmp("isel", k, fn); ++ emit(Ocopy, k, r1, r0, R); + } + else if (s != -1) { + /* load fast locals' addresses into +-- +2.32.0 + diff --git a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch 
b/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch @@ -0,0 +1,34 @@ +From 1b61d04de8d62821eec915eec6bde6b9a0a2d1c9 Mon Sep 17 00:00:00 2001 +From: Michael Forney <mforney@mforney.org> +Date: Mon, 30 Aug 2021 13:40:48 -0700 +Subject: [PATCH] amd64: optimize zeroing of integer registers as well + +--- + amd64/emit.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/amd64/emit.c b/amd64/emit.c +index 311b8c6..015b921 100644 +--- a/amd64/emit.c ++++ b/amd64/emit.c +@@ -444,11 +444,13 @@ emitins(Ins i, Fn *fn, FILE *f) + break; + t0 = rtype(i.arg[0]); + if (isreg(i.to) +- && KBASE(i.cls) == 1 + && t0 == RCon +- && fn->con[i.arg[0].val].type == CBits) { +- assert(fn->con[i.arg[0].val].bits.i == 0); +- emitf("pxor %D=, %D=", &i, fn, f); ++ && fn->con[i.arg[0].val].type == CBits ++ && fn->con[i.arg[0].val].bits.i == 0) { ++ if (KBASE(i.cls) == 0) ++ emitf("xor%k %=, %=", &i, fn, f); ++ else ++ emitf("pxor %D=, %D=", &i, fn, f); + break; + } + if (i.cls == Kl +-- +2.32.0 + diff --git a/pkg/qbe/ver b/pkg/qbe/ver @@ -1 +1 @@ -5e5e301e86 r0 +6a69210b0f r0