commit: b0461b1f32266265262e7031b7930616fe8ac8c3
parent 3603c6e1c0c3a5e641752bada3073d0face5390e
Author: Michael Forney <mforney@mforney.org>
Date: Tue, 7 Sep 2021 00:02:51 -0700
qbe: Update to latest git
Diffstat:
7 files changed, 115 insertions(+), 78 deletions(-)
diff --git a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch b/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch
@@ -1,4 +1,4 @@
-From 86ce810ec5463f41d001b543288bd43dda79eebd Mon Sep 17 00:00:00 2001
+From d9d890583d93f1bfdc38e4aa890350d4111b848a Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Thu, 9 May 2019 23:32:15 -0700
Subject: [PATCH] arm64: Handle slots in Ocopy operands
@@ -8,7 +8,7 @@ Subject: [PATCH] arm64: Handle slots in Ocopy operands
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/arm64/emit.c b/arm64/emit.c
-index e7effef..adae233 100644
+index 9ebcfcd..5a3fe55 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -218,8 +218,8 @@ emitf(char *s, Ins *i, E *e)
@@ -22,7 +22,7 @@ index e7effef..adae233 100644
switch (rtype(r)) {
default:
die("TODO emit non reg addresses");
-@@ -305,9 +305,26 @@ emitins(Ins *i, E *e)
+@@ -307,9 +307,26 @@ emitins(Ins *i, E *e)
case Ocopy:
if (req(i->to, i->arg[0]))
break;
@@ -52,5 +52,5 @@ index e7effef..adae233 100644
case Oaddr:
assert(rtype(i->arg[0]) == RSlot);
--
-2.21.0
+2.32.0
diff --git a/pkg/qbe/patch/0005-Increase-NString-to-96.patch b/pkg/qbe/patch/0004-Increase-NString-to-96.patch
diff --git a/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch b/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch
@@ -1,73 +0,0 @@
-From d4d5e44e1064cff4f6b3c25b174ec53d294c6e09 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Fri, 24 May 2019 18:56:12 -0700
-Subject: [PATCH] amd64: Fix floating-point equality check with NaN
-
-arm64 does not have the same issue, and the added test passes there
-as well.
----
- amd64/isel.c | 15 +++++++++++++++
- test/isel2.ssa | 25 +++++++++++++++++++++++++
- 2 files changed, 40 insertions(+)
- create mode 100644 test/isel2.ssa
-
-diff --git a/amd64/isel.c b/amd64/isel.c
-index 56e4cf3..9bf5c74 100644
---- a/amd64/isel.c
-+++ b/amd64/isel.c
-@@ -327,6 +327,21 @@ Emit:
- if (isload(i.op))
- goto case_Oload;
- if (iscmp(i.op, &kc, &x)) {
-+ /* ZF is set when operands are unordered, so we
-+ * may have to check PF as well.
-+ */
-+ switch (x) {
-+ case NCmpI+Cfeq:
-+ r0 = newtmp("isel", Kw, fn);
-+ emit(Oand, Kw, i.to, i.to, r0);
-+ emit(Oflagfo, k, r0, R, R);
-+ break;
-+ case NCmpI+Cfne:
-+ r0 = newtmp("isel", Kw, fn);
-+ emit(Oor, Kw, i.to, i.to, r0);
-+ emit(Oflagfuo, k, r0, R, R);
-+ break;
-+ }
- emit(Oflag+x, k, i.to, R, R);
- i1 = curi;
- if (selcmp(i.arg, kc, fn))
-diff --git a/test/isel2.ssa b/test/isel2.ssa
-new file mode 100644
-index 0000000..d6e009c
---- /dev/null
-+++ b/test/isel2.ssa
-@@ -0,0 +1,25 @@
-+# tests that floating point equality works
-+# on amd64, which requires additional
-+# instructions to check that the operands
-+# are ordered.
-+
-+export function w $eq(s %x, s %y) {
-+@start
-+ %r =w ceqs %x, %y
-+ ret %r
-+}
-+
-+export function w $ne(s %x, s %y) {
-+@start
-+ %r =w cnes %x, %y
-+ ret %r
-+}
-+
-+# >>> driver
-+# #include <math.h>
-+# extern int eq(float, float);
-+# extern int ne(float, float);
-+# int main() {
-+# return !(eq(NAN, NAN) == 0 && ne(NAN, NAN) == 1);
-+# }
-+# <<<
---
-2.22.0
-
diff --git a/pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch
diff --git a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch b/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch
@@ -0,0 +1,76 @@
+From 1e0c08a288a5f7993dd8565ace35f1ecfc614544 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
+Date: Sun, 11 Jul 2021 19:19:12 -0300
+Subject: [PATCH] amd64: optimize loading +0 into floating point registers
+
+Loading +0 into a floating point register can be done using pxor or
+xorps instructions. Per [1], we went with pxor because it can run on all
+vector ALU ports, even if it's one byte longer.
+
+To implement it, we special case fixarg to emit Ocopy when the value is
++0, and emitins to treat Ocopy for floating point args specially. Since
+0. == -0., we can't check if bits.d or bits.f are equal to 0. To avoid
+requiring signbit(), we inspect bits.i directly; this assumes the bits
+union is always zero-initialized.
+
+[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
+---
+ amd64/emit.c | 8 ++++++++
+ amd64/isel.c | 12 +++++++-----
+ 2 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index 09b90d5..311b8c6 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -443,6 +443,14 @@ emitins(Ins i, Fn *fn, FILE *f)
+ if (req(i.to, i.arg[0]))
+ break;
+ t0 = rtype(i.arg[0]);
++ if (isreg(i.to)
++ && KBASE(i.cls) == 1
++ && t0 == RCon
++ && fn->con[i.arg[0].val].type == CBits) {
++ assert(fn->con[i.arg[0].val].bits.i == 0);
++ emitf("pxor %D=, %D=", &i, fn, f);
++ break;
++ }
+ if (i.cls == Kl
+ && t0 == RCon
+ && fn->con[i.arg[0].val].type == CBits) {
+diff --git a/amd64/isel.c b/amd64/isel.c
+index 607c176..1c902f5 100644
+--- a/amd64/isel.c
++++ b/amd64/isel.c
+@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ r1 = r0 = *r;
+ s = rslot(r0, fn);
+ op = i ? i->op : Ocopy;
+- if (KBASE(k) == 1 && rtype(r0) == RCon) {
++ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
+ /* load floating points from memory
+ * slots, they can't be used as
+ * immediates
+@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ a.offset.label = intern(buf);
+ fn->mem[fn->nmem-1] = a;
+ }
+- else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
++ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
+ /* load constants that do not fit in
+ * a 32bit signed integer into a
+- * long temporary
++ * long temporary OR
++ * load positive zero into a floating
++ * point register
+ */
+- r1 = newtmp("isel", Kl, fn);
+- emit(Ocopy, Kl, r1, r0, R);
++ r1 = newtmp("isel", k, fn);
++ emit(Ocopy, k, r1, r0, R);
+ }
+ else if (s != -1) {
+ /* load fast locals' addresses into
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch b/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch
@@ -0,0 +1,34 @@
+From 1b61d04de8d62821eec915eec6bde6b9a0a2d1c9 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Mon, 30 Aug 2021 13:40:48 -0700
+Subject: [PATCH] amd64: optimize zeroing of integer registers as well
+
+---
+ amd64/emit.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index 311b8c6..015b921 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -444,11 +444,13 @@ emitins(Ins i, Fn *fn, FILE *f)
+ break;
+ t0 = rtype(i.arg[0]);
+ if (isreg(i.to)
+- && KBASE(i.cls) == 1
+ && t0 == RCon
+- && fn->con[i.arg[0].val].type == CBits) {
+- assert(fn->con[i.arg[0].val].bits.i == 0);
+- emitf("pxor %D=, %D=", &i, fn, f);
++ && fn->con[i.arg[0].val].type == CBits
++ && fn->con[i.arg[0].val].bits.i == 0) {
++ if (KBASE(i.cls) == 0)
++ emitf("xor%k %=, %=", &i, fn, f);
++ else
++ emitf("pxor %D=, %D=", &i, fn, f);
+ break;
+ }
+ if (i.cls == Kl
+--
+2.32.0
+
diff --git a/pkg/qbe/ver b/pkg/qbe/ver
@@ -1 +1 @@
-5e5e301e86 r0
+6a69210b0f r0