qbe: Update to latest git - oasis - Own branch of Oasis Linux (upstream: <https://git.sr.ht/~mcf/oasis/>)

commit: b0461b1f32266265262e7031b7930616fe8ac8c3
parent: 3603c6e1c0c3a5e641752bada3073d0face5390e
Author: Michael Forney <mforney@mforney.org>
Date:   Tue,  7 Sep 2021 00:02:51 -0700

qbe: Update to latest git

Diffstat:
M pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch 8 ++++----
R pkg/qbe/patch/0005-Increase-NString-to-96.patch -> pkg/qbe/patch/0004-Increase-NString-to-96.patch 0 
D pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch 73 -------------------------------------------------------------------------
R pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch -> pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch 0 
A pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch 34 ++++++++++++++++++++++++++++++++++
M pkg/qbe/ver 2 +-

7 files changed, 115 insertions(+), 78 deletions(-)
diff --git a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch b/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch
@@ -1,4 +1,4 @@
-From 86ce810ec5463f41d001b543288bd43dda79eebd Mon Sep 17 00:00:00 2001
+From d9d890583d93f1bfdc38e4aa890350d4111b848a Mon Sep 17 00:00:00 2001
 From: Michael Forney <mforney@mforney.org>
 Date: Thu, 9 May 2019 23:32:15 -0700
 Subject: [PATCH] arm64: Handle slots in Ocopy operands
@@ -8,7 +8,7 @@ Subject: [PATCH] arm64: Handle slots in Ocopy operands
  1 file changed, 21 insertions(+), 4 deletions(-)
 
 diff --git a/arm64/emit.c b/arm64/emit.c
-index e7effef..adae233 100644
+index 9ebcfcd..5a3fe55 100644
 --- a/arm64/emit.c
 +++ b/arm64/emit.c
 @@ -218,8 +218,8 @@ emitf(char *s, Ins *i, E *e)
@@ -22,7 +22,7 @@ index e7effef..adae233 100644
  			switch (rtype(r)) {
  			default:
  				die("TODO emit non reg addresses");
-@@ -305,9 +305,26 @@ emitins(Ins *i, E *e)
+@@ -307,9 +307,26 @@ emitins(Ins *i, E *e)
  	case Ocopy:
  		if (req(i->to, i->arg[0]))
  			break;
@@ -52,5 +52,5 @@ index e7effef..adae233 100644
  	case Oaddr:
  		assert(rtype(i->arg[0]) == RSlot);
 -- 
-2.21.0
+2.32.0
 
diff --git a/pkg/qbe/patch/0005-Increase-NString-to-96.patch b/pkg/qbe/patch/0004-Increase-NString-to-96.patch
diff --git a/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch b/pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch
@@ -1,73 +0,0 @@
-From d4d5e44e1064cff4f6b3c25b174ec53d294c6e09 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Fri, 24 May 2019 18:56:12 -0700
-Subject: [PATCH] amd64: Fix floating-point equality check with NaN
-
-arm64 does not have the same issue, and the added test passes there
-as well.
----
- amd64/isel.c   | 15 +++++++++++++++
- test/isel2.ssa | 25 +++++++++++++++++++++++++
- 2 files changed, 40 insertions(+)
- create mode 100644 test/isel2.ssa
-
-diff --git a/amd64/isel.c b/amd64/isel.c
-index 56e4cf3..9bf5c74 100644
---- a/amd64/isel.c
-+++ b/amd64/isel.c
-@@ -327,6 +327,21 @@ Emit:
- 		if (isload(i.op))
- 			goto case_Oload;
- 		if (iscmp(i.op, &kc, &x)) {
-+			/* ZF is set when operands are unordered, so we
-+			 * may have to check PF as well.
-+			 */
-+			switch (x) {
-+			case NCmpI+Cfeq:
-+				r0 = newtmp("isel", Kw, fn);
-+				emit(Oand, Kw, i.to, i.to, r0);
-+				emit(Oflagfo, k, r0, R, R);
-+				break;
-+			case NCmpI+Cfne:
-+				r0 = newtmp("isel", Kw, fn);
-+				emit(Oor, Kw, i.to, i.to, r0);
-+				emit(Oflagfuo, k, r0, R, R);
-+				break;
-+			}
- 			emit(Oflag+x, k, i.to, R, R);
- 			i1 = curi;
- 			if (selcmp(i.arg, kc, fn))
-diff --git a/test/isel2.ssa b/test/isel2.ssa
-new file mode 100644
-index 0000000..d6e009c
---- /dev/null
-+++ b/test/isel2.ssa
-@@ -0,0 +1,25 @@
-+# tests that floating point equality works
-+# on amd64, which requires additional
-+# instructions to check that the operands
-+# are ordered.
-+
-+export function w $eq(s %x, s %y) {
-+@start
-+	%r =w ceqs %x, %y
-+	ret %r
-+}
-+
-+export function w $ne(s %x, s %y) {
-+@start
-+	%r =w cnes %x, %y
-+	ret %r
-+}
-+
-+# >>> driver
-+# #include <math.h>
-+# extern int eq(float, float);
-+# extern int ne(float, float);
-+# int main() {
-+# 	return !(eq(NAN, NAN) == 0 && ne(NAN, NAN) == 1);
-+# }
-+# <<<
--- 
-2.22.0
-
diff --git a/pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch
diff --git a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch b/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch
@@ -0,0 +1,76 @@
+From 1e0c08a288a5f7993dd8565ace35f1ecfc614544 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
+Date: Sun, 11 Jul 2021 19:19:12 -0300
+Subject: [PATCH] amd64: optimize loading +0 into floating point registers
+
+Loading +0 into a floating point register can be done using pxor or
+xorps instructions. Per [1], we went with pxor because it can run on all
+vector ALU ports, even if it's one byte longer.
+
+To implement it, we special case fixarg to emit Ocopy when the value is
++0, and emitins to treat Ocopy for floating point args specially. Since
+0. == -0., we can't check if bits.d or bits.f are equal to 0. To avoid
+requiring signbit(), we inspect bits.i directly; this assumes the bits
+union is always zero-initialized.
+
+[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
+---
+ amd64/emit.c |  8 ++++++++
+ amd64/isel.c | 12 +++++++-----
+ 2 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index 09b90d5..311b8c6 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -443,6 +443,14 @@ emitins(Ins i, Fn *fn, FILE *f)
+ 		if (req(i.to, i.arg[0]))
+ 			break;
+ 		t0 = rtype(i.arg[0]);
++		if (isreg(i.to)
++		&& KBASE(i.cls) == 1
++		&& t0 == RCon
++		&& fn->con[i.arg[0].val].type == CBits) {
++			assert(fn->con[i.arg[0].val].bits.i == 0);
++			emitf("pxor %D=, %D=", &i, fn, f);
++			break;
++		}
+ 		if (i.cls == Kl
+ 		&& t0 == RCon
+ 		&& fn->con[i.arg[0].val].type == CBits) {
+diff --git a/amd64/isel.c b/amd64/isel.c
+index 607c176..1c902f5 100644
+--- a/amd64/isel.c
++++ b/amd64/isel.c
+@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ 	r1 = r0 = *r;
+ 	s = rslot(r0, fn);
+ 	op = i ? i->op : Ocopy;
+-	if (KBASE(k) == 1 && rtype(r0) == RCon) {
++	if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
+ 		/* load floating points from memory
+ 		 * slots, they can't be used as
+ 		 * immediates
+@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ 		a.offset.label = intern(buf);
+ 		fn->mem[fn->nmem-1] = a;
+ 	}
+-	else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
++	else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
+ 		/* load constants that do not fit in
+ 		 * a 32bit signed integer into a
+-		 * long temporary
++		 * long temporary OR
++		 * load positive zero into a floating
++		 * point register
+ 		 */
+-		r1 = newtmp("isel", Kl, fn);
+-		emit(Ocopy, Kl, r1, r0, R);
++		r1 = newtmp("isel", k, fn);
++		emit(Ocopy, k, r1, r0, R);
+ 	}
+ 	else if (s != -1) {
+ 		/* load fast locals' addresses into
+-- 
+2.32.0
+
diff --git a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch b/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch
@@ -0,0 +1,34 @@
+From 1b61d04de8d62821eec915eec6bde6b9a0a2d1c9 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Mon, 30 Aug 2021 13:40:48 -0700
+Subject: [PATCH] amd64: optimize zeroing of integer registers as well
+
+---
+ amd64/emit.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index 311b8c6..015b921 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -444,11 +444,13 @@ emitins(Ins i, Fn *fn, FILE *f)
+ 			break;
+ 		t0 = rtype(i.arg[0]);
+ 		if (isreg(i.to)
+-		&& KBASE(i.cls) == 1
+ 		&& t0 == RCon
+-		&& fn->con[i.arg[0].val].type == CBits) {
+-			assert(fn->con[i.arg[0].val].bits.i == 0);
+-			emitf("pxor %D=, %D=", &i, fn, f);
++		&& fn->con[i.arg[0].val].type == CBits
++		&& fn->con[i.arg[0].val].bits.i == 0) {
++			if (KBASE(i.cls) == 0)
++				emitf("xor%k %=, %=", &i, fn, f);
++			else
++				emitf("pxor %D=, %D=", &i, fn, f);
+ 			break;
+ 		}
+ 		if (i.cls == Kl
+-- 
+2.32.0
+
diff --git a/pkg/qbe/ver b/pkg/qbe/ver
@@ -1 +1 @@
-5e5e301e86 r0
+6a69210b0f r0

M	pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch	8	++++----
R	pkg/qbe/patch/0005-Increase-NString-to-96.patch -> pkg/qbe/patch/0004-Increase-NString-to-96.patch	0
D	pkg/qbe/patch/0004-amd64-Fix-floating-point-equality-check-with-NaN.patch	73	-------------------------------------------------------------------------
R	pkg/qbe/patch/0006-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch -> pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch	0
A	pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch	76	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch	34	++++++++++++++++++++++++++++++++++
M	pkg/qbe/ver	2	+-