commit: 066e68c4fe79238bc3377dea38585eb56b5934b1
parent f8b20538cc063224c6a1a2a8a65fbaa5e87c3f3c
Author: Michael Forney <mforney@mforney.org>
Date: Tue, 26 Oct 2021 19:34:26 -0700
qbe: Update to latest git and tweak some patches
Diffstat:
13 files changed, 310 insertions(+), 327 deletions(-)
diff --git a/pkg/qbe/patch/0001-arm64-Handle-slots.patch b/pkg/qbe/patch/0001-arm64-Handle-slots.patch
@@ -1,36 +0,0 @@
-From c8849e9c7b382f92326434a6522a26829a6e20f8 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Wed, 8 May 2019 18:29:28 -0700
-Subject: [PATCH] arm64: Handle slots
-
----
- arm64/emit.c | 13 +++++++++++--
- 1 file changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/arm64/emit.c b/arm64/emit.c
-index 59e1aae..9cc4a64 100644
---- a/arm64/emit.c
-+++ b/arm64/emit.c
-@@ -220,8 +220,17 @@ emitf(char *s, Ins *i, E *e)
- c = *s++;
- assert(c == '0' || c == '1');
- r = i->arg[c - '0'];
-- assert(isreg(r) && "TODO emit non reg addresses");
-- fprintf(e->f, "[%s]", rname(r.val, Kl));
-+ switch (rtype(r)) {
-+ default:
-+ die("TODO emit non reg addresses");
-+ case RTmp:
-+ assert(isreg(r));
-+ fprintf(e->f, "[%s]", rname(r.val, Kl));
-+ break;
-+ case RSlot:
-+ fprintf(e->f, "[sp, %"PRIu64"]", slot(r.val, e));
-+ break;
-+ }
- break;
- }
- }
---
-2.21.0
-
diff --git a/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch b/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch
@@ -0,0 +1,33 @@
+From 6c1744026545445511f1c500653bab859bc79b50 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Sat, 11 May 2019 19:38:13 -0700
+Subject: [PATCH] arm64: prevent stack clobber when passing structures < 8
+ bytes
+
+---
+ arm64/abi.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arm64/abi.c b/arm64/abi.c
+index 8209944..f37c892 100644
+--- a/arm64/abi.c
++++ b/arm64/abi.c
+@@ -312,12 +312,14 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
+ {
+ Insl *il;
+ int al;
++ uint64_t sz;
+
+ il = alloc(sizeof *il);
+ al = c->t->align - 2; /* NAlign == 3 */
+ if (al < 0)
+ al = 0;
+- il->i = (Ins){Oalloc+al, Kl, r, {getcon(c->t->size, fn)}};
++ sz = c->class & Cptr ? c->t->size : c->size;
++ il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
+ il->link = *ilp;
+ *ilp = il;
+ }
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch b/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch
@@ -1,56 +0,0 @@
-From d9d890583d93f1bfdc38e4aa890350d4111b848a Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Thu, 9 May 2019 23:32:15 -0700
-Subject: [PATCH] arm64: Handle slots in Ocopy operands
-
----
- arm64/emit.c | 25 +++++++++++++++++++++----
- 1 file changed, 21 insertions(+), 4 deletions(-)
-
-diff --git a/arm64/emit.c b/arm64/emit.c
-index 9ebcfcd..5a3fe55 100644
---- a/arm64/emit.c
-+++ b/arm64/emit.c
-@@ -218,8 +218,8 @@ emitf(char *s, Ins *i, E *e)
- break;
- case 'M':
- c = *s++;
-- assert(c == '0' || c == '1');
-- r = i->arg[c - '0'];
-+ assert(c == '0' || c == '1' || c == '=');
-+ r = c == '=' ? i->to : i->arg[c - '0'];
- switch (rtype(r)) {
- default:
- die("TODO emit non reg addresses");
-@@ -307,9 +307,26 @@ emitins(Ins *i, E *e)
- case Ocopy:
- if (req(i->to, i->arg[0]))
- break;
-- if (rtype(i->arg[0]) != RCon)
-+ if (rtype(i->to) == RSlot) {
-+ if (rtype(i->arg[0]) == RSlot) {
-+ emitf("ldr %?, %M0\n\tstr %?, %M=", i, e);
-+ } else {
-+ assert(isreg(i->arg[0]));
-+ emitf("str %0, %M=", i, e);
-+ }
-+ break;
-+ }
-+ assert(isreg(i->to));
-+ switch (rtype(i->arg[0])) {
-+ case RCon:
-+ loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
-+ break;
-+ case RSlot:
-+ emitf("ldr %=, %M0", i, e);
-+ break;
-+ default:
- goto Table;
-- loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
-+ }
- break;
- case Oaddr:
- assert(rtype(i->arg[0]) == RSlot);
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0002-increase-NString-to-72.patch b/pkg/qbe/patch/0002-increase-NString-to-72.patch
@@ -0,0 +1,25 @@
+From 294fedc93dbeac68f0beec1eeea62be30227b025 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Fri, 31 May 2019 13:31:04 -0700
+Subject: [PATCH] increase NString to 72
+
+---
+ all.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/all.h b/all.h
+index 37980d3..f49b4ef 100644
+--- a/all.h
++++ b/all.h
+@@ -31,7 +31,7 @@ typedef struct Dat Dat;
+ typedef struct Target Target;
+
+ enum {
+- NString = 64,
++ NString = 72,
+ NIns = 1 << 20,
+ NAlign = 3,
+ NField = 32,
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0003-arm64-Prevent-stack-clobber-when-passing-structures-.patch b/pkg/qbe/patch/0003-arm64-Prevent-stack-clobber-when-passing-structures-.patch
@@ -1,33 +0,0 @@
-From ffd2585ef162a6dcc42011a33bd69687048ab4a8 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Sat, 11 May 2019 19:38:13 -0700
-Subject: [PATCH] arm64: Prevent stack clobber when passing structures < 8
- bytes
-
----
- arm64/abi.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/arm64/abi.c b/arm64/abi.c
-index f5b605a..4e80db2 100644
---- a/arm64/abi.c
-+++ b/arm64/abi.c
-@@ -308,12 +308,14 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
- {
- Insl *il;
- int al;
-+ uint64_t sz;
-
- il = alloc(sizeof *il);
- al = c->t->align - 2; /* NAlign == 3 */
- if (al < 0)
- al = 0;
-- il->i = (Ins){Oalloc+al, Kl, r, {getcon(c->t->size, fn)}};
-+ sz = c->class & Cptr ? c->t->size : c->size;
-+ il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
- il->link = *ilp;
- *ilp = il;
- }
---
-2.21.0
-
diff --git a/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch
@@ -0,0 +1,66 @@
+From b3c8dfafafd7e749a12227c951f3faebc2572710 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Sun, 16 Jun 2019 01:38:27 -0700
+Subject: [PATCH] fold: don't fold invalid addition/subtraction rather than
+ failing
+
+This may happen in a branch QBE doesn't realize is unreachable,
+for example (simplified from real code found in ncurses)
+
+ data $str = { b "abcdef", b 0 }
+ function l $f(w %x) {
+ @start
+ %.1 =w ceqw %x, 0
+ jnz %.1, @logic_join, @logic_right
+ @logic_right
+ %p =l call $strchr(l $str, w %x)
+ %.2 =w ceql %p, 0
+ @logic_join
+ %.3 =w phi @start %.1, @logic_right %.2
+ jnz %.3, @fail, @return
+ @fail
+ ret 0
+ @return
+ %.4 =l sub %p, $str
+ ret %.4
+ }
+---
+ fold.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/fold.c b/fold.c
+index 2081a72..50a862e 100644
+--- a/fold.c
++++ b/fold.c
+@@ -343,7 +343,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
+ if (op == Oadd) {
+ if (cl->type == CAddr) {
+ if (cr->type == CAddr)
+- err("undefined addition (addr + addr)");
++ return 1;
+ lab = cl->label;
+ typ = CAddr;
+ }
+@@ -358,16 +358,13 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
+ lab = cl->label;
+ typ = CAddr;
+ } else if (cl->label != cr->label)
+- err("undefined substraction (addr1 - addr2)");
++ return 1;
+ }
+ else if (cr->type == CAddr)
+- err("undefined substraction (num - addr)");
+- }
+- else if (cl->type == CAddr || cr->type == CAddr) {
+- if (Ocmpl <= op && op <= Ocmpl1)
+ return 1;
+- err("invalid address operand for '%s'", optab[op].name);
+ }
++ else if (cl->type == CAddr || cr->type == CAddr)
++ return 1;
+ switch (op) {
+ case Oadd: x = l.u + r.u; break;
+ case Osub: x = l.u - r.u; break;
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0004-Increase-NString-to-96.patch b/pkg/qbe/patch/0004-Increase-NString-to-96.patch
@@ -1,25 +0,0 @@
-From 948e221acc92d002662ffa609a252a3410a93001 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Fri, 31 May 2019 13:31:04 -0700
-Subject: [PATCH] Increase NString to 96
-
----
- all.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/all.h b/all.h
-index 7f843a9..805a346 100644
---- a/all.h
-+++ b/all.h
-@@ -31,7 +31,7 @@ typedef struct Dat Dat;
- typedef struct Target Target;
-
- enum {
-- NString = 64,
-+ NString = 96,
- NIns = 1 << 20,
- NAlign = 3,
- NField = 32,
---
-2.28.0
-
diff --git a/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch b/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch
@@ -0,0 +1,102 @@
+From e81a67355f1a53739cbfd9797ac9d687efff05e8 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Tue, 28 Sep 2021 11:25:05 -0700
+Subject: [PATCH] gas: put zero data into .bss
+
+---
+ gas.c | 56 ++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 40 insertions(+), 16 deletions(-)
+
+diff --git a/gas.c b/gas.c
+index 8c31794..ce082dc 100644
+--- a/gas.c
++++ b/gas.c
+@@ -3,10 +3,26 @@
+
+ char *gasloc, *gassym;
+
++static void
++startdat(FILE *f, char *section, char *name, int align, int export, int zero)
++{
++ char *p;
++
++ if (section)
++ fprintf(f, ".section %s\n", section);
++ else
++ fprintf(f, "%s\n", zero ? ".bss" : ".data");
++ fprintf(f, ".balign %d\n", align);
++ p = name[0] == '"' ? "" : gassym;
++ if (export)
++ fprintf(f, ".globl %s%s\n", p, name);
++ fprintf(f, "%s%s:\n", p, name);
++}
++
+ void
+ gasemitdat(Dat *d, FILE *f)
+ {
+- static int aligned;
++ static int aligned, export;
+ static char *dtoa[] = {
+ [DAlign] = ".balign",
+ [DB] = "\t.byte",
+@@ -14,34 +30,42 @@ gasemitdat(Dat *d, FILE *f)
+ [DW] = "\t.int",
+ [DL] = "\t.quad"
+ };
++ static char *name, *section;
++ static int64_t zero;
+ char *p;
+
+ switch (d->type) {
+ case DStart:
+ aligned = 0;
+- if (d->u.str) {
+- fprintf(f, ".section %s\n", d->u.str);
+- } else {
+- fprintf(f, ".data\n");
+- }
++ zero = 0;
++ section = d->u.str;
+ break;
+ case DEnd:
++ if (zero != -1) {
++ startdat(f, section, name, aligned, export, 1);
++ fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
++ }
++ break;
++ case DAlign:
++ aligned = d->u.num;
+ break;
+ case DName:
+- if (!aligned)
+- fprintf(f, ".balign 8\n");
+- p = d->u.str[0] == '"' ? "" : gassym;
+- if (d->export)
+- fprintf(f, ".globl %s%s\n", p, d->u.str);
+- fprintf(f, "%s%s:\n", p, d->u.str);
++ name = d->u.str;
++ export = d->export;
+ break;
+ case DZ:
+- fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
++ if (zero != -1)
++ zero += d->u.num;
++ else
++ fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
+ break;
+ default:
+- if (d->type == DAlign)
+- aligned = 1;
+-
++ if (zero != -1) {
++ startdat(f, section, name, aligned, export, 0);
++ if (zero > 0)
++ fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
++ zero = -1;
++ }
+ if (d->isstr) {
+ if (d->type != DB)
+ err("strings only supported for 'b' currently");
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch b/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch
@@ -0,0 +1,83 @@
+From a11da13e22a694f8fe4a81d894d433f50ce4af6b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
+Date: Sun, 11 Jul 2021 19:19:12 -0300
+Subject: [PATCH] amd64: optimize loading 0 into registers
+
+Loading +0 into a floating point register can be done using pxor or
+xorps instructions. Per [1], we went with pxor because it can run on all
+vector ALU ports, even if it's one byte longer.
+
+Similarly, an integer register can be zeroed with xor, which has a
+smaller encoding than mov with 0 immediate.
+
+To implement this, we special case fixarg to allow Ocopy when the
+value is +0 for floating point, and change emitins to emit pxor/xor
+when it encounters a copy from 0.
+
+Co-authored-by: Michael Forney <mforney@mforney.org>
+
+[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
+---
+ amd64/emit.c | 12 ++++++++++++
+ amd64/isel.c | 12 +++++++-----
+ 2 files changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index a888000..7aeeff5 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -443,6 +443,18 @@ emitins(Ins i, Fn *fn, FILE *f)
+ if (req(i.to, i.arg[0]))
+ break;
+ t0 = rtype(i.arg[0]);
++ if (t0 == RCon
++ && fn->con[i.arg[0].val].type == CBits
++ && fn->con[i.arg[0].val].bits.i == 0) {
++ if (isreg(i.to)) {
++ if (KBASE(i.cls) == 0)
++ emitf("xor%k %=, %=", &i, fn, f);
++ else
++ emitf("pxor %D=, %D=", &i, fn, f);
++ break;
++ }
++ i.cls = KWIDE(i.cls) ? Kl : Kw;
++ }
+ if (i.cls == Kl
+ && t0 == RCon
+ && fn->con[i.arg[0].val].type == CBits) {
+diff --git a/amd64/isel.c b/amd64/isel.c
+index 607c176..1c902f5 100644
+--- a/amd64/isel.c
++++ b/amd64/isel.c
+@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ r1 = r0 = *r;
+ s = rslot(r0, fn);
+ op = i ? i->op : Ocopy;
+- if (KBASE(k) == 1 && rtype(r0) == RCon) {
++ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
+ /* load floating points from memory
+ * slots, they can't be used as
+ * immediates
+@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ a.offset.label = intern(buf);
+ fn->mem[fn->nmem-1] = a;
+ }
+- else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
++ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
+ /* load constants that do not fit in
+ * a 32bit signed integer into a
+- * long temporary
++ * long temporary OR
++ * load positive zero into a floating
++ * point register
+ */
+- r1 = newtmp("isel", Kl, fn);
+- emit(Ocopy, Kl, r1, r0, R);
++ r1 = newtmp("isel", k, fn);
++ emit(Ocopy, k, r1, r0, R);
+ }
+ else if (s != -1) {
+ /* load fast locals' addresses into
+--
+2.32.0
+
diff --git a/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch
@@ -1,66 +0,0 @@
-From 264b07e0cb0ce869cfcdab0a3e66c92a99de5dee Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Sun, 16 Jun 2019 01:38:27 -0700
-Subject: [PATCH] fold: Don't fold invalid addition/subtraction rather than
- failing
-
-This may happen in a branch QBE doesn't realize is unreachable,
-for example (simplified from real code found in ncurses)
-
- data $str = { b "abcdef", b 0 }
- function l $f(w %x) {
- @start
- %.1 =w ceqw %x, 0
- jnz %.1, @logic_join, @logic_right
- @logic_right
- %p =l call $strchr(l $str, w %x)
- %.2 =w ceql %p, 0
- @logic_join
- %.3 =w phi @start %.1, @logic_right %.2
- jnz %.3, @fail, @return
- @fail
- ret 0
- @return
- %.4 =l sub %p, $str
- ret %.4
- }
----
- fold.c | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/fold.c b/fold.c
-index 0a3945f..9e1a12d 100644
---- a/fold.c
-+++ b/fold.c
-@@ -343,7 +343,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
- if (op == Oadd) {
- if (cl->type == CAddr) {
- if (cr->type == CAddr)
-- err("undefined addition (addr + addr)");
-+ return 1;
- lab = cl->label;
- typ = CAddr;
- }
-@@ -358,16 +358,13 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
- lab = cl->label;
- typ = CAddr;
- } else if (cl->label != cr->label)
-- err("undefined substraction (addr1 - addr2)");
-+ return 1;
- }
- else if (cr->type == CAddr)
-- err("undefined substraction (num - addr)");
-- }
-- else if (cl->type == CAddr || cr->type == CAddr) {
-- if (Ocmpl <= op && op <= Ocmpl1)
- return 1;
-- err("invalid address operand for '%s'", optab[op].name);
- }
-+ else if (cl->type == CAddr || cr->type == CAddr)
-+ return 1;
- switch (op) {
- case Oadd: x = l.u + r.u; break;
- case Osub: x = l.u - r.u; break;
---
-2.22.0
-
diff --git a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch b/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch
@@ -1,76 +0,0 @@
-From 1e0c08a288a5f7993dd8565ace35f1ecfc614544 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
-Date: Sun, 11 Jul 2021 19:19:12 -0300
-Subject: [PATCH] amd64: optimize loading +0 into floating point registers
-
-Loading +0 into a floating point register can be done using pxor or
-xorps instructions. Per [1], we went with pxor because it can run on all
-vector ALU ports, even if it's one byte longer.
-
-To implement it, we special case fixarg to emit Ocopy when the value is
-+0, and emitins to treat Ocopy for floating point args specially. Since
-0. == -0., we can't check if bits.d or bits.f are equal to 0. To avoid
-requiring signbit(), we inspect bits.i directly; this assumes the bits
-union is always zero-initialized.
-
-[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
----
- amd64/emit.c | 8 ++++++++
- amd64/isel.c | 12 +++++++-----
- 2 files changed, 15 insertions(+), 5 deletions(-)
-
-diff --git a/amd64/emit.c b/amd64/emit.c
-index 09b90d5..311b8c6 100644
---- a/amd64/emit.c
-+++ b/amd64/emit.c
-@@ -443,6 +443,14 @@ emitins(Ins i, Fn *fn, FILE *f)
- if (req(i.to, i.arg[0]))
- break;
- t0 = rtype(i.arg[0]);
-+ if (isreg(i.to)
-+ && KBASE(i.cls) == 1
-+ && t0 == RCon
-+ && fn->con[i.arg[0].val].type == CBits) {
-+ assert(fn->con[i.arg[0].val].bits.i == 0);
-+ emitf("pxor %D=, %D=", &i, fn, f);
-+ break;
-+ }
- if (i.cls == Kl
- && t0 == RCon
- && fn->con[i.arg[0].val].type == CBits) {
-diff --git a/amd64/isel.c b/amd64/isel.c
-index 607c176..1c902f5 100644
---- a/amd64/isel.c
-+++ b/amd64/isel.c
-@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
- r1 = r0 = *r;
- s = rslot(r0, fn);
- op = i ? i->op : Ocopy;
-- if (KBASE(k) == 1 && rtype(r0) == RCon) {
-+ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
- /* load floating points from memory
- * slots, they can't be used as
- * immediates
-@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
- a.offset.label = intern(buf);
- fn->mem[fn->nmem-1] = a;
- }
-- else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
-+ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
- /* load constants that do not fit in
- * a 32bit signed integer into a
-- * long temporary
-+ * long temporary OR
-+ * load positive zero into a floating
-+ * point register
- */
-- r1 = newtmp("isel", Kl, fn);
-- emit(Ocopy, Kl, r1, r0, R);
-+ r1 = newtmp("isel", k, fn);
-+ emit(Ocopy, k, r1, r0, R);
- }
- else if (s != -1) {
- /* load fast locals' addresses into
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch b/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch
@@ -1,34 +0,0 @@
-From 1b61d04de8d62821eec915eec6bde6b9a0a2d1c9 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Mon, 30 Aug 2021 13:40:48 -0700
-Subject: [PATCH] amd64: optimize zeroing of integer registers as well
-
----
- amd64/emit.c | 10 ++++++----
- 1 file changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/amd64/emit.c b/amd64/emit.c
-index 311b8c6..015b921 100644
---- a/amd64/emit.c
-+++ b/amd64/emit.c
-@@ -444,11 +444,13 @@ emitins(Ins i, Fn *fn, FILE *f)
- break;
- t0 = rtype(i.arg[0]);
- if (isreg(i.to)
-- && KBASE(i.cls) == 1
- && t0 == RCon
-- && fn->con[i.arg[0].val].type == CBits) {
-- assert(fn->con[i.arg[0].val].bits.i == 0);
-- emitf("pxor %D=, %D=", &i, fn, f);
-+ && fn->con[i.arg[0].val].type == CBits
-+ && fn->con[i.arg[0].val].bits.i == 0) {
-+ if (KBASE(i.cls) == 0)
-+ emitf("xor%k %=, %=", &i, fn, f);
-+ else
-+ emitf("pxor %D=, %D=", &i, fn, f);
- break;
- }
- if (i.cls == Kl
---
-2.32.0
-
diff --git a/pkg/qbe/ver b/pkg/qbe/ver
@@ -1 +1 @@
-6a69210b0f r0
+900805a8fe r0