commit: bf50bc33b7afefe1ba82ff5a565a62bd44e271f2
parent 020a33da8d9de01aadec35f5f4b3e2d317eac71b
Author: Michael Forney <mforney@mforney.org>
Date: Thu, 10 Feb 2022 14:42:10 -0800
qbe: Update to latest git
Diffstat:
8 files changed, 179 insertions(+), 310 deletions(-)
diff --git a/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch b/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch
@@ -1,33 +0,0 @@
-From 6c1744026545445511f1c500653bab859bc79b50 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Sat, 11 May 2019 19:38:13 -0700
-Subject: [PATCH] arm64: prevent stack clobber when passing structures < 8
- bytes
-
----
- arm64/abi.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/arm64/abi.c b/arm64/abi.c
-index 8209944..f37c892 100644
---- a/arm64/abi.c
-+++ b/arm64/abi.c
-@@ -312,12 +312,14 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
- {
- Insl *il;
- int al;
-+ uint64_t sz;
-
- il = alloc(sizeof *il);
- al = c->t->align - 2; /* NAlign == 3 */
- if (al < 0)
- al = 0;
-- il->i = (Ins){Oalloc+al, Kl, r, {getcon(c->t->size, fn)}};
-+ sz = c->class & Cptr ? c->t->size : c->size;
-+ il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
- il->link = *ilp;
- *ilp = il;
- }
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0001-gas-put-zero-data-into-.bss.patch b/pkg/qbe/patch/0001-gas-put-zero-data-into-.bss.patch
@@ -0,0 +1,95 @@
+From ff79051c78b68121da85eb7d271998a0bbbaf4d7 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Tue, 28 Sep 2021 11:25:05 -0700
+Subject: [PATCH] gas: put zero data into .bss
+
+---
+ all.h | 6 ++----
+ gas.c | 21 ++++++++++++++++-----
+ parse.c | 4 ++--
+ 3 files changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/all.h b/all.h
+index 942c52d..257d6ba 100644
+--- a/all.h
++++ b/all.h
+@@ -387,6 +387,8 @@ struct Dat {
+ DL,
+ DZ
+ } type;
++ char *name;
++ Lnk *lnk;
+ union {
+ int64_t num;
+ double fltd;
+@@ -396,10 +398,6 @@ struct Dat {
+ char *name;
+ int64_t off;
+ } ref;
+- struct {
+- char *name;
+- Lnk *lnk;
+- } start;
+ } u;
+ char isref;
+ char isstr;
+diff --git a/gas.c b/gas.c
+index 4400769..bc76648 100644
+--- a/gas.c
++++ b/gas.c
+@@ -33,21 +33,32 @@ gasemitdat(Dat *d, FILE *f)
+ [DW] = "\t.int",
+ [DL] = "\t.quad"
+ };
++ static int64_t bss;
+ char *p;
+
+ switch (d->type) {
+ case DStart:
+- gasemitlnk(
+- d->u.start.name,
+- d->u.start.lnk,
+- ".data", f);
++ bss = 0;
+ break;
+ case DEnd:
++ if (bss != -1) {
++ gasemitlnk(d->name, d->lnk, ".bss", f);
++ fprintf(f, "\t.fill %"PRId64",1,0\n", bss);
++ }
+ break;
+ case DZ:
+- fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
++ if (bss != -1)
++ bss += d->u.num;
++ else
++ fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
+ break;
+ default:
++ if (bss != -1) {
++ gasemitlnk(d->name, d->lnk, ".data", f);
++ if (bss > 0)
++ fprintf(f, "\t.fill %"PRId64",1,0\n", bss);
++ bss = -1;
++ }
+ if (d->isstr) {
+ if (d->type != DB)
+ err("strings only supported for 'b' currently");
+diff --git a/parse.c b/parse.c
+index 5e5ab66..fb8b509 100644
+--- a/parse.c
++++ b/parse.c
+@@ -1010,8 +1010,8 @@ parsedat(void cb(Dat *), Lnk *lnk)
+ t = nextnl();
+ }
+ d.type = DStart;
+- d.u.start.name = name;
+- d.u.start.lnk = lnk;
++ d.name = name;
++ d.lnk = lnk;
+ cb(&d);
+
+ if (t != Tlbrace)
+--
+2.34.1
+
diff --git a/pkg/qbe/patch/0002-amd64-optimize-loading-0-into-registers.patch b/pkg/qbe/patch/0002-amd64-optimize-loading-0-into-registers.patch
@@ -0,0 +1,83 @@
+From 97d75808dc4e8eff6d15a56e6812af168dc265d7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
+Date: Sun, 11 Jul 2021 19:19:12 -0300
+Subject: [PATCH] amd64: optimize loading 0 into registers
+
+Loading +0 into a floating point register can be done using the pxor or
+xorps instructions. Per [1], we went with pxor because it can run on all
+vector ALU ports, even though its encoding is one byte longer.
+
+Similarly, an integer register can be zeroed with xor, which has a
+smaller encoding than mov with 0 immediate.
+
+To implement this, we special-case fixarg so that a floating-point +0
+constant is loaded with Ocopy rather than from a memory slot, and change
+emitins to emit pxor/xor when it encounters a copy from 0.
+
+Co-authored-by: Michael Forney <mforney@mforney.org>
+
+[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
+---
+ amd64/emit.c | 12 ++++++++++++
+ amd64/isel.c | 12 +++++++-----
+ 2 files changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/amd64/emit.c b/amd64/emit.c
+index b8e9e8e..388b8b3 100644
+--- a/amd64/emit.c
++++ b/amd64/emit.c
+@@ -450,6 +450,18 @@ emitins(Ins i, Fn *fn, FILE *f)
+ if (req(i.to, i.arg[0]))
+ break;
+ t0 = rtype(i.arg[0]);
++ if (t0 == RCon
++ && fn->con[i.arg[0].val].type == CBits
++ && fn->con[i.arg[0].val].bits.i == 0) {
++ if (isreg(i.to)) {
++ if (KBASE(i.cls) == 0)
++ emitf("xor%k %=, %=", &i, fn, f);
++ else
++ emitf("pxor %D=, %D=", &i, fn, f);
++ break;
++ }
++ i.cls = KWIDE(i.cls) ? Kl : Kw;
++ }
+ if (i.cls == Kl
+ && t0 == RCon
+ && fn->con[i.arg[0].val].type == CBits) {
+diff --git a/amd64/isel.c b/amd64/isel.c
+index 4181e26..d4f0b69 100644
+--- a/amd64/isel.c
++++ b/amd64/isel.c
+@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ r1 = r0 = *r;
+ s = rslot(r0, fn);
+ op = i ? i->op : Ocopy;
+- if (KBASE(k) == 1 && rtype(r0) == RCon) {
++ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
+ /* load floating points from memory
+ * slots, they can't be used as
+ * immediates
+@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
+ a.offset.label = intern(buf);
+ fn->mem[fn->nmem-1] = a;
+ }
+- else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
++ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
+ /* load constants that do not fit in
+ * a 32bit signed integer into a
+- * long temporary
++ * long temporary OR
++ * load positive zero into a floating
++ * point register
+ */
+- r1 = newtmp("isel", Kl, fn);
+- emit(Ocopy, Kl, r1, r0, R);
++ r1 = newtmp("isel", k, fn);
++ emit(Ocopy, k, r1, r0, R);
+ }
+ else if (s != -1) {
+ /* load fast locals' addresses into
+--
+2.34.1
+
diff --git a/pkg/qbe/patch/0002-increase-NString-to-72.patch b/pkg/qbe/patch/0002-increase-NString-to-72.patch
@@ -1,25 +0,0 @@
-From 294fedc93dbeac68f0beec1eeea62be30227b025 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Fri, 31 May 2019 13:31:04 -0700
-Subject: [PATCH] increase NString to 72
-
----
- all.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/all.h b/all.h
-index 37980d3..f49b4ef 100644
---- a/all.h
-+++ b/all.h
-@@ -31,7 +31,7 @@ typedef struct Dat Dat;
- typedef struct Target Target;
-
- enum {
-- NString = 64,
-+ NString = 72,
- NIns = 1 << 20,
- NAlign = 3,
- NField = 32,
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch
@@ -1,66 +0,0 @@
-From b3c8dfafafd7e749a12227c951f3faebc2572710 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Sun, 16 Jun 2019 01:38:27 -0700
-Subject: [PATCH] fold: don't fold invalid addition/subtraction rather than
- failing
-
-This may happen in a branch QBE doesn't realize is unreachable,
-for example (simplified from real code found in ncurses)
-
- data $str = { b "abcdef", b 0 }
- function l $f(w %x) {
- @start
- %.1 =w ceqw %x, 0
- jnz %.1, @logic_join, @logic_right
- @logic_right
- %p =l call $strchr(l $str, w %x)
- %.2 =w ceql %p, 0
- @logic_join
- %.3 =w phi @start %.1, @logic_right %.2
- jnz %.3, @fail, @return
- @fail
- ret 0
- @return
- %.4 =l sub %p, $str
- ret %.4
- }
----
- fold.c | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/fold.c b/fold.c
-index 2081a72..50a862e 100644
---- a/fold.c
-+++ b/fold.c
-@@ -343,7 +343,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
- if (op == Oadd) {
- if (cl->type == CAddr) {
- if (cr->type == CAddr)
-- err("undefined addition (addr + addr)");
-+ return 1;
- lab = cl->label;
- typ = CAddr;
- }
-@@ -358,16 +358,13 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
- lab = cl->label;
- typ = CAddr;
- } else if (cl->label != cr->label)
-- err("undefined substraction (addr1 - addr2)");
-+ return 1;
- }
- else if (cr->type == CAddr)
-- err("undefined substraction (num - addr)");
-- }
-- else if (cl->type == CAddr || cr->type == CAddr) {
-- if (Ocmpl <= op && op <= Ocmpl1)
- return 1;
-- err("invalid address operand for '%s'", optab[op].name);
- }
-+ else if (cl->type == CAddr || cr->type == CAddr)
-+ return 1;
- switch (op) {
- case Oadd: x = l.u + r.u; break;
- case Osub: x = l.u - r.u; break;
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch b/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch
@@ -1,102 +0,0 @@
-From e81a67355f1a53739cbfd9797ac9d687efff05e8 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Tue, 28 Sep 2021 11:25:05 -0700
-Subject: [PATCH] gas: put zero data into .bss
-
----
- gas.c | 56 ++++++++++++++++++++++++++++++++++++++++----------------
- 1 file changed, 40 insertions(+), 16 deletions(-)
-
-diff --git a/gas.c b/gas.c
-index 8c31794..ce082dc 100644
---- a/gas.c
-+++ b/gas.c
-@@ -3,10 +3,26 @@
-
- char *gasloc, *gassym;
-
-+static void
-+startdat(FILE *f, char *section, char *name, int align, int export, int zero)
-+{
-+ char *p;
-+
-+ if (section)
-+ fprintf(f, ".section %s\n", section);
-+ else
-+ fprintf(f, "%s\n", zero ? ".bss" : ".data");
-+ fprintf(f, ".balign %d\n", align);
-+ p = name[0] == '"' ? "" : gassym;
-+ if (export)
-+ fprintf(f, ".globl %s%s\n", p, name);
-+ fprintf(f, "%s%s:\n", p, name);
-+}
-+
- void
- gasemitdat(Dat *d, FILE *f)
- {
-- static int aligned;
-+ static int aligned, export;
- static char *dtoa[] = {
- [DAlign] = ".balign",
- [DB] = "\t.byte",
-@@ -14,34 +30,42 @@ gasemitdat(Dat *d, FILE *f)
- [DW] = "\t.int",
- [DL] = "\t.quad"
- };
-+ static char *name, *section;
-+ static int64_t zero;
- char *p;
-
- switch (d->type) {
- case DStart:
- aligned = 0;
-- if (d->u.str) {
-- fprintf(f, ".section %s\n", d->u.str);
-- } else {
-- fprintf(f, ".data\n");
-- }
-+ zero = 0;
-+ section = d->u.str;
- break;
- case DEnd:
-+ if (zero != -1) {
-+ startdat(f, section, name, aligned, export, 1);
-+ fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
-+ }
-+ break;
-+ case DAlign:
-+ aligned = d->u.num;
- break;
- case DName:
-- if (!aligned)
-- fprintf(f, ".balign 8\n");
-- p = d->u.str[0] == '"' ? "" : gassym;
-- if (d->export)
-- fprintf(f, ".globl %s%s\n", p, d->u.str);
-- fprintf(f, "%s%s:\n", p, d->u.str);
-+ name = d->u.str;
-+ export = d->export;
- break;
- case DZ:
-- fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
-+ if (zero != -1)
-+ zero += d->u.num;
-+ else
-+ fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
- break;
- default:
-- if (d->type == DAlign)
-- aligned = 1;
--
-+ if (zero != -1) {
-+ startdat(f, section, name, aligned, export, 0);
-+ if (zero > 0)
-+ fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
-+ zero = -1;
-+ }
- if (d->isstr) {
- if (d->type != DB)
- err("strings only supported for 'b' currently");
---
-2.32.0
-
diff --git a/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch b/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch
@@ -1,83 +0,0 @@
-From a11da13e22a694f8fe4a81d894d433f50ce4af6b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
-Date: Sun, 11 Jul 2021 19:19:12 -0300
-Subject: [PATCH] amd64: optimize loading 0 into registers
-
-Loading +0 into a floating point register can be done using pxor or
-xorps instructions. Per [1], we went with pxor because it can run on all
-vector ALU ports, even if it's one byte longer.
-
-Similarly, an integer register can be zeroed with xor, which has a
-smaller encoding than mov with 0 immediate.
-
-To implement this, we special case fixarg to allow Ocopy when the
-value is +0 for floating point, and change emitins to emit pxor/xor
-when it encounters a copy from 0.
-
-Co-authored-by: Michael Forney <mforney@mforney.org>
-
-[1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
----
- amd64/emit.c | 12 ++++++++++++
- amd64/isel.c | 12 +++++++-----
- 2 files changed, 19 insertions(+), 5 deletions(-)
-
-diff --git a/amd64/emit.c b/amd64/emit.c
-index a888000..7aeeff5 100644
---- a/amd64/emit.c
-+++ b/amd64/emit.c
-@@ -443,6 +443,18 @@ emitins(Ins i, Fn *fn, FILE *f)
- if (req(i.to, i.arg[0]))
- break;
- t0 = rtype(i.arg[0]);
-+ if (t0 == RCon
-+ && fn->con[i.arg[0].val].type == CBits
-+ && fn->con[i.arg[0].val].bits.i == 0) {
-+ if (isreg(i.to)) {
-+ if (KBASE(i.cls) == 0)
-+ emitf("xor%k %=, %=", &i, fn, f);
-+ else
-+ emitf("pxor %D=, %D=", &i, fn, f);
-+ break;
-+ }
-+ i.cls = KWIDE(i.cls) ? Kl : Kw;
-+ }
- if (i.cls == Kl
- && t0 == RCon
- && fn->con[i.arg[0].val].type == CBits) {
-diff --git a/amd64/isel.c b/amd64/isel.c
-index 607c176..1c902f5 100644
---- a/amd64/isel.c
-+++ b/amd64/isel.c
-@@ -69,7 +69,7 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
- r1 = r0 = *r;
- s = rslot(r0, fn);
- op = i ? i->op : Ocopy;
-- if (KBASE(k) == 1 && rtype(r0) == RCon) {
-+ if (KBASE(k) == 1 && rtype(r0) == RCon && fn->con[r0.val].bits.i != 0) {
- /* load floating points from memory
- * slots, they can't be used as
- * immediates
-@@ -84,13 +84,15 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
- a.offset.label = intern(buf);
- fn->mem[fn->nmem-1] = a;
- }
-- else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
-+ else if (op != Ocopy && ((k == Kl && noimm(r0, fn)) || (KBASE(k) == 1 && rtype(r0) == RCon))) {
- /* load constants that do not fit in
- * a 32bit signed integer into a
-- * long temporary
-+ * long temporary OR
-+ * load positive zero into a floating
-+ * point register
- */
-- r1 = newtmp("isel", Kl, fn);
-- emit(Ocopy, Kl, r1, r0, R);
-+ r1 = newtmp("isel", k, fn);
-+ emit(Ocopy, k, r1, r0, R);
- }
- else if (s != -1) {
- /* load fast locals' addresses into
---
-2.32.0
-
diff --git a/pkg/qbe/ver b/pkg/qbe/ver
@@ -1 +1 @@
-900805a8fe r0
+2ca6fb25a2 r0