From 5437a02abc9fe106054965828787e8f232692935 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Fri, 3 Jan 2020 10:16:44 +0100
Subject: [PATCH] Arm64: correct address index operands for LD1RO{H,W,D}

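Just like their LD1RQ{H,W,D} counterparts, the index registers of
LD1RO{H,W,D} get scaled by the element size, as per the specification.

While here, also drop the stray "opcodes/" prefix from the aarch64-tbl.h
entries in opcodes/ChangeLog.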
---
 gas/ChangeLog                     |  5 +++++
 gas/testsuite/gas/aarch64/f64mm.d | 24 ++++++++++++------------
 gas/testsuite/gas/aarch64/f64mm.s | 24 ++++++++++++------------
 opcodes/ChangeLog                 | 11 ++++++++---
 opcodes/aarch64-tbl.h             |  8 ++++----
 5 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/gas/ChangeLog b/gas/ChangeLog
index 4a8bb549b34..41959ae7e73 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,8 @@
+2020-01-03  Jan Beulich  <jbeulich@suse.com>
+
+	* testsuite/gas/aarch64/f64mm.s: Scale index of LD1RO{H,W,D}.
+	* testsuite/gas/aarch64/f64mm.d: Adjust expectations.
+
 2020-01-03  Jan Beulich  <jbeulich@suse.com>
 
 	* testsuite/gas/aarch64/i8mm.s: Add 128-bit form tests for
diff --git a/gas/testsuite/gas/aarch64/f64mm.d b/gas/testsuite/gas/aarch64/f64mm.d
index e9ec69440ab..35c0853b72c 100644
--- a/gas/testsuite/gas/aarch64/f64mm.d
+++ b/gas/testsuite/gas/aarch64/f64mm.d
@@ -10,20 +10,20 @@ Disassembly of section \.text:
  *[0-9a-f]+:	64e0e400 	fmmla	z0\.d, z0\.d, z0\.d
  *[0-9a-f]+:	a43b17f1 	ld1rob	{z17\.b}, p5/z, \[sp, x27\]
  *[0-9a-f]+:	a42003e0 	ld1rob	{z0\.b}, p0/z, \[sp, x0\]
- *[0-9a-f]+:	a4bb17f1 	ld1roh	{z17\.h}, p5/z, \[sp, x27\]
- *[0-9a-f]+:	a4a003e0 	ld1roh	{z0\.h}, p0/z, \[sp, x0\]
- *[0-9a-f]+:	a53b17f1 	ld1row	{z17\.s}, p5/z, \[sp, x27\]
- *[0-9a-f]+:	a52003e0 	ld1row	{z0\.s}, p0/z, \[sp, x0\]
- *[0-9a-f]+:	a5bb17f1 	ld1rod	{z17\.d}, p5/z, \[sp, x27\]
- *[0-9a-f]+:	a5a003e0 	ld1rod	{z0\.d}, p0/z, \[sp, x0\]
+ *[0-9a-f]+:	a4bb17f1 	ld1roh	{z17\.h}, p5/z, \[sp, x27, lsl #1\]
+ *[0-9a-f]+:	a4a003e0 	ld1roh	{z0\.h}, p0/z, \[sp, x0, lsl #1\]
+ *[0-9a-f]+:	a53b17f1 	ld1row	{z17\.s}, p5/z, \[sp, x27, lsl #2\]
+ *[0-9a-f]+:	a52003e0 	ld1row	{z0\.s}, p0/z, \[sp, x0, lsl #2\]
+ *[0-9a-f]+:	a5bb17f1 	ld1rod	{z17\.d}, p5/z, \[sp, x27, lsl #3\]
+ *[0-9a-f]+:	a5a003e0 	ld1rod	{z0\.d}, p0/z, \[sp, x0, lsl #3\]
  *[0-9a-f]+:	a43b1411 	ld1rob	{z17\.b}, p5/z, \[x0, x27\]
  *[0-9a-f]+:	a4200000 	ld1rob	{z0\.b}, p0/z, \[x0, x0\]
- *[0-9a-f]+:	a4bb1411 	ld1roh	{z17\.h}, p5/z, \[x0, x27\]
- *[0-9a-f]+:	a4a00000 	ld1roh	{z0\.h}, p0/z, \[x0, x0\]
- *[0-9a-f]+:	a53b1411 	ld1row	{z17\.s}, p5/z, \[x0, x27\]
- *[0-9a-f]+:	a5200000 	ld1row	{z0\.s}, p0/z, \[x0, x0\]
- *[0-9a-f]+:	a5bb1411 	ld1rod	{z17\.d}, p5/z, \[x0, x27\]
- *[0-9a-f]+:	a5a00000 	ld1rod	{z0\.d}, p0/z, \[x0, x0\]
+ *[0-9a-f]+:	a4bb1411 	ld1roh	{z17\.h}, p5/z, \[x0, x27, lsl #1\]
+ *[0-9a-f]+:	a4a00000 	ld1roh	{z0\.h}, p0/z, \[x0, x0, lsl #1\]
+ *[0-9a-f]+:	a53b1411 	ld1row	{z17\.s}, p5/z, \[x0, x27, lsl #2\]
+ *[0-9a-f]+:	a5200000 	ld1row	{z0\.s}, p0/z, \[x0, x0, lsl #2\]
+ *[0-9a-f]+:	a5bb1411 	ld1rod	{z17\.d}, p5/z, \[x0, x27, lsl #3\]
+ *[0-9a-f]+:	a5a00000 	ld1rod	{z0\.d}, p0/z, \[x0, x0, lsl #3\]
  *[0-9a-f]+:	a42037f1 	ld1rob	{z17\.b}, p5/z, \[sp\]
  *[0-9a-f]+:	a42723e0 	ld1rob	{z0\.b}, p0/z, \[sp, #224\]
  *[0-9a-f]+:	a42823e0 	ld1rob	{z0\.b}, p0/z, \[sp, #-256\]
diff --git a/gas/testsuite/gas/aarch64/f64mm.s b/gas/testsuite/gas/aarch64/f64mm.s
index cfe6b176d24..a58b3e9e955 100644
--- a/gas/testsuite/gas/aarch64/f64mm.s
+++ b/gas/testsuite/gas/aarch64/f64mm.s
@@ -13,21 +13,21 @@ fmmla	z0.d,  z0.d,  z0.d
 
 ld1rob { z17.b }, p5/z, [sp, x27]
 ld1rob { z0.b }, p0/z, [sp, x0]
-ld1roh { z17.h }, p5/z, [sp, x27]
-ld1roh { z0.h }, p0/z, [sp, x0]
-ld1row { z17.s }, p5/z, [sp, x27]
-ld1row { z0.s }, p0/z, [sp, x0]
-ld1rod { z17.d }, p5/z, [sp, x27]
-ld1rod { z0.d }, p0/z, [sp, x0]
+ld1roh { z17.h }, p5/z, [sp, x27, lsl #1]
+ld1roh { z0.h }, p0/z, [sp, x0, lsl #1]
+ld1row { z17.s }, p5/z, [sp, x27, lsl #2]
+ld1row { z0.s }, p0/z, [sp, x0, lsl #2]
+ld1rod { z17.d }, p5/z, [sp, x27, lsl #3]
+ld1rod { z0.d }, p0/z, [sp, x0, lsl #3]
 
 ld1rob { z17.b }, p5/z, [x0, x27]
 ld1rob { z0.b }, p0/z, [x0, x0]
-ld1roh { z17.h }, p5/z, [x0, x27]
-ld1roh { z0.h }, p0/z, [x0, x0]
-ld1row { z17.s }, p5/z, [x0, x27]
-ld1row { z0.s }, p0/z, [x0, x0]
-ld1rod { z17.d }, p5/z, [x0, x27]
-ld1rod { z0.d }, p0/z, [x0, x0]
+ld1roh { z17.h }, p5/z, [x0, x27, lsl #1]
+ld1roh { z0.h }, p0/z, [x0, x0, lsl #1]
+ld1row { z17.s }, p5/z, [x0, x27, lsl #2]
+ld1row { z0.s }, p0/z, [x0, x0, lsl #2]
+ld1rod { z17.d }, p5/z, [x0, x27, lsl #3]
+ld1rod { z0.d }, p0/z, [x0, x0, lsl #3]
 
 ld1rob { z17.b }, p5/z, [sp, #0]
 ld1rob { z0.b }, p0/z, [sp, #224]
diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog
index 19a7b3f5c54..fb9f9e0929e 100644
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@@ -1,17 +1,22 @@
 2020-01-03  Jan Beulich  <jbeulich@suse.com>
 
-	* opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct SIMD
+	* aarch64-tbl.h (aarch64_opcode_table): Use
+	SVE_ADDR_RX_LSL{1,2,3} for LD1RO{H,W,D}.
+
+2020-01-03  Jan Beulich  <jbeulich@suse.com>
+
+	* aarch64-tbl.h (aarch64_opcode_table): Correct SIMD
 	forms of SUDOT and USDOT.
 
 2020-01-03  Jan Beulich  <jbeulich@suse.com>
 
-	* opcodes/aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from
+	* aarch64-tbl.h (aarch64_opcode_table): Drop 'i' from
 	uzip{1,2}.
 	* opcodes/aarch64-dis-2.c: Re-generate.
 
 2020-01-03  Jan Beulich  <jbeulich@suse.com>
 
-	* opcodes/aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit
+	* aarch64-tbl.h (aarch64_opcode_table): Correct 64-bit
 	FMMLA encoding.
 	* opcodes/aarch64-dis-2.c: Re-generate.
 
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 2655ca5d56f..48872e4c373 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -5074,10 +5074,10 @@ struct aarch64_opcode aarch64_opcode_table[] =
   INT8MATMUL_SVE_INSNC ("sudot",  0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
   F32MATMUL_SVE_INSNC ("fmmla",   0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
   F64MATMUL_SVE_INSNC ("fmmla",   0x64e0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
-  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_BZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_HZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_SZU, F_OD(1), 0),
-  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX),  OP_SVE_DZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1rob",  0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1roh",  0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL1), OP_SVE_HZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1row",  0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL2), OP_SVE_SZU, F_OD(1), 0),
+  F64MATMUL_SVE_INSN ("ld1rod",  0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX_LSL3), OP_SVE_DZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1rob",  0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1roh",  0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0),
   F64MATMUL_SVE_INSN ("ld1row",  0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0),
-- 
GitLab