-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU][True16][CodeGen] update more GFX11Plus codegen test with true16 mode #138600
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][CodeGen] update more GFX11Plus codegen test with true16 mode #138600
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: This is an NFC patch. It duplicates the GFX11Plus RUN lines, applying "-mattr=+real-true16" and "-mattr=-real-true16" to them in more gfx11/gfx12 tests, and then regenerates the tests with the update script. Patch is 6.60 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138600.diff 27 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
index 1ef7d358d8cae..8ae7b58330256 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll
@@ -3,7 +3,8 @@
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define <18 x float> @bitcast_v18i32_to_v18f32(<18 x i32> %a, i32 %b) {
; GCN-LABEL: bitcast_v18i32_to_v18f32:
@@ -1227,113 +1228,145 @@ define <36 x i16> @bitcast_v18i32_to_v36i16(<18 x i32> %a, i32 %b) {
; GFX9-NEXT: v_perm_b32 v17, v18, v17, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: bitcast_v18i32_to_v36i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v18
-; GFX11-NEXT: ; implicit-def: $vgpr35
-; GFX11-NEXT: ; implicit-def: $vgpr34
-; GFX11-NEXT: ; implicit-def: $vgpr33
-; GFX11-NEXT: ; implicit-def: $vgpr32
-; GFX11-NEXT: ; implicit-def: $vgpr31
-; GFX11-NEXT: ; implicit-def: $vgpr30
-; GFX11-NEXT: ; implicit-def: $vgpr29
-; GFX11-NEXT: ; implicit-def: $vgpr28
-; GFX11-NEXT: ; implicit-def: $vgpr27
-; GFX11-NEXT: ; implicit-def: $vgpr26
-; GFX11-NEXT: ; implicit-def: $vgpr25
-; GFX11-NEXT: ; implicit-def: $vgpr24
-; GFX11-NEXT: ; implicit-def: $vgpr23
-; GFX11-NEXT: ; implicit-def: $vgpr22
-; GFX11-NEXT: ; implicit-def: $vgpr21
-; GFX11-NEXT: ; implicit-def: $vgpr20
-; GFX11-NEXT: ; implicit-def: $vgpr19
-; GFX11-NEXT: ; implicit-def: $vgpr18
-; GFX11-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT: s_cbranch_execz .LBB6_2
-; GFX11-NEXT: ; %bb.1: ; %cmp.false
-; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v17
-; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v15
-; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v14
-; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v13
-; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v12
-; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v11
-; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v10
-; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v9
-; GFX11-NEXT: v_lshrrev_b32_e32 v27, 16, v8
-; GFX11-NEXT: v_lshrrev_b32_e32 v28, 16, v7
-; GFX11-NEXT: v_lshrrev_b32_e32 v29, 16, v6
-; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v31, 16, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v32, 16, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v35, 16, v0
-; GFX11-NEXT: .LBB6_2: ; %Flow
-; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
-; GFX11-NEXT: s_cbranch_execz .LBB6_4
-; GFX11-NEXT: ; %bb.3: ; %cmp.true
-; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v17
-; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16
-; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15
-; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v14
-; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v13
-; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v12
-; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v11
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v10
-; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v9
-; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v8
-; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v7
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v6
-; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v5
-; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v4
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v3
-; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v2
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v1
-; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v18, 16, v17
-; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v15
-; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v14
-; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v13
-; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v12
-; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v11
-; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v10
-; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v9
-; GFX11-NEXT: v_lshrrev_b32_e32 v27, 16, v8
-; GFX11-NEXT: v_lshrrev_b32_e32 v28, 16, v7
-; GFX11-NEXT: v_lshrrev_b32_e32 v29, 16, v6
-; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v31, 16, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v32, 16, v3
-; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v35, 16, v0
-; GFX11-NEXT: .LBB6_4: ; %end
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_perm_b32 v0, v35, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v34, v1, 0x5040100
-; GFX11-NEXT: v_perm_b32 v2, v33, v2, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v32, v3, 0x5040100
-; GFX11-NEXT: v_perm_b32 v4, v31, v4, 0x5040100
-; GFX11-NEXT: v_perm_b32 v5, v30, v5, 0x5040100
-; GFX11-NEXT: v_perm_b32 v6, v29, v6, 0x5040100
-; GFX11-NEXT: v_perm_b32 v7, v28, v7, 0x5040100
-; GFX11-NEXT: v_perm_b32 v8, v27, v8, 0x5040100
-; GFX11-NEXT: v_perm_b32 v9, v26, v9, 0x5040100
-; GFX11-NEXT: v_perm_b32 v10, v25, v10, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v24, v11, 0x5040100
-; GFX11-NEXT: v_perm_b32 v12, v23, v12, 0x5040100
-; GFX11-NEXT: v_perm_b32 v13, v22, v13, 0x5040100
-; GFX11-NEXT: v_perm_b32 v14, v21, v14, 0x5040100
-; GFX11-NEXT: v_perm_b32 v15, v20, v15, 0x5040100
-; GFX11-NEXT: v_perm_b32 v16, v19, v16, 0x5040100
-; GFX11-NEXT: v_perm_b32 v17, v18, v17, 0x5040100
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: bitcast_v18i32_to_v36i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v18
+; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB6_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v17
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v16
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v15
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 3, v14
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 3, v13
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v12
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v11
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v10
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v9
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v8
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v7
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v6
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v5
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v4
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v3
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v2
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v1
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v0
+; GFX11-TRUE16-NEXT: .LBB6_2: ; %end
+; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: bitcast_v18i32_to_v36i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v18
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr35
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr34
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr33
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr32
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr31
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr30
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr29
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr28
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr27
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr26
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr25
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr24
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr23
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr22
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr21
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr20
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr19
+; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr18
+; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB6_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v17
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v15
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v14
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v13
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v12
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v11
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v10
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v9
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v8
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v7
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v6
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v5
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v4
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v0
+; GFX11-FAKE16-NEXT: .LBB6_2: ; %Flow
+; GFX11-FAKE16-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB6_4
+; GFX11-FAKE16-NEXT: ; %bb.3: ; %cmp.true
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v17
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v16
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v15
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 3, v14
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 3, v13
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v12
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v11
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v10
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v9
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v8
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v7
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v6
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v5
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v4
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v3
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v2
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v1
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v17
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v15
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v14
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v13
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v12
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v11
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v10
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v9
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v8
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v7
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v6
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v5
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v4
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v0
+; GFX11-FAKE16-NEXT: .LBB6_4: ; %end
+; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v35, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v1, v34, v1, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v2, v33, v2, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v3, v32, v3, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v4, v31, v4, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v5, v30, v5, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v6, v29, v6, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v7, v28, v7, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v8, v27, v8, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v9, v26, v9, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v10, v25, v10, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v11, v24, v11, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v12, v23, v12, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v13, v22, v13, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v14, v21, v14, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v15, v20, v15, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v16, v19, v16, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v17, v18, v17, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %cmp.true, label %cmp.false
@@ -1963,73 +1996,105 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: bitcast_v36i16_to_v18i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b32_e32 v19, 16, v17
-; GFX11-NEXT: v_lshrrev_b32_e32 v20, 16, v16
-; GFX11-NEXT: v_lshrrev_b32_e32 v21, 16, v15
-; GFX11-NEXT: v_lshrrev_b32_e32 v22, 16, v14
-; GFX11-NEXT: v_lshrrev_b32_e32 v23, 16, v13
-; GFX11-NEXT: v_lshrrev_b32_e32 v24, 16, v12
-; GFX11-NEXT: v_lshrrev_b32_e32 v25, 16, v11
-; GFX11-NEXT: v_lshrrev_b32_e32 v26, 16, v10
-; GFX11-NEXT: v_lshrrev_b32_e32 v27, 16, v9
-; GFX11-NEXT: v_lshrrev_b32_e32 v28, 16, v8
-; GFX11-NEXT: v_lshrrev_b32_e32 v29, 16, v7
-; GFX11-NEXT: v_lshrrev_b32_e32 v30, 16, v6
-; GFX11-NEXT: v_lshrrev_b32_e32 v31, 16, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v32, 16, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v33, 16, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v34, 16, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v35, 16, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v3
-; GFX11-NEXT: v_perm_b32 v4, v32, v4, 0x5040100
-; GFX11-NEXT: v_perm_b32 v0, v33, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v34, v1, 0x5040100
-; GFX11-NEXT: v_perm_b32 v2, v35, v2, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v36, v3, 0x5040100
-; GFX11-NEXT: v_perm_b32 v5, v31, v5, 0x5040100
-; GFX11-NEXT: v_perm_b32 v6, v30, v6, 0x5040100
-; GFX11-NEXT: v_perm_b32 v7, v29, v7, 0x5040100
-; GFX11-NEXT: v_perm_b32 v8, v28, v8, 0x5040100
-; GFX11-NEXT: v_perm_b32 v9, v27, v9, 0x5040100
-; GFX11-NEXT: v_perm_b32 v10, v26, v10, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v25, v11, 0x5040100
-; GFX11-NEXT: v_perm_b32 v12, v24, v12, 0x5040100
-; GFX11-NEXT: v_perm_b32 v13, v23, v13, 0x5040100
-; GFX11-NEXT: v_perm_b32 v14, v22, v14, 0x5040100
-; GFX11-NEXT: v_perm_b32 v15, v21, v15, 0x5040100
-; GFX11-NEXT: v_perm_b32 v16, v20, v16, 0x5040100
-; GFX11-NEXT: v_perm_b32 v17, v19, v17, 0x5040100
-; GFX11-NEXT: s_mov_b32 s0, exec_lo
-; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v18
-; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
-; GFX11-NEXT: s_cbranch_execz .LBB7_2
-; GFX11-NEXT: ; %bb.1: ; %cmp.true
-; GFX11-NEXT: v_pk_add_u16 v0, v0, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0]
-; GFX11-NEXT: .LBB7_2: ; %end
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: bitcast_v36i16_to_v18i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v18
+; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB7_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: .LBB7_2: ; %end
+; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: bitcast_v36i16_to_v18i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v17
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v16
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v15
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v14
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v13
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v12
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v11
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v10
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v9
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v8
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v7
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v6
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v5
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v4
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v1
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v3
+; GFX11-FAKE16-NEXT: v_perm_b32 v4, v32, v4, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v33, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v1, v34, v1, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v2, v35, v2, 0x5040100
+; GFX11-FAKE16-NEXT: v_perm_b32 v3, v36, v3, 0x50401...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
; GFX11-NEXT: v_perm_b32 v20, v23, v20, 0x5040100 | ||
; GFX11-NEXT: v_perm_b32 v21, v22, v21, 0x5040100 | ||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ||
; GFX11-TRUE16-LABEL: bitcast_v22i32_to_v44i16: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Shoreshen Does this ISA for the True16 workflow make sense? If so, it looks like a substantial improvement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll merge this patch first since it is an NFC patch, but we can continue the discussion here.
This is an NFC patch.
This patch duplicates the GFX11Plus RUN lines, applying "-mattr=+real-true16" and "-mattr=-real-true16" to them in more gfx11/gfx12 tests, and then regenerates the tests with the update script.