blob: 587357ae6951c85745b6f405103690c72d1480de [file] [log] [blame]
River Riddle400ad6f2020-04-08 19:57:021// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s
2// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB
Uday Bondhugulaf97c1c52019-02-16 01:54:493
Uday Bondhugula18b8d432019-08-01 23:31:154// We run most test cases with skip-non-unit-stride-loops to allow testing
Uday Bondhugulaf97c1c52019-02-16 01:54:495// DMA generation at inner levels easily - since the DMA generation would
6// otherwise always generate DMAs at the outermost level (default for fast mem
7// capacity is infinite). Using a specific capacity makes it harder to write
8// a test case as one would have to calculate total footprints. With
Uday Bondhugula18b8d432019-08-01 23:31:159// skip-non-unit-stride-loops, non-unit stride loops will always be skipped and
Uday Bondhugulaf97c1c52019-02-16 01:54:4910// their inner loops will be traversed till a unit stride loop is found (or the
11// innermost block is reached).
Uday Bondhugulae0623d42018-11-09 01:31:0112
Andy Davis6254a42d2019-05-09 14:02:3213// -----
14
Chris Lattnerbbf362b2019-01-02 18:20:0015// CHECK-LABEL: func @loop_nest_1d() {
// Three loads in one loop: %A (256 elements, default memory space), %B (512
// elements, read at %i + 256) and %F (already allocated in memory space 2).
16func @loop_nest_1d() {
Julian Grosse2310702021-02-10 12:53:1117 %A = memref.alloc() : memref<256 x f32>
18 %B = memref.alloc() : memref<512 x f32>
19 %F = memref.alloc() : memref<256 x f32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0120 // First DMA buffer.
Julian Grosse2310702021-02-10 12:53:1121 // CHECK: memref.alloc() : memref<256xf32>
22 // CHECK: memref.alloc() : memref<256xf32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0123 // Tag for first DMA.
Julian Grosse2310702021-02-10 12:53:1124 // CHECK: memref.alloc() : memref<1xi32>
Uday Bondhugulae0623d42018-11-09 01:31:0125 // First DMA transfer.
River Riddle89bc4492019-07-09 17:40:2926 // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32>
27 // CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugulae0623d42018-11-09 01:31:0128 // Second DMA buffer.
Julian Grosse2310702021-02-10 12:53:1129 // CHECK: memref.alloc() : memref<256xf32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0130 // Tag for second DMA.
Julian Grosse2310702021-02-10 12:53:1131 // CHECK: memref.alloc() : memref<1xi32>
Uday Bondhugulae0623d42018-11-09 01:31:0132 // Second DMA transfer.
River Riddle89bc4492019-07-09 17:40:2933 // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<512xf32>, memref<256xf32, 2>, memref<1xi32>
34 // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugula70da33b2020-04-01 06:30:2635 // CHECK: affine.for %[[IV:.*]] = 0 to 256 {
Uday Bondhugula7c771632020-04-01 21:04:1536 // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<256xf32, 2>
Uday Bondhugula70da33b2020-04-01 06:30:2637 // Buffer for '%B' in faster memref space is of smaller size: 256xf32
38 // Affine map for load on B is composed and becomes identity.
39 // CHECK: affine.load %{{.*}}[%[[IV]]] : memref<256xf32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0140 // Already in faster memory space.
Uday Bondhugula70da33b2020-04-01 06:30:2641 // CHECK: affine.load %{{.*}}[%[[IV]]] : memref<256xf32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0142 // CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:2943 // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
44 // CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2>
45 // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
46 // CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2>
Uday Bondhugulae0623d42018-11-09 01:31:0147 // CHECK-NEXT: return
River Riddle832567b2019-03-25 17:14:3448 affine.for %i = 0 to 256 {
Andy Davis2e1187d2019-07-03 17:35:0349 affine.load %A[%i] : memref<256 x f32>
River Riddle4268e4f2020-01-13 21:12:3750 %idx = affine.apply affine_map<(d0) -> (d0 + 256)>(%i)
Andy Davis2e1187d2019-07-03 17:35:0351 affine.load %B[%idx] : memref<512 x f32>
52 affine.load %F[%i] : memref<256 x f32, 2>
Uday Bondhugulafff1efb2018-11-17 04:12:0653 }
54 return
55}
56
56
Andy Davis6254a42d2019-05-09 14:02:3257// -----
58
Chris Lattnerbbf362b2019-01-02 18:20:0059// CHECK-LABEL: func @loop_nest_high_d
Mogballa54f4ea2021-10-12 23:14:5760// CHECK: %{{.*}} = arith.constant 16384 : index
Julian Grosse2310702021-02-10 12:53:1161// CHECK-DAG: [[BUFB:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2>
62// CHECK-DAG: [[BUFA:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2>
63// CHECK-DAG: [[BUFC:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2>
64// CHECK-DAG: [[TAGB:%[0-9]+]] = memref.alloc() : memref<1xi32>
65// CHECK-DAG: [[TAGA:%[0-9]+]] = memref.alloc() : memref<1xi32>
66// CHECK-DAG: [[TAGC:%[0-9]+]] = memref.alloc() : memref<1xi32>
67// CHECK-DAG: [[TAGC_W:%[0-9]+]] = memref.alloc() : memref<1xi32>
Uday Bondhugulafff1efb2018-11-17 04:12:0668// INCOMING DMA for B
River Riddle89bc4492019-07-09 17:40:2969// CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFB]][%{{.*}}, %{{.*}}], [[TAGB]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
70// CHECK-DAG: affine.dma_wait [[TAGB]][%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugulafff1efb2018-11-17 04:12:0671// INCOMING DMA for A.
River Riddle89bc4492019-07-09 17:40:2972// CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFA]][%{{.*}}, %{{.*}}], [[TAGA]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
73// CHECK-DAG: affine.dma_wait [[TAGA]][%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugulafff1efb2018-11-17 04:12:0674// INCOMING DMA for C.
River Riddle89bc4492019-07-09 17:40:2975// CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFC]][%{{.*}}, %{{.*}}], [[TAGC]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
76// CHECK-DAG: affine.dma_wait [[TAGC]][%{{.*}}], %{{.*}} : memref<1xi32>
77// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 {
78// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 {
79// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 {
80// CHECK-NEXT: affine.for %{{.*}} = 0 to 16 {
Uday Bondhugula70da33b2020-04-01 06:30:2681// CHECK: affine.load [[BUFB]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2>
River Riddle89bc4492019-07-09 17:40:2982// CHECK-NEXT: "foo"(%{{.*}}) : (f32) -> ()
Uday Bondhugulafff1efb2018-11-17 04:12:0683// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:2984// CHECK-NEXT: affine.for %{{.*}} = 0 to 16 {
Uday Bondhugula70da33b2020-04-01 06:30:2685// CHECK: affine.load [[BUFA]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2>
River Riddle89bc4492019-07-09 17:40:2986// CHECK-NEXT: "bar"(%{{.*}}) : (f32) -> ()
Uday Bondhugulafff1efb2018-11-17 04:12:0687// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:2988// CHECK-NEXT: affine.for %{{.*}} = 0 to 16 {
Uday Bondhugula7c771632020-04-01 21:04:1589// CHECK-NEXT: "abc_compute"() : () -> f32
Uday Bondhugula70da33b2020-04-01 06:30:2690// CHECK: affine.load [[BUFC]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2>
Uday Bondhugula7c771632020-04-01 21:04:1591// CHECK-NEXT: "addf32"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
River Riddle89bc4492019-07-09 17:40:2992// CHECK-NEXT: affine.store %{{.*}}, [[BUFC]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2>
Uday Bondhugulafff1efb2018-11-17 04:12:0693// CHECK-NEXT: }
94// CHECK-NEXT: "foobar"() : () -> ()
95// CHECK-NEXT: }
96// CHECK-NEXT: }
97// CHECK-NEXT: }
98// OUTGOING DMA for C.
River Riddle89bc4492019-07-09 17:40:2999// CHECK-NEXT: affine.dma_start [[BUFC]][%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], [[TAGC_W]][%{{.*}}], %{{.*}} : memref<512x32xf32, 2>, memref<512x32xf32>, memref<1xi32>
100// CHECK-NEXT: affine.dma_wait [[TAGC_W]][%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugula8b3f8412019-02-12 00:33:53101// CHECK-NEXT: dealloc [[TAGC_W]] : memref<1xi32>
102// CHECK-NEXT: dealloc [[TAGC]] : memref<1xi32>
Uday Bondhugula5836fae2019-03-06 01:19:47103// CHECK-NEXT: dealloc [[BUFC]] : memref<512x32xf32, 2>
Uday Bondhugula8b3f8412019-02-12 00:33:53104// CHECK-NEXT: dealloc [[TAGA]] : memref<1xi32>
Uday Bondhugula5836fae2019-03-06 01:19:47105// CHECK-NEXT: dealloc [[BUFA]] : memref<512x32xf32, 2>
Uday Bondhugula8b3f8412019-02-12 00:33:53106// CHECK-NEXT: dealloc [[TAGB]] : memref<1xi32>
Uday Bondhugula5836fae2019-03-06 01:19:47107// CHECK-NEXT: dealloc [[BUFB]] : memref<512x32xf32, 2>
Uday Bondhugulafff1efb2018-11-17 04:12:06108// CHECK-NEXT: return
109// CHECK-NEXT:}
// Tiled loop nest over three 512 x 32 memrefs: %B and %A are only loaded,
// while %C is loaded and then stored at the same subscripts.
Chris Lattnerbbf362b2019-01-02 18:20:00110func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
Uday Bondhugulafff1efb2018-11-17 04:12:06111 %B: memref<512 x 32 x f32>, %C: memref<512 x 32 x f32>) {
112 // DMAs will be performed at this level (jT is the first loop without a stride).
113 // A and B are read, while C is both read and written. A total of three new buffers
114 // are allocated and existing load's/store's are replaced by accesses to those buffers.
River Riddle832567b2019-03-25 17:14:34115 affine.for %jT = 0 to 32 {
116 affine.for %kT = 0 to 32 {
117 affine.for %iT = 0 to 32 {
118 affine.for %kk = 0 to 16 { // k intratile
River Riddle4268e4f2020-01-13 21:12:37119 %k = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)> (%kT, %kk)
Andy Davis2e1187d2019-07-03 17:35:03120 %v0 = affine.load %B[%k, %jT] : memref<512 x 32 x f32>
Uday Bondhugulafff1efb2018-11-17 04:12:06121 "foo"(%v0) : (f32) -> ()
122 }
River Riddle832567b2019-03-25 17:14:34123 affine.for %ii = 0 to 16 { // i intratile.
River Riddle4268e4f2020-01-13 21:12:37124 %i = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)>(%iT, %ii)
Andy Davis2e1187d2019-07-03 17:35:03125 %v1 = affine.load %A[%i, %kT] : memref<512 x 32 x f32>
Uday Bondhugula72e5c7f2019-01-24 16:43:17126 "bar"(%v1) : (f32) -> ()
Uday Bondhugulafff1efb2018-11-17 04:12:06127 }
River Riddle832567b2019-03-25 17:14:34128 affine.for %ii_ = 0 to 16 { // i intratile.
Uday Bondhugulafff1efb2018-11-17 04:12:06129 %v2 = "abc_compute"() : () -> f32
River Riddle4268e4f2020-01-13 21:12:37130 %i_ = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)>(%iT, %ii_)
Andy Davis2e1187d2019-07-03 17:35:03131 %v3 = affine.load %C[%i_, %jT] : memref<512 x 32 x f32>
Uday Bondhugulafff1efb2018-11-17 04:12:06132 %v4 = "addf32"(%v2, %v3) : (f32, f32) -> (f32)
Andy Davis2e1187d2019-07-03 17:35:03133 affine.store %v4, %C[%i_, %jT] : memref<512 x 32 x f32>
Uday Bondhugulafff1efb2018-11-17 04:12:06134 }
135 "foobar"() : () -> ()
136 }
137 }
138 }
139 return
140}
141
141
Andy Davis6254a42d2019-05-09 14:02:32142// -----
143
Uday Bondhuguladfc752e2018-12-07 23:04:55144// A loop nest with a modulo 2 access. A strided DMA is not needed here for a 1x2
145// region within a 256 x 8 memref.
Uday Bondhugulafff1efb2018-11-17 04:12:06146//
Chris Lattnerbbf362b2019-01-02 18:20:00147// CHECK-LABEL: func @loop_nest_modulo() {
Julian Grosse2310702021-02-10 12:53:11148// CHECK: memref.alloc() : memref<256x8xf32>
River Riddle89bc4492019-07-09 17:40:29149// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 step 4 {
Julian Grosse2310702021-02-10 12:53:11150// CHECK: memref.alloc() : memref<1x2xf32, 2>
151// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29152// Composition of the affine map for the load's subscripts causes the outer loop IV to be added as a symbol.
Uday Bondhugula4bb6f8e2019-08-29 08:13:01153// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, 0], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29154// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
155// CHECK-NEXT: affine.for %{{.*}} = 0 to 8 {
Uday Bondhugulaa9213082018-12-05 23:14:25156// ...
157// ...
158// CHECK: }
River Riddle89bc4492019-07-09 17:40:29159// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
160// CHECK-NEXT: dealloc %{{.*}} : memref<1x2xf32, 2>
Uday Bondhugulaa9213082018-12-05 23:14:25161// CHECK-NEXT: }
162// CHECK-NEXT: return
Chris Lattnerbbf362b2019-01-02 18:20:00163func @loop_nest_modulo() {
Julian Grosse2310702021-02-10 12:53:11164 %A = memref.alloc() : memref<256 x 8 x f32>
River Riddle832567b2019-03-25 17:14:34165 affine.for %i = 0 to 32 step 4 {
Uday Bondhugulafff1efb2018-11-17 04:12:06166 // DMAs will be performed at this level (%j is the first unit stride loop)
River Riddle832567b2019-03-25 17:14:34167 affine.for %j = 0 to 8 {
River Riddle4268e4f2020-01-13 21:12:37168 %idx = affine.apply affine_map<(d0) -> (d0 mod 2)> (%j)
Uday Bondhugulafff1efb2018-11-17 04:12:06169 // A buffer of size 1 x 2 will be allocated (original buffer was 256 x 8).
Andy Davis2e1187d2019-07-03 17:35:03170 %v = affine.load %A[%i, %idx] : memref<256 x 8 x f32>
Uday Bondhugulafff1efb2018-11-17 04:12:06171 }
Uday Bondhugulae0623d42018-11-09 01:31:01172 }
173 return
174}
Uday Bondhugula2631b152018-11-21 19:12:05175
Andy Davis6254a42d2019-05-09 14:02:32176// -----
177
Uday Bondhugula2631b152018-11-21 19:12:05178// DMA on tiled loop nest. This also tests the case where the bounds are
179// dependent on outer loop IVs.
Chris Lattnerbbf362b2019-01-02 18:20:00180// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
// Tile loops %i0/%i1 step by 32; point loops %i2/%i3 run over
// [%i0, %i0 + 32) and [%i1, %i1 + 32), i.e. one 32 x 32 tile per iteration.
181func @loop_nest_tiled() -> memref<256x1024xf32> {
Julian Grosse2310702021-02-10 12:53:11182 %0 = memref.alloc() : memref<256x1024xf32>
River Riddle832567b2019-03-25 17:14:34183 affine.for %i0 = 0 to 256 step 32 {
184 affine.for %i1 = 0 to 1024 step 32 {
Julian Grosse2310702021-02-10 12:53:11185// CHECK: memref.alloc() : memref<32x32xf32, 2>
186// CHECK-NEXT: memref.alloc() : memref<1xi32>
Uday Bondhuguladfc752e2018-12-07 23:04:55187// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
River Riddle89bc4492019-07-09 17:40:29188// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32>
Andy Davis2e1187d2019-07-03 17:35:03189// CHECK-NEXT: affine.dma_wait
River Riddle89bc4492019-07-09 17:40:29190// CHECK-NEXT: affine.for %{{.*}} = #map
191// CHECK-NEXT: affine.for %{{.*}} = #map
River Riddle4268e4f2020-01-13 21:12:37192 affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 32)>(%i0) {
193 affine.for %i3 = affine_map<(d0) -> (d0)>(%i1) to affine_map<(d0) -> (d0 + 32)>(%i1) {
Uday Bondhugula7c771632020-04-01 21:04:15194 // CHECK: affine.load %{{.*}}[-%{{.*}} + %{{.*}}, -%{{.*}} + %{{.*}}] : memref<32x32xf32, 2>
Andy Davis2e1187d2019-07-03 17:35:03195 %1 = affine.load %0[%i2, %i3] : memref<256x1024xf32>
Uday Bondhugula2631b152018-11-21 19:12:05196 } // CHECK-NEXT: }
197 }
198 }
199 }
Uday Bondhugula5f762452018-12-03 19:15:24200 return %0 : memref<256x1024xf32>
Uday Bondhugula2631b152018-11-21 19:12:05201}
Uday Bondhugulaa9213082018-12-05 23:14:25202
Andy Davis6254a42d2019-05-09 14:02:32203// -----
204
Chris Lattnerbbf362b2019-01-02 18:20:00205// CHECK-LABEL: func @dma_constant_dim_access
// The row subscript of the load is the constant %one and the inner loop bound
// is the constant symbol %N (100), so the expected buffer is a single
// 1 x 100 row.
206func @dma_constant_dim_access(%A : memref<100x100xf32>) {
Mogballa54f4ea2021-10-12 23:14:57207 %one = arith.constant 1 : index
208 %N = arith.constant 100 : index
Julian Grosse2310702021-02-10 12:53:11209 // CHECK: memref.alloc() : memref<1x100xf32, 2>
210 // CHECK-NEXT: memref.alloc() : memref<1xi32>
Uday Bondhuguladfc752e2018-12-07 23:04:55211 // No strided DMA needed here.
River Riddle89bc4492019-07-09 17:40:29212 // CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<1x100xf32, 2>,
213 // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
River Riddle832567b2019-03-25 17:14:34214 affine.for %i = 0 to 100 {
River Riddle4268e4f2020-01-13 21:12:37215 affine.for %j = 0 to affine_map<()[s0] -> (s0)> ()[%N] {
Uday Bondhugula7c771632020-04-01 21:04:15216 // CHECK: affine.load %{{.*}}[0, %{{.*}}] : memref<1x100xf32, 2>
Andy Davis2e1187d2019-07-03 17:35:03217 affine.load %A[%one, %j] : memref<100 x 100 x f32>
Uday Bondhugulaa9213082018-12-05 23:14:25218 }
219 }
220 return
221}
222
222
Andy Davis6254a42d2019-05-09 14:02:32223// -----
224
Chris Lattnerbbf362b2019-01-02 18:20:00225// CHECK-LABEL: func @dma_with_symbolic_accesses
// The column subscript is %j + %M + %N, where %M is a function argument and
// %N is the constant 9; the CHECK lines expect the DMA source offset to carry
// the symbolic part as `symbol(...) + 9`.
226func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
Mogballa54f4ea2021-10-12 23:14:57227 %N = arith.constant 9 : index
River Riddle832567b2019-03-25 17:14:34228 affine.for %i = 0 to 100 {
229 affine.for %j = 0 to 100 {
River Riddle4268e4f2020-01-13 21:12:37230 %idy = affine.apply affine_map<(d0, d1) [s0, s1] -> (d1 + s0 + s1)>(%i, %j)[%M, %N]
Andy Davis2e1187d2019-07-03 17:35:03231 affine.load %A[%i, %idy] : memref<100 x 100 x f32>
Uday Bondhugulaa9213082018-12-05 23:14:25232 }
233 }
234 return
Julian Grosse2310702021-02-10 12:53:11235// CHECK: memref.alloc() : memref<100x100xf32, 2>
236// CHECK-NEXT: memref.alloc() : memref<1xi32>
Uday Bondhugula4bb6f8e2019-08-29 08:13:01237// CHECK-NEXT: affine.dma_start %{{.*}}[0, symbol(%{{.*}}) + 9], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}
River Riddle89bc4492019-07-09 17:40:29238// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}}
Uday Bondhugula70da33b2020-04-01 06:30:26239// CHECK-NEXT: affine.for %[[IV0:.*]] = 0 to 100 {
240// CHECK-NEXT: affine.for %[[IV1:.*]] = 0 to 100 {
241// CHECK: affine.load %{{.*}}[%[[IV0]], %[[IV1]]] : memref<100x100xf32, 2>
Uday Bondhugulaa9213082018-12-05 23:14:25242// CHECK-NEXT: }
243// CHECK-NEXT: }
Uday Bondhugula8b3f8412019-02-12 00:33:53244// CHECK: return
Uday Bondhugulaa9213082018-12-05 23:14:25245}
Uday Bondhuguladfc752e2018-12-07 23:04:55246
Uday Bondhuguladfc752e2018-12-07 23:04:55246
Andy Davis6254a42d2019-05-09 14:02:32247// -----
248
Chris Lattnerbbf362b2019-01-02 18:20:00249// CHECK-LABEL: func @dma_with_symbolic_loop_bounds
// The inner loop runs from %M to %N (both function arguments) and the column
// subscript is %j + %K, with %K the constant 9.
250func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: index) {
Mogballa54f4ea2021-10-12 23:14:57251 %K = arith.constant 9 : index
Uday Bondhuguladfc752e2018-12-07 23:04:55252// The buffer size can't be bound by a constant smaller than the original
253// memref size; so the DMA buffer is the entire 100x100.
Julian Grosse2310702021-02-10 12:53:11254// CHECK: memref.alloc() : memref<100x100xf32, 2>
255// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29256// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32>
257// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
River Riddle832567b2019-03-25 17:14:34258 affine.for %i = 0 to 100 {
259 affine.for %j = %M to %N {
River Riddle4268e4f2020-01-13 21:12:37260 %idy = affine.apply affine_map<(d1) [s0] -> (d1 + s0)>(%j)[%K]
Andy Davis2e1187d2019-07-03 17:35:03261 affine.load %A[%i, %idy] : memref<100 x 100 x f32>
Uday Bondhuguladfc752e2018-12-07 23:04:55262 }
263 }
264 return
265}
266
266
Uday Bondhugula72e5c7f2019-01-24 16:43:17267// -----
268
Chris Lattnerbbf362b2019-01-02 18:20:00269// CHECK-LABEL: func @dma_unknown_size
// Both loop bounds come from memref.dim on a dynamically shaped memref; the
// load is expected to stay on the original memref and an error is expected on
// the outer loop (see the expected-error annotation).
// NOTE(review): %M and %N are both taken from dimension %c0 -- confirm whether
// %N was meant to query dimension 1.
270func @dma_unknown_size(%arg0: memref<?x?xf32>) {
Mogballa54f4ea2021-10-12 23:14:57271 %c0 = arith.constant 0 : index
Julian Grosse2310702021-02-10 12:53:11272 %M = memref.dim %arg0, %c0 : memref<? x ? x f32>
273 %N = memref.dim %arg0, %c0 : memref<? x ? x f32>
River Riddle832567b2019-03-25 17:14:34274 affine.for %i = 0 to %M {
275 affine.for %j = 0 to %N {
Uday Bondhugula72e5c7f2019-01-24 16:43:17276 // If this loop nest isn't tiled, the access requires a non-constant DMA
277 // size -- not yet implemented.
Uday Bondhugula7c771632020-04-01 21:04:15278 // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
Andy Davis2e1187d2019-07-03 17:35:03279 affine.load %arg0[%i, %j] : memref<? x ? x f32>
Uday Bondhugula18b8d432019-08-01 23:31:15280 // expected-error@-6 {{copy generation failed for one or more memref's in this block}}
Uday Bondhuguladfc752e2018-12-07 23:04:55281 }
282 }
283 return
284}
285
285
Uday Bondhugula72e5c7f2019-01-24 16:43:17286// -----
287
Chris Lattnerbbf362b2019-01-02 18:20:00288// CHECK-LABEL: func @dma_memref_3d
// Each of the three subscripts is the corresponding loop IV `mod 128`; the
// load is expected to stay on the original memref and an error is expected on
// the outermost loop (see the expected-error annotation).
289func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
River Riddle832567b2019-03-25 17:14:34290 affine.for %i = 0 to 1024 {
291 affine.for %j = 0 to 1024 {
292 affine.for %k = 0 to 1024 {
River Riddle4268e4f2020-01-13 21:12:37293 %idx = affine.apply affine_map<(d0) -> (d0 mod 128)>(%i)
294 %idy = affine.apply affine_map<(d0) -> (d0 mod 128)>(%j)
295 %idz = affine.apply affine_map<(d0) -> (d0 mod 128)>(%k)
Uday Bondhuguladfc752e2018-12-07 23:04:55296 // DMA with nested striding (or emulating with loop around strided DMA)
297 // not yet implemented.
Uday Bondhugula7c771632020-04-01 21:04:15298 // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<1024x1024x1024xf32>
Andy Davis2e1187d2019-07-03 17:35:03299 %v = affine.load %arg0[%idx, %idy, %idz] : memref<1024 x 1024 x 1024 x f32>
Uday Bondhugula18b8d432019-08-01 23:31:15300 // expected-error@-10 {{copy generation failed for one or more memref's in this block}}
Uday Bondhuguladfc752e2018-12-07 23:04:55301 }
302 }
303 }
304 return
305}
Uday Bondhugulaf94b15c2019-01-25 06:10:53306
Uday Bondhugulaf94b15c2019-01-25 06:10:53306
307// -----
308
Uday Bondhugulaf94b15c2019-01-25 06:10:53309// The first load accesses ([2,258), [128,384))
310// The second load accesses ([64,320), [2,258))
311// The first store writes to ([2,258), [192,448))
312// The second store writes to ([128,384), [2,258))
313// The union of all these regions is of size 382 x 446 and has its origin at (2,
314// 2), i.e., the window ([2,384), [2,448)) in the original space.
315
316// CHECK-LABEL: func @multi_load_store_union() {
// Two loads and two stores on %A, each with a different constant offset added
// to the loop IVs %i and %j.
317func @multi_load_store_union() {
Julian Grosse2310702021-02-10 12:53:11318 %A = memref.alloc() : memref<512 x 512 x f32>
River Riddle832567b2019-03-25 17:14:34319 affine.for %i = 0 to 256 {
320 affine.for %j = 0 to 256 {
River Riddle4268e4f2020-01-13 21:12:37321 %idx = affine.apply affine_map<(d0) -> (d0 + 64)>(%i)
322 %idy = affine.apply affine_map<(d0) -> (d0 + 128)>(%j)
323 %ishift = affine.apply affine_map<(d0) -> (d0 + 2)>(%i)
324 %jshift = affine.apply affine_map<(d0) -> (d0 + 2)>(%j)
Uday Bondhugulaf94b15c2019-01-25 06:10:53325
Andy Davis2e1187d2019-07-03 17:35:03326 %u = affine.load %A[%ishift, %idy] : memref<512 x 512 x f32>
327 %v = affine.load %A[%idx, %jshift] : memref<512 x 512 x f32>
Uday Bondhugulaf94b15c2019-01-25 06:10:53328
River Riddle4268e4f2020-01-13 21:12:37329 %sidx = affine.apply affine_map<(d0) -> (d0 + 128)>(%i)
330 %sidy = affine.apply affine_map<(d0) -> (d0 + 192)>(%j)
Uday Bondhugulaf94b15c2019-01-25 06:10:53331
Andy Davis2e1187d2019-07-03 17:35:03332 affine.store %u, %A[%ishift, %sidy] : memref<512 x 512 x f32>
333 affine.store %v, %A[%sidx, %jshift] : memref<512 x 512 x f32>
Uday Bondhugulaf94b15c2019-01-25 06:10:53334 }
335 }
336 return
337}
Julian Grosse2310702021-02-10 12:53:11338// CHECK: memref.alloc() : memref<512x512xf32>
339// CHECK-NEXT: memref.alloc() : memref<382x446xf32, 2>
340// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29341// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32>
342// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Julian Grosse2310702021-02-10 12:53:11343// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29344// CHECK-NEXT: affine.for %{{.*}} = 0 to 256 {
345// CHECK-NEXT: affine.for %{{.*}} = 0 to 256 {
Uday Bondhugula70da33b2020-04-01 06:30:26346// CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}} + 126] : memref<382x446xf32, 2>
Uday Bondhugula7c771632020-04-01 21:04:15347// CHECK-NEXT: affine.load %{{.*}}[%{{.*}} + 62, %{{.*}}] : memref<382x446xf32, 2>
Uday Bondhugula70da33b2020-04-01 06:30:26348// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}} + 190] : memref<382x446xf32, 2>
River Riddle89bc4492019-07-09 17:40:29349// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}} + 126, %{{.*}}] : memref<382x446xf32, 2>
Uday Bondhugulaf94b15c2019-01-25 06:10:53350// CHECK-NEXT: }
351// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:29352// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<382x446xf32, 2>, memref<512x512xf32>, memref<1xi32>
353// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
354// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
355// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
356// CHECK-NEXT: dealloc %{{.*}} : memref<382x446xf32, 2>
Uday Bondhugulaf94b15c2019-01-25 06:10:53357// CHECK-NEXT: return
358// CHECK-NEXT:}
Uday Bondhugulab26900d2019-02-04 15:58:42359
360// -----
361
Uday Bondhugulab26900d2019-02-04 15:58:42362// CHECK-LABEL: func @dma_loop_straightline_interspersed() {
// Accesses to %A appear before, inside and after a loop in the same block: a
// load of element 0, loads of elements 1..254 in the loop, then a load of
// element 255 whose value is stored to element 0.
363func @dma_loop_straightline_interspersed() {
Mogballa54f4ea2021-10-12 23:14:57364 %c0 = arith.constant 0 : index
365 %c255 = arith.constant 255 : index
Julian Grosse2310702021-02-10 12:53:11366 %A = memref.alloc() : memref<256 x f32>
Andy Davis2e1187d2019-07-03 17:35:03367 %v = affine.load %A[%c0] : memref<256 x f32>
River Riddle832567b2019-03-25 17:14:34368 affine.for %i = 1 to 255 {
Andy Davis2e1187d2019-07-03 17:35:03369 affine.load %A[%i] : memref<256 x f32>
Uday Bondhugulab26900d2019-02-04 15:58:42370 }
Andy Davis2e1187d2019-07-03 17:35:03371 %l = affine.load %A[%c255] : memref<256 x f32>
372 affine.store %l, %A[%c0] : memref<256 x f32>
Uday Bondhugulab26900d2019-02-04 15:58:42373 return
374}
375// There are three regions here - the 'load' preceding the loop, the loop
Alex Zinenko60f443b2020-05-13 10:12:30376// itself, and the operations appearing after the loop.
Julian Grosse2310702021-02-10 12:53:11377// CHECK: memref.alloc() : memref<256xf32>
378// CHECK-NEXT: memref.alloc() : memref<1xf32, 2>
379// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29380// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<1xf32, 2>, memref<1xi32>
381// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugula7c771632020-04-01 21:04:15382// CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32, 2>
River Riddle89bc4492019-07-09 17:40:29383// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
384// CHECK-NEXT: dealloc %{{.*}} : memref<1xf32, 2>
Julian Grosse2310702021-02-10 12:53:11385// CHECK-NEXT: memref.alloc() : memref<254xf32, 2>
386// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29387// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<254xf32, 2>, memref<1xi32>
388// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
389// CHECK-NEXT: affine.for %{{.*}} = 1 to 255 {
Uday Bondhugula7c771632020-04-01 21:04:15390// CHECK-NEXT: affine.load %{{.*}}[%{{.*}} - 1] : memref<254xf32, 2>
Uday Bondhugulab26900d2019-02-04 15:58:42391// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:29392// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
393// CHECK-NEXT: dealloc %{{.*}} : memref<254xf32, 2>
Julian Grosse2310702021-02-10 12:53:11394// CHECK-NEXT: memref.alloc() : memref<256xf32, 2>
395// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29396// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32>
397// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Julian Grosse2310702021-02-10 12:53:11398// CHECK-NEXT: memref.alloc() : memref<1xi32>
Uday Bondhugula7c771632020-04-01 21:04:15399// CHECK-NEXT: affine.load %{{.*}}[255] : memref<256xf32, 2>
Uday Bondhugula4bb6f8e2019-08-29 08:13:01400// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<256xf32, 2>
River Riddle89bc4492019-07-09 17:40:29401// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32, 2>, memref<256xf32>, memref<1xi32>
402// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
403// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
404// CHECK-NEXT: dealloc %{{.*}} : memref<1xi32>
405// CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2>
Uday Bondhugulab26900d2019-02-04 15:58:42406// CHECK-NEXT: return
407
408// -----
409
410// CHECK-LABEL: func @dma_mixed_loop_blocks() {
// The outer loop body mixes a straight-line load of %A[0, 0] with an inner
// loop that loads %A[%i, %j]; the element type is vector<8 x f32>.
411func @dma_mixed_loop_blocks() {
Mogballa54f4ea2021-10-12 23:14:57412 %c0 = arith.constant 0 : index
Julian Grosse2310702021-02-10 12:53:11413 %A = memref.alloc() : memref<256 x 256 x vector<8 x f32>>
River Riddle832567b2019-03-25 17:14:34414 affine.for %i = 0 to 256 {
Andy Davis2e1187d2019-07-03 17:35:03415 %v = affine.load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
Uday Bondhugulab26900d2019-02-04 15:58:42416 "foo"(%v) : (vector<8 x f32>) -> ()
River Riddle832567b2019-03-25 17:14:34417 affine.for %j = 0 to 256 {
Andy Davis2e1187d2019-07-03 17:35:03418 %w = affine.load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
Uday Bondhugulab26900d2019-02-04 15:58:42419 "bar"(%w) : (vector<8 x f32>) -> ()
420 }
421 }
422 return
423}
Julian Grosse2310702021-02-10 12:53:11424// CHECK-DAG: [[MEM:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>>
425// CHECK-DAG: [[BUF:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>, 2>
426// CHECK-DAG: [[TAG:%[0-9]+]] = memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29427// CHECK: affine.dma_start [[MEM]][%{{.*}}, %{{.*}}], [[BUF]][%{{.*}}, %{{.*}}], [[TAG]][%{{.*}}], %{{.*}} : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32>
428// CHECK-NEXT: affine.dma_wait [[TAG]][%{{.*}}], %{{.*}} : memref<1xi32>
429// CHECK-NEXT: affine.for %{{.*}} = 0 to 256 {
Uday Bondhugula7c771632020-04-01 21:04:15430// CHECK: affine.load [[BUF]][0, 0] : memref<256x256xvector<8xf32>, 2>
River Riddle89bc4492019-07-09 17:40:29431// CHECK: affine.for %{{.*}} = 0 to 256 {
Uday Bondhugula7c771632020-04-01 21:04:15432// CHECK-NEXT: affine.load [[BUF]][%{{.*}}, %{{.*}}] : memref<256x256xvector<8xf32>, 2>
Uday Bondhugulaf5eed892019-02-11 23:43:26433
434// -----
435
436// CHECK-LABEL: func @relative_loop_bounds
// The inner loop runs over [%i0, %i0 + 4) with %i0 in [0, 1024), so the stores
// touch indices 0..1026 -- every element of the 1027-element memref.
Uday Bondhugulaf97c1c52019-02-16 01:54:49437func @relative_loop_bounds(%arg0: memref<1027xf32>) {
River Riddle832567b2019-03-25 17:14:34438 affine.for %i0 = 0 to 1024 {
River Riddle4268e4f2020-01-13 21:12:37439 affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 4)>(%i0) {
Mogballa54f4ea2021-10-12 23:14:57440 %0 = arith.constant 0.0 : f32
Andy Davis2e1187d2019-07-03 17:35:03441 affine.store %0, %arg0[%i2] : memref<1027xf32>
Uday Bondhugulaf5eed892019-02-11 23:43:26442 }
443 }
444 return
445}
Julian Grosse2310702021-02-10 12:53:11446// CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<1027xf32, 2>
447// CHECK-NEXT: [[MEM:%[0-9]+]] = memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29448// CHECK-NEXT: affine.for %{{.*}} = 0 to 1024 {
Uday Bondhugula70da33b2020-04-01 06:30:26449// CHECK-NEXT: affine.for %[[I2:.*]] = {{#map[0-9]+}}(%{{.*}}) to {{#map[0-9]+}}(%{{.*}}) {
450// CHECK: affine.store %{{.*}}, [[BUF]][%[[I2]]] : memref<1027xf32, 2>
Uday Bondhugulaf5eed892019-02-11 23:43:26451// CHECK-NEXT: }
452// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:29453// CHECK-NEXT: affine.dma_start [[BUF]][%{{.*}}], %{{.*}}[%{{.*}}], [[MEM]][%{{.*}}], %{{.*}} : memref<1027xf32, 2>, memref<1027xf32>, memref<1xi32>
454// CHECK-NEXT: affine.dma_wait [[MEM]][%{{.*}}], %{{.*}} : memref<1xi32>
Uday Bondhugulaf97c1c52019-02-16 01:54:49455
Andy Davis6254a42d2019-05-09 14:02:32456// -----
457
// A load at %i0 + 100 and a store at %i0 + 25 on the same memref; the region
// comments inside the loop give the ranges each access covers.
Andy Davis6254a42d2019-05-09 14:02:32458func @test_read_write_region_union() {
Julian Grosse2310702021-02-10 12:53:11459 %0 = memref.alloc() : memref<256xf32>
Andy Davis6254a42d2019-05-09 14:02:32460 affine.for %i0 = 0 to 10 {
461 // memref dims: [0, 256)
462 // read region: [100, 110)
463 // write region: [25, 35)
464 // union region: [25, 110)
River Riddle4268e4f2020-01-13 21:12:37465 %a0 = affine.apply affine_map<(d0) -> (d0 + 100)>(%i0)
466 %a1 = affine.apply affine_map<(d0) -> (d0 + 25)>(%i0)
Andy Davis2e1187d2019-07-03 17:35:03467 %1 = affine.load %0[%a0] : memref<256xf32>
468 affine.store %1, %0[%a1] : memref<256xf32>
Andy Davis6254a42d2019-05-09 14:02:32469 }
470 return
471}
472
472
Julian Grosse2310702021-02-10 12:53:11473// CHECK: memref.alloc() : memref<256xf32>
474// CHECK-NEXT: memref.alloc() : memref<85xf32, 2>
475// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29476// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<85xf32, 2>, memref<1xi32>
477// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Julian Grosse2310702021-02-10 12:53:11478// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29479// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
Uday Bondhugula70da33b2020-04-01 06:30:26480// CHECK: affine.load %{{.*}}[%{{.*}} + 75] : memref<85xf32, 2>
River Riddle89bc4492019-07-09 17:40:29481// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<85xf32, 2>
Andy Davis6254a42d2019-05-09 14:02:32482// CHECK-NEXT: }
River Riddle89bc4492019-07-09 17:40:29483// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<85xf32, 2>, memref<256xf32>, memref<1xi32>
484// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Andy Davis6254a42d2019-05-09 14:02:32485
486// -----
Uday Bondhugulaf97c1c52019-02-16 01:54:49487
River Riddle832567b2019-03-25 17:14:34488// This should create a buffer of size 2 for %arg2.
Uday Bondhugula9f2781e2019-03-12 17:52:09489
River Riddle4268e4f2020-01-13 21:12:37490#map_lb = affine_map<(d0) -> (d0)>
491#map_ub = affine_map<(d0) -> (d0 + 3)>
492#map_acc = affine_map<(d0) -> (d0 floordiv 8)>
Uday Bondhugula9f2781e2019-03-12 17:52:09493// CHECK-LABEL: func @test_analysis_util
// %arg2 is both loaded and stored at index (%i9 floordiv 8), while %0 is read
// at [%i17, 0]. %arg0 and %arg1 are not accessed in the body, and %1 and %2
// are defined but never used.
494func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<2xf32>) -> (memref<144x9xf32>, memref<2xf32>) {
Mogballa54f4ea2021-10-12 23:14:57495 %c0 = arith.constant 0 : index
Julian Grosse2310702021-02-10 12:53:11496 %0 = memref.alloc() : memref<64x1xf32>
497 %1 = memref.alloc() : memref<144x4xf32>
Mogballa54f4ea2021-10-12 23:14:57498 %2 = arith.constant 0.0 : f32
River Riddle832567b2019-03-25 17:14:34499 affine.for %i8 = 0 to 9 step 3 {
// %i9 covers the 3 iterations [%i8, %i8 + 3) of the current outer step.
500 affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) {
501 affine.for %i17 = 0 to 64 {
// %23 = %i9 floordiv 8: the subscript used for both accesses to %arg2.
Uday Bondhugula9f2781e2019-03-12 17:52:09502 %23 = affine.apply #map_acc(%i9)
Andy Davis2e1187d2019-07-03 17:35:03503 %25 = affine.load %arg2[%23] : memref<2xf32>
// #map_lb is the identity map, so %26 equals %i17.
Uday Bondhugula9f2781e2019-03-12 17:52:09504 %26 = affine.apply #map_lb(%i17)
Andy Davis2e1187d2019-07-03 17:35:03505 %27 = affine.load %0[%26, %c0] : memref<64x1xf32>
506 affine.store %27, %arg2[%23] : memref<2xf32>
Uday Bondhugula9f2781e2019-03-12 17:52:09507 }
508 }
509 }
510 return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32>
511}
River Riddle89bc4492019-07-09 17:40:29512// CHECK: affine.for %{{.*}} = 0 to 9 step 3 {
Julian Grosse2310702021-02-10 12:53:11513// CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<2xf32, 2>
River Riddle89bc4492019-07-09 17:40:29514// CHECK: affine.dma_start %{{.*}}[%{{.*}} floordiv 8], [[BUF]]
515// CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
516// CHECK: affine.for %{{.*}} =
Uday Bondhugula9f2781e2019-03-12 17:52:09517
Diego Caballerod7058ac2020-02-14 21:41:01518// -----
Andy Davis0412bf62019-05-09 15:36:02519
River Riddle4268e4f2020-01-13 21:12:37520#map3 = affine_map<(d0) -> (d0)>
521#map12 = affine_map<(d0) -> (d0 + 3)>
522#map14 = affine_map<(d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)>
523#map15 = affine_map<(d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)>
524#map16 = affine_map<(d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)>
Andy Davis0412bf62019-05-09 15:36:02525// Test for test case in b/128303048 #4.
Diego Caballerod7058ac2020-02-14 21:41:01526// CHECK-LABEL: func @test_memref_bounds
// Only %arg0 is accessed: a single load whose three leading subscripts come
// from #map14/#map15/#map16 applied to (%i9, %i10). The loaded value is
// unused; %arg1 and %arg2 are returned untouched. The expected output below
// allocates a fast buffer with the full 4x4x16x1 shape of %arg0.
Andy Davis0412bf62019-05-09 15:36:02527func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) {
Mogballa54f4ea2021-10-12 23:14:57528 %c0 = arith.constant 0 : index
Andy Davis0412bf62019-05-09 15:36:02529 affine.for %i8 = 0 to 9 step 3 {
// %i9 covers [%i8, %i8 + 3).
530 affine.for %i9 = #map3(%i8) to #map12(%i8) {
531 affine.for %i10 = 0 to 64 {
// Subscripts built from floordiv/mod expressions of (%i9 + %i10 * 72).
532 %10 = affine.apply #map14(%i9, %i10)
533 %11 = affine.apply #map15(%i9, %i10)
534 %12 = affine.apply #map16(%i9, %i10)
Andy Davis2e1187d2019-07-03 17:35:03535 %13 = affine.load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>>
Andy Davis0412bf62019-05-09 15:36:02536 }
537 }
538 }
539 return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>
540}
541
Julian Grosse2310702021-02-10 12:53:11542// CHECK: memref.alloc() : memref<4x4x16x1xvector<8x128xf32>, 2>
543// CHECK-NEXT: memref.alloc() : memref<1xi32>
River Riddle89bc4492019-07-09 17:40:29544// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32>
545// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32>
Andy Davis0412bf62019-05-09 15:36:02546
Uday Bondhugula9f2781e2019-03-12 17:52:09547// -----
548
Uday Bondhugulaf97c1c52019-02-16 01:54:49549// Since the fast memory size is 16 KB, DMA generation will happen right under
550// %i0.
551
552// FAST-MEM-16KB-LABEL: func @load_store_same_memref
// Squares every element of %arg0 in place using a loop nest tiled by 4 in both
// dimensions. Each %i0 iteration touches rows [%i0, %i0 + 4) across all 1024
// columns, i.e. a 4x1024 f32 tile (16 KB), matching the buffer expected below.
553func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
River Riddle89bc4492019-07-09 17:40:29554 // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 256 step 4
River Riddle832567b2019-03-25 17:14:34555 affine.for %i0 = 0 to 256 step 4 {
Julian Grosse2310702021-02-10 12:53:11556 // FAST-MEM-16KB: [[BUF:%[0-9]+]] = memref.alloc() : memref<4x1024xf32, 2>
River Riddle89bc4492019-07-09 17:40:29557 // FAST-MEM-16KB: affine.dma_start %{{.*}}
Andy Davis2e1187d2019-07-03 17:35:03558 // FAST-MEM-16KB-NEXT: affine.dma_wait
River Riddle89bc4492019-07-09 17:40:29559 // FAST-MEM-16KB: affine.for %{{.*}}
River Riddle832567b2019-03-25 17:14:34560 affine.for %i1 = 0 to 1024 step 4 {
River Riddle89bc4492019-07-09 17:40:29561 // FAST-MEM-16KB: affine.for %{{.*}}
River Riddle4268e4f2020-01-13 21:12:37562 affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 4)>(%i0) {
River Riddle89bc4492019-07-09 17:40:29563 // FAST-MEM-16KB: affine.for %{{.*}}
River Riddle4268e4f2020-01-13 21:12:37564 affine.for %i3 = affine_map<(d0) -> (d0)>(%i1) to affine_map<(d0) -> (d0 + 4)>(%i1) {
// Load, square, and store back to the same location of the same memref.
Andy Davis2e1187d2019-07-03 17:35:03565 %3 = affine.load %arg0[%i2, %i3] : memref<256x1024xf32>
Mogballa54f4ea2021-10-12 23:14:57566 %4 = arith.mulf %3, %3 : f32
Andy Davis2e1187d2019-07-03 17:35:03567 affine.store %4, %arg0[%i2, %i3] : memref<256x1024xf32>
Uday Bondhugulaf97c1c52019-02-16 01:54:49568 } // FAST-MEM-16KB: }
569 } // FAST-MEM-16KB: }
570 } // FAST-MEM-16KB: }
Andy Davis2e1187d2019-07-03 17:35:03571 // FAST-MEM-16KB: affine.dma_start [[BUF]]
572 // FAST-MEM-16KB-NEXT: affine.dma_wait
Uday Bondhugulaf97c1c52019-02-16 01:54:49573 }
574 return
575}
Uday Bondhugula5021dc42019-02-19 18:33:41576
Uday Bondhugula9f2781e2019-03-12 17:52:09577// -----
Uday Bondhugula5021dc42019-02-19 18:33:41578
579// This is a 3-d loop nest tiled by 4 x 4 x 4. Under %i, %j, %k, the size of a
580// tile of arg0, arg1, and arg2 accessed is 4 KB (each), i.e., 12 KB in total.
581// With fast mem capacity set to 16 KB, the DMAs if placed under %k will fit.
582// However, the region of arg2 accessed is invariant w.r.t the %k loop unlike
583// %arg0 and %arg1. So, its DMA can be hoisted one level up and placed under
Alex Zinenko60f443b2020-05-13 10:12:30584// %j, while the DMAs for arg0 and arg1 appear right under the %k loop.
Uday Bondhugula5021dc42019-02-19 18:33:41585
River Riddle4268e4f2020-01-13 21:12:37586#map0 = affine_map<(d0) -> (d0)>
587#map1 = affine_map<(d0) -> (d0 + 4)>
Uday Bondhugula5021dc42019-02-19 18:33:41588// FAST-MEM-16KB-LABEL: func @simple_matmul
// Tiled matmul-style accumulation on 8x8 memrefs of vector<64xf32>:
//   %arg2[ii, jj] += %arg0[ii, kk] * %arg1[kk, jj]   (elementwise on vectors)
// The three outer loops step by 4 over tiles; the three inner loops cover
// [tile, tile + 4) via #map0/#map1.
589func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
River Riddle832567b2019-03-25 17:14:34590 affine.for %i = 0 to 8 step 4 {
591 affine.for %j = 0 to 8 step 4 {
592 affine.for %k = 0 to 8 step 4 {
593 affine.for %ii = #map0(%i) to #map1(%i) {
594 affine.for %jj = #map0(%j) to #map1(%j) {
595 affine.for %kk = #map0(%k) to #map1(%k) {
// %arg2's subscripts (%ii, %jj) do not involve %k or %kk.
Andy Davis2e1187d2019-07-03 17:35:03596 %5 = affine.load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>>
597 %6 = affine.load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>>
598 %7 = affine.load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>>
Mogballa54f4ea2021-10-12 23:14:57599 %8 = arith.mulf %5, %6 : vector<64xf32>
600 %9 = arith.addf %7, %8 : vector<64xf32>
Andy Davis2e1187d2019-07-03 17:35:03601 affine.store %9, %arg2[%ii, %jj] : memref<8x8xvector<64xf32>>
Uday Bondhugula5021dc42019-02-19 18:33:41602 }
603 }
604 }
605 }
606 }
607 }
608 return %arg2 : memref<8x8xvector<64xf32>>
609}
River Riddle89bc4492019-07-09 17:40:29610// FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 {
611// FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 {
612// FAST-MEM-16KB: affine.dma_start %{{.*}}
Andy Davis2e1187d2019-07-03 17:35:03613// FAST-MEM-16KB: affine.dma_wait
River Riddle89bc4492019-07-09 17:40:29614// FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 {
615// FAST-MEM-16KB: affine.dma_start %{{.*}}
Andy Davis2e1187d2019-07-03 17:35:03616// FAST-MEM-16KB: affine.dma_wait
River Riddle89bc4492019-07-09 17:40:29617// FAST-MEM-16KB: affine.dma_start %{{.*}}
Andy Davis2e1187d2019-07-03 17:35:03618// FAST-MEM-16KB: affine.dma_wait
River Riddle89bc4492019-07-09 17:40:29619// FAST-MEM-16KB: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) {
620// FAST-MEM-16KB-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) {
621// FAST-MEM-16KB-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) {
Uday Bondhugula5021dc42019-02-19 18:33:41622// FAST-MEM-16KB: }
623// FAST-MEM-16KB: }
624// FAST-MEM-16KB: }
625// FAST-MEM-16KB: }
River Riddle89bc4492019-07-09 17:40:29626// FAST-MEM-16KB: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}
Andy Davis2e1187d2019-07-03 17:35:03627// FAST-MEM-16KB: affine.dma_wait