River Riddle | 400ad6f | 2020-04-08 19:57:02 | [diff] [blame] | 1 | // RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s |
| 2 | // RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 3 | |
Uday Bondhugula | 18b8d43 | 2019-08-01 23:31:15 | [diff] [blame] | 4 | // We run most test cases with skip-non-unit-stride-loops to allow testing |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 5 | // DMA generation at inner levels easily - since the DMA generation would |
| 6 | // otherwise always generate DMAs at the outermost level (default for fast mem |
| 7 | // capacity is infinite). Using a specific capacity makes it harder to write |
| 8 | // a test case as one would have to calculate total footprints. With |
Uday Bondhugula | 18b8d43 | 2019-08-01 23:31:15 | [diff] [blame] | 9 | // skip-non-unit-stride-loops, non-unit stride loops will always be skipped and |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 10 | // their inner loops will be traversed till a unit stride loop is found (or the |
| 11 | // innermost block is reached). |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 12 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 13 | // ----- |
| 14 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 15 | // CHECK-LABEL: func @loop_nest_1d() { |
| 16 | func @loop_nest_1d() { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 17 | %A = memref.alloc() : memref<256 x f32> |
| 18 | %B = memref.alloc() : memref<512 x f32> |
| 19 | %F = memref.alloc() : memref<256 x f32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 20 | // First DMA buffer. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 21 | // CHECK: memref.alloc() : memref<256xf32> |
| 22 | // CHECK: memref.alloc() : memref<256xf32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 23 | // Tag for first DMA. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 24 | // CHECK: memref.alloc() : memref<1xi32> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 25 | // First DMA transfer. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 26 | // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32> |
| 27 | // CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 28 | // Second DMA buffer. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 29 | // CHECK: memref.alloc() : memref<256xf32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 30 | // Tag for second DMA. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 31 | // CHECK: memref.alloc() : memref<1xi32> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 32 | // Second DMA transfer. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 33 | // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<512xf32>, memref<256xf32, 2>, memref<1xi32> |
| 34 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 35 | // CHECK: affine.for %[[IV:.*]] = 0 to 256 { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 36 | // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<256xf32, 2> |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 37 | // Buffer for '%B' in faster memref space is of smaller size: 256xf32 |
| 38 | // Affine map for load on B is composed and becomes identity. |
| 39 | // CHECK: affine.load %{{.*}}[%[[IV]]] : memref<256xf32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 40 | // Already in faster memory space. |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 41 | // CHECK: affine.load %{{.*}}[%[[IV]]] : memref<256xf32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 42 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 43 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 44 | // CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2> |
| 45 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 46 | // CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2> |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 47 | // CHECK-NEXT: return |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 48 | affine.for %i = 0 to 256 { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 49 | affine.load %A[%i] : memref<256 x f32> |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 50 | %idx = affine.apply affine_map<(d0) -> (d0 + 256)>(%i) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 51 | affine.load %B[%idx] : memref<512 x f32> |
| 52 | affine.load %F[%i] : memref<256 x f32, 2> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 53 | } |
| 54 | return |
| 55 | } |
| 56 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 57 | // ----- |
| 58 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 59 | // CHECK-LABEL: func @loop_nest_high_d |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 60 | // CHECK: %{{.*}} = arith.constant 16384 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 61 | // CHECK-DAG: [[BUFB:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> |
| 62 | // CHECK-DAG: [[BUFA:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> |
| 63 | // CHECK-DAG: [[BUFC:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> |
| 64 | // CHECK-DAG: [[TAGB:%[0-9]+]] = memref.alloc() : memref<1xi32> |
| 65 | // CHECK-DAG: [[TAGA:%[0-9]+]] = memref.alloc() : memref<1xi32> |
| 66 | // CHECK-DAG: [[TAGC:%[0-9]+]] = memref.alloc() : memref<1xi32> |
| 67 | // CHECK-DAG: [[TAGC_W:%[0-9]+]] = memref.alloc() : memref<1xi32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 68 | // INCOMING DMA for B |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 69 | // CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFB]][%{{.*}}, %{{.*}}], [[TAGB]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32> |
| 70 | // CHECK-DAG: affine.dma_wait [[TAGB]][%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 71 | // INCOMING DMA for A. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 72 | // CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFA]][%{{.*}}, %{{.*}}], [[TAGA]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32> |
| 73 | // CHECK-DAG: affine.dma_wait [[TAGA]][%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 74 | // INCOMING DMA for C. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 75 | // CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFC]][%{{.*}}, %{{.*}}], [[TAGC]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32> |
| 76 | // CHECK-DAG: affine.dma_wait [[TAGC]][%{{.*}}], %{{.*}} : memref<1xi32> |
| 77 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 { |
| 78 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 { |
| 79 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 { |
| 80 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 16 { |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 81 | // CHECK: affine.load [[BUFB]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 82 | // CHECK-NEXT: "foo"(%{{.*}}) : (f32) -> () |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 83 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 84 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 16 { |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 85 | // CHECK: affine.load [[BUFA]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 86 | // CHECK-NEXT: "bar"(%{{.*}}) : (f32) -> () |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 87 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 88 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 16 { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 89 | // CHECK-NEXT: "abc_compute"() : () -> f32 |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 90 | // CHECK: affine.load [[BUFC]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2> |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 91 | // CHECK-NEXT: "addf32"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 92 | // CHECK-NEXT: affine.store %{{.*}}, [[BUFC]][%{{.*}} * 16 + %{{.*}}, %{{.*}}] : memref<512x32xf32, 2> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 93 | // CHECK-NEXT: } |
| 94 | // CHECK-NEXT: "foobar"() : () -> () |
| 95 | // CHECK-NEXT: } |
| 96 | // CHECK-NEXT: } |
| 97 | // CHECK-NEXT: } |
| 98 | // OUTGOING DMA for C. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 99 | // CHECK-NEXT: affine.dma_start [[BUFC]][%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], [[TAGC_W]][%{{.*}}], %{{.*}} : memref<512x32xf32, 2>, memref<512x32xf32>, memref<1xi32> |
| 100 | // CHECK-NEXT: affine.dma_wait [[TAGC_W]][%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | 8b3f841 | 2019-02-12 00:33:53 | [diff] [blame] | 101 | // CHECK-NEXT: dealloc [[TAGC_W]] : memref<1xi32> |
| 102 | // CHECK-NEXT: dealloc [[TAGC]] : memref<1xi32> |
Uday Bondhugula | 5836fae | 2019-03-06 01:19:47 | [diff] [blame] | 103 | // CHECK-NEXT: dealloc [[BUFC]] : memref<512x32xf32, 2> |
Uday Bondhugula | 8b3f841 | 2019-02-12 00:33:53 | [diff] [blame] | 104 | // CHECK-NEXT: dealloc [[TAGA]] : memref<1xi32> |
Uday Bondhugula | 5836fae | 2019-03-06 01:19:47 | [diff] [blame] | 105 | // CHECK-NEXT: dealloc [[BUFA]] : memref<512x32xf32, 2> |
Uday Bondhugula | 8b3f841 | 2019-02-12 00:33:53 | [diff] [blame] | 106 | // CHECK-NEXT: dealloc [[TAGB]] : memref<1xi32> |
Uday Bondhugula | 5836fae | 2019-03-06 01:19:47 | [diff] [blame] | 107 | // CHECK-NEXT: dealloc [[BUFB]] : memref<512x32xf32, 2> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 108 | // CHECK-NEXT: return |
| 109 | // CHECK-NEXT:} |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 110 | func @loop_nest_high_d(%A: memref<512 x 32 x f32>, |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 111 | %B: memref<512 x 32 x f32>, %C: memref<512 x 32 x f32>) { |
| 112 | // DMAs will be performed at this level (jT is the first loop without a stride). |
| 113 | // A and B are read, while C is both read and written. A total of three new buffers |
| 114 | // are allocated and existing load's/store's are replaced by accesses to those buffers. |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 115 | affine.for %jT = 0 to 32 { |
| 116 | affine.for %kT = 0 to 32 { |
| 117 | affine.for %iT = 0 to 32 { |
| 118 | affine.for %kk = 0 to 16 { // k intratile |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 119 | %k = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)> (%kT, %kk) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 120 | %v0 = affine.load %B[%k, %jT] : memref<512 x 32 x f32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 121 | "foo"(%v0) : (f32) -> () |
| 122 | } |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 123 | affine.for %ii = 0 to 16 { // i intratile. |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 124 | %i = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)>(%iT, %ii) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 125 | %v1 = affine.load %A[%i, %kT] : memref<512 x 32 x f32> |
Uday Bondhugula | 72e5c7f | 2019-01-24 16:43:17 | [diff] [blame] | 126 | "bar"(%v1) : (f32) -> () |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 127 | } |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 128 | affine.for %ii_ = 0 to 16 { // i intratile. |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 129 | %v2 = "abc_compute"() : () -> f32 |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 130 | %i_ = affine.apply affine_map<(d0, d1) -> (16*d0 + d1)>(%iT, %ii_) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 131 | %v3 = affine.load %C[%i_, %jT] : memref<512 x 32 x f32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 132 | %v4 = "addf32"(%v2, %v3) : (f32, f32) -> (f32) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 133 | affine.store %v4, %C[%i_, %jT] : memref<512 x 32 x f32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 134 | } |
| 135 | "foobar"() : () -> () |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | return |
| 140 | } |
| 141 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 142 | // ----- |
| 143 | |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 144 | // A loop nest with a modulo 2 access. A strided DMA is not needed here: a 1x2 |
| 145 | // region within a 256 x 8 memref. |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 146 | // |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 147 | // CHECK-LABEL: func @loop_nest_modulo() { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 148 | // CHECK: memref.alloc() : memref<256x8xf32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 149 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 step 4 { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 150 | // CHECK: memref.alloc() : memref<1x2xf32, 2> |
| 151 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 152 | // Composition of the affine map for '%{{.*}}' causes '%{{.*}}' to be added as a symbol. |
Uday Bondhugula | 4bb6f8e | 2019-08-29 08:13:01 | [diff] [blame] | 153 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, 0], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 154 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
| 155 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 8 { |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 156 | // ... |
| 157 | // ... |
| 158 | // CHECK: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 159 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 160 | // CHECK-NEXT: dealloc %{{.*}} : memref<1x2xf32, 2> |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 161 | // CHECK-NEXT: } |
| 162 | // CHECK-NEXT: return |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 163 | func @loop_nest_modulo() { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 164 | %A = memref.alloc() : memref<256 x 8 x f32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 165 | affine.for %i = 0 to 32 step 4 { |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 166 | // DMAs will be performed at this level (%j is the first unit stride loop) |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 167 | affine.for %j = 0 to 8 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 168 | %idx = affine.apply affine_map<(d0) -> (d0 mod 2)> (%j) |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 169 | // A buffer of size 1 x 2 will be allocated (original buffer was 256 x 8). |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 170 | %v = affine.load %A[%i, %idx] : memref<256 x 8 x f32> |
Uday Bondhugula | fff1efb | 2018-11-17 04:12:06 | [diff] [blame] | 171 | } |
Uday Bondhugula | e0623d4 | 2018-11-09 01:31:01 | [diff] [blame] | 172 | } |
| 173 | return |
| 174 | } |
Uday Bondhugula | 2631b15 | 2018-11-21 19:12:05 | [diff] [blame] | 175 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 176 | // ----- |
| 177 | |
Uday Bondhugula | 2631b15 | 2018-11-21 19:12:05 | [diff] [blame] | 178 | // DMA on tiled loop nest. This also tests the case where the bounds are |
| 179 | // dependent on outer loop IVs. |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 180 | // CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> { |
| 181 | func @loop_nest_tiled() -> memref<256x1024xf32> { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 182 | %0 = memref.alloc() : memref<256x1024xf32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 183 | affine.for %i0 = 0 to 256 step 32 { |
| 184 | affine.for %i1 = 0 to 1024 step 32 { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 185 | // CHECK: memref.alloc() : memref<32x32xf32, 2> |
| 186 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 187 | // Strided DMA here: 32 x 32 tile in a 256 x 1024 memref. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 188 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 189 | // CHECK-NEXT: affine.dma_wait |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 190 | // CHECK-NEXT: affine.for %{{.*}} = #map |
| 191 | // CHECK-NEXT: affine.for %{{.*}} = #map |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 192 | affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 32)>(%i0) { |
| 193 | affine.for %i3 = affine_map<(d0) -> (d0)>(%i1) to affine_map<(d0) -> (d0 + 32)>(%i1) { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 194 | // CHECK: affine.load %{{.*}}[-%{{.*}} + %{{.*}}, -%{{.*}} + %{{.*}}] : memref<32x32xf32, 2> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 195 | %1 = affine.load %0[%i2, %i3] : memref<256x1024xf32> |
Uday Bondhugula | 2631b15 | 2018-11-21 19:12:05 | [diff] [blame] | 196 | } // CHECK-NEXT: } |
| 197 | } |
| 198 | } |
| 199 | } |
Uday Bondhugula | 5f76245 | 2018-12-03 19:15:24 | [diff] [blame] | 200 | return %0 : memref<256x1024xf32> |
Uday Bondhugula | 2631b15 | 2018-11-21 19:12:05 | [diff] [blame] | 201 | } |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 202 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 203 | // ----- |
| 204 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 205 | // CHECK-LABEL: func @dma_constant_dim_access |
| 206 | func @dma_constant_dim_access(%A : memref<100x100xf32>) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 207 | %one = arith.constant 1 : index |
| 208 | %N = arith.constant 100 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 209 | // CHECK: memref.alloc() : memref<1x100xf32, 2> |
| 210 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 211 | // No strided DMA needed here. |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 212 | // CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<1x100xf32, 2>, |
| 213 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 214 | affine.for %i = 0 to 100 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 215 | affine.for %j = 0 to affine_map<()[s0] -> (s0)> ()[%N] { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 216 | // CHECK: affine.load %{{.*}}[0, %{{.*}}] : memref<1x100xf32, 2> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 217 | affine.load %A[%one, %j] : memref<100 x 100 x f32> |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 218 | } |
| 219 | } |
| 220 | return |
| 221 | } |
| 222 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 223 | // ----- |
| 224 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 225 | // CHECK-LABEL: func @dma_with_symbolic_accesses |
| 226 | func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 227 | %N = arith.constant 9 : index |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 228 | affine.for %i = 0 to 100 { |
| 229 | affine.for %j = 0 to 100 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 230 | %idy = affine.apply affine_map<(d0, d1) [s0, s1] -> (d1 + s0 + s1)>(%i, %j)[%M, %N] |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 231 | affine.load %A[%i, %idy] : memref<100 x 100 x f32> |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 232 | } |
| 233 | } |
| 234 | return |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 235 | // CHECK: memref.alloc() : memref<100x100xf32, 2> |
| 236 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
Uday Bondhugula | 4bb6f8e | 2019-08-29 08:13:01 | [diff] [blame] | 237 | // CHECK-NEXT: affine.dma_start %{{.*}}[0, symbol(%{{.*}}) + 9], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 238 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 239 | // CHECK-NEXT: affine.for %[[IV0:.*]] = 0 to 100 { |
| 240 | // CHECK-NEXT: affine.for %[[IV1:.*]] = 0 to 100 { |
| 241 | // CHECK: affine.load %{{.*}}[%[[IV0]], %[[IV1]]] : memref<100x100xf32, 2> |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 242 | // CHECK-NEXT: } |
| 243 | // CHECK-NEXT: } |
Uday Bondhugula | 8b3f841 | 2019-02-12 00:33:53 | [diff] [blame] | 244 | // CHECK: return |
Uday Bondhugula | a921308 | 2018-12-05 23:14:25 | [diff] [blame] | 245 | } |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 246 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 247 | // ----- |
| 248 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 249 | // CHECK-LABEL: func @dma_with_symbolic_loop_bounds |
| 250 | func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: index) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 251 | %K = arith.constant 9 : index |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 252 | // The buffer size can't be bound by a constant smaller than the original |
| 253 | // memref size; so the DMA buffer is the entire 100x100. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 254 | // CHECK: memref.alloc() : memref<100x100xf32, 2> |
| 255 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 256 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32> |
| 257 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 258 | affine.for %i = 0 to 100 { |
| 259 | affine.for %j = %M to %N { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 260 | %idy = affine.apply affine_map<(d1) [s0] -> (d1 + s0)>(%j)[%K] |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 261 | affine.load %A[%i, %idy] : memref<100 x 100 x f32> |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 262 | } |
| 263 | } |
| 264 | return |
| 265 | } |
| 266 | |
Uday Bondhugula | 72e5c7f | 2019-01-24 16:43:17 | [diff] [blame] | 267 | // ----- |
| 268 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 269 | // CHECK-LABEL: func @dma_unknown_size |
| 270 | func @dma_unknown_size(%arg0: memref<?x?xf32>) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 271 | %c0 = arith.constant 0 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 272 | %M = memref.dim %arg0, %c0 : memref<? x ? x f32> |
| 273 | %N = memref.dim %arg0, %c0 : memref<? x ? x f32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 274 | affine.for %i = 0 to %M { |
| 275 | affine.for %j = 0 to %N { |
Uday Bondhugula | 72e5c7f | 2019-01-24 16:43:17 | [diff] [blame] | 276 | // If this loop nest isn't tiled, the access requires a non-constant DMA |
| 277 | // size -- not yet implemented. |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 278 | // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 279 | affine.load %arg0[%i, %j] : memref<? x ? x f32> |
Uday Bondhugula | 18b8d43 | 2019-08-01 23:31:15 | [diff] [blame] | 280 | // expected-error@-6 {{copy generation failed for one or more memref's in this block}} |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 281 | } |
| 282 | } |
| 283 | return |
| 284 | } |
| 285 | |
Uday Bondhugula | 72e5c7f | 2019-01-24 16:43:17 | [diff] [blame] | 286 | // ----- |
| 287 | |
Chris Lattner | bbf362b | 2019-01-02 18:20:00 | [diff] [blame] | 288 | // CHECK-LABEL: func @dma_memref_3d |
| 289 | func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) { |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 290 | affine.for %i = 0 to 1024 { |
| 291 | affine.for %j = 0 to 1024 { |
| 292 | affine.for %k = 0 to 1024 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 293 | %idx = affine.apply affine_map<(d0) -> (d0 mod 128)>(%i) |
| 294 | %idy = affine.apply affine_map<(d0) -> (d0 mod 128)>(%j) |
| 295 | %idz = affine.apply affine_map<(d0) -> (d0 mod 128)>(%k) |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 296 | // DMA with nested striding (or emulating with loop around strided DMA) |
| 297 | // not yet implemented. |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 298 | // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<1024x1024x1024xf32> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 299 | %v = affine.load %arg0[%idx, %idy, %idz] : memref<1024 x 1024 x 1024 x f32> |
Uday Bondhugula | 18b8d43 | 2019-08-01 23:31:15 | [diff] [blame] | 300 | // expected-error@-10 {{copy generation failed for one or more memref's in this block}} |
Uday Bondhugula | dfc752e | 2018-12-07 23:04:55 | [diff] [blame] | 301 | } |
| 302 | } |
| 303 | } |
| 304 | return |
| 305 | } |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 306 | |
| 307 | // ----- |
| 308 | |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 309 | // The first load accesses ([2,258), [128,384)) |
| 310 | // The second load accesses ([64,320), [2,258)) |
| 311 | // The first store writes to ([2,258), [192,448)) |
| 312 | // The second store writes to ([128,384), [2,258)) |
| 313 | // The union of all these regions is of size 382 x 446 and has its origin at (2, |
| 314 | // 2), i.e., the window ([2,384), [2,448)) in the original space. |
| 315 | |
| 316 | // CHECK-LABEL: func @multi_load_store_union() { |
| 317 | func @multi_load_store_union() { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 318 | %A = memref.alloc() : memref<512 x 512 x f32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 319 | affine.for %i = 0 to 256 { |
| 320 | affine.for %j = 0 to 256 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 321 | %idx = affine.apply affine_map<(d0) -> (d0 + 64)>(%i) |
| 322 | %idy = affine.apply affine_map<(d0) -> (d0 + 128)>(%j) |
| 323 | %ishift = affine.apply affine_map<(d0) -> (d0 + 2)>(%i) |
| 324 | %jshift = affine.apply affine_map<(d0) -> (d0 + 2)>(%j) |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 325 | |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 326 | %u = affine.load %A[%ishift, %idy] : memref<512 x 512 x f32> |
| 327 | %v = affine.load %A[%idx, %jshift] : memref<512 x 512 x f32> |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 328 | |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 329 | %sidx = affine.apply affine_map<(d0) -> (d0 + 128)>(%i) |
| 330 | %sidy = affine.apply affine_map<(d0) -> (d0 + 192)>(%j) |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 331 | |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 332 | affine.store %u, %A[%ishift, %sidy] : memref<512 x 512 x f32> |
| 333 | affine.store %v, %A[%sidx, %jshift] : memref<512 x 512 x f32> |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 334 | } |
| 335 | } |
| 336 | return |
| 337 | } |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 338 | // CHECK: memref.alloc() : memref<512x512xf32> |
| 339 | // CHECK-NEXT: memref.alloc() : memref<382x446xf32, 2> |
| 340 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 341 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32> |
| 342 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 343 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 344 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { |
| 345 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 346 | // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}} + 126] : memref<382x446xf32, 2> |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 347 | // CHECK-NEXT: affine.load %{{.*}}[%{{.*}} + 62, %{{.*}}] : memref<382x446xf32, 2> |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 348 | // CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}} + 190] : memref<382x446xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 349 | // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}} + 126, %{{.*}}] : memref<382x446xf32, 2> |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 350 | // CHECK-NEXT: } |
| 351 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 352 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<382x446xf32, 2>, memref<512x512xf32>, memref<1xi32> |
| 353 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
| 354 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 355 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 356 | // CHECK-NEXT: dealloc %{{.*}} : memref<382x446xf32, 2> |
Uday Bondhugula | f94b15c | 2019-01-25 06:10:53 | [diff] [blame] | 357 | // CHECK-NEXT: return |
| 358 | // CHECK-NEXT:} |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 359 | |
| 360 | // ----- |
| 361 | |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 362 | // CHECK-LABEL: func @dma_loop_straightline_interspersed() { |
| 363 | func @dma_loop_straightline_interspersed() { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 364 | %c0 = arith.constant 0 : index |
| 365 | %c255 = arith.constant 255 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 366 | %A = memref.alloc() : memref<256 x f32> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 367 | %v = affine.load %A[%c0] : memref<256 x f32> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 368 | affine.for %i = 1 to 255 { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 369 | affine.load %A[%i] : memref<256 x f32> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 370 | } |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 371 | %l = affine.load %A[%c255] : memref<256 x f32> |
| 372 | affine.store %l, %A[%c0] : memref<256 x f32> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 373 | return |
| 374 | } |
| 375 | // There are three regions here - the 'load' preceding the loop, the loop |
Alex Zinenko | 60f443b | 2020-05-13 10:12:30 | [diff] [blame] | 376 | // itself, and the operations appearing after the loop. |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 377 | // CHECK: memref.alloc() : memref<256xf32> |
| 378 | // CHECK-NEXT: memref.alloc() : memref<1xf32, 2> |
| 379 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 380 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<1xf32, 2>, memref<1xi32> |
| 381 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 382 | // CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 383 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 384 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xf32, 2> |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 385 | // CHECK-NEXT: memref.alloc() : memref<254xf32, 2> |
| 386 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 387 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<254xf32, 2>, memref<1xi32> |
| 388 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
| 389 | // CHECK-NEXT: affine.for %{{.*}} = 1 to 255 { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 390 | // CHECK-NEXT: affine.load %{{.*}}[%{{.*}} - 1] : memref<254xf32, 2> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 391 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 392 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 393 | // CHECK-NEXT: dealloc %{{.*}} : memref<254xf32, 2> |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 394 | // CHECK-NEXT: memref.alloc() : memref<256xf32, 2> |
| 395 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 396 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32> |
| 397 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 398 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 399 | // CHECK-NEXT: affine.load %{{.*}}[255] : memref<256xf32, 2> |
Uday Bondhugula | 4bb6f8e | 2019-08-29 08:13:01 | [diff] [blame] | 400 | // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<256xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 401 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32, 2>, memref<256xf32>, memref<1xi32> |
| 402 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
| 403 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 404 | // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> |
| 405 | // CHECK-NEXT: dealloc %{{.*}} : memref<256xf32, 2> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 406 | // CHECK-NEXT: return |
| 407 | |
| 408 | // ----- |
| 409 | |
| 410 | // CHECK-LABEL: func @dma_mixed_loop_blocks() { |
| 411 | func @dma_mixed_loop_blocks() { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 412 | %c0 = arith.constant 0 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 413 | %A = memref.alloc() : memref<256 x 256 x vector<8 x f32>> |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 414 | affine.for %i = 0 to 256 { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 415 | %v = affine.load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 416 | "foo"(%v) : (vector<8 x f32>) -> () |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 417 | affine.for %j = 0 to 256 { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 418 | %w = affine.load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>> |
Uday Bondhugula | b26900d | 2019-02-04 15:58:42 | [diff] [blame] | 419 | "bar"(%w) : (vector<8 x f32>) -> () |
| 420 | } |
| 421 | } |
| 422 | return |
| 423 | } |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 424 | // CHECK-DAG: [[MEM:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>> |
| 425 | // CHECK-DAG: [[BUF:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>, 2> |
| 426 | // CHECK-DAG: [[TAG:%[0-9]+]] = memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 427 | // CHECK: affine.dma_start [[MEM]][%{{.*}}, %{{.*}}], [[BUF]][%{{.*}}, %{{.*}}], [[TAG]][%{{.*}}], %{{.*}} : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32> |
| 428 | // CHECK-NEXT: affine.dma_wait [[TAG]][%{{.*}}], %{{.*}} : memref<1xi32> |
| 429 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 430 | // CHECK: affine.load [[BUF]][0, 0] : memref<256x256xvector<8xf32>, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 431 | // CHECK: affine.for %{{.*}} = 0 to 256 { |
Uday Bondhugula | 7c77163 | 2020-04-01 21:04:15 | [diff] [blame] | 432 | // CHECK-NEXT: affine.load [[BUF]][%{{.*}}, %{{.*}}] : memref<256x256xvector<8xf32>, 2> |
Uday Bondhugula | f5eed89 | 2019-02-11 23:43:26 | [diff] [blame] | 433 | |
| 434 | // ----- |
| 435 | |
| 436 | // CHECK-LABEL: func @relative_loop_bounds |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 437 | func @relative_loop_bounds(%arg0: memref<1027xf32>) { |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 438 | affine.for %i0 = 0 to 1024 { |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 439 | affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 4)>(%i0) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 440 | %0 = arith.constant 0.0 : f32 |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 441 | affine.store %0, %arg0[%i2] : memref<1027xf32> |
Uday Bondhugula | f5eed89 | 2019-02-11 23:43:26 | [diff] [blame] | 442 | } |
| 443 | } |
| 444 | return |
| 445 | } |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 446 | // CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<1027xf32, 2> |
| 447 | // CHECK-NEXT: [[MEM:%[0-9]+]] = memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 448 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 1024 { |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 449 | // CHECK-NEXT: affine.for %[[I2:.*]] = {{#map[0-9]+}}(%{{.*}}) to {{#map[0-9]+}}(%{{.*}}) { |
| 450 | // CHECK: affine.store %{{.*}}, [[BUF]][%[[I2]]] : memref<1027xf32, 2> |
Uday Bondhugula | f5eed89 | 2019-02-11 23:43:26 | [diff] [blame] | 451 | // CHECK-NEXT: } |
| 452 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 453 | // CHECK-NEXT: affine.dma_start [[BUF]][%{{.*}}], %{{.*}}[%{{.*}}], [[MEM]][%{{.*}}], %{{.*}} : memref<1027xf32, 2>, memref<1027xf32>, memref<1xi32> |
| 454 | // CHECK-NEXT: affine.dma_wait [[MEM]][%{{.*}}], %{{.*}} : memref<1xi32> |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 455 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 456 | // ----- |
| 457 | |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 458 | func @test_read_write_region_union() { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 459 | %0 = memref.alloc() : memref<256xf32> |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 460 | affine.for %i0 = 0 to 10 { |
| 461 | // memref dims: [0, 256) |
| 462 | // read region: [100, 110) |
| 463 | // write region: [25, 35) |
| 464 | // union region: [25, 110) |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 465 | %a0 = affine.apply affine_map<(d0) -> (d0 + 100)>(%i0) |
| 466 | %a1 = affine.apply affine_map<(d0) -> (d0 + 25)>(%i0) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 467 | %1 = affine.load %0[%a0] : memref<256xf32> |
| 468 | affine.store %1, %0[%a1] : memref<256xf32> |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 469 | } |
| 470 | return |
| 471 | } |
| 472 | |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 473 | // CHECK: memref.alloc() : memref<256xf32> |
| 474 | // CHECK-NEXT: memref.alloc() : memref<85xf32, 2> |
| 475 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 476 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<85xf32, 2>, memref<1xi32> |
| 477 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 478 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 479 | // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { |
Uday Bondhugula | 70da33b | 2020-04-01 06:30:26 | [diff] [blame] | 480 | // CHECK: affine.load %{{.*}}[%{{.*}} + 75] : memref<85xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 481 | // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<85xf32, 2> |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 482 | // CHECK-NEXT: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 483 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<85xf32, 2>, memref<256xf32>, memref<1xi32> |
| 484 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Andy Davis | 6254a42d | 2019-05-09 14:02:32 | [diff] [blame] | 485 | |
| 486 | // ----- |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 487 | |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 488 | // This should create a buffer of size 2 for %arg2. |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 489 | |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 490 | #map_lb = affine_map<(d0) -> (d0)> |
| 491 | #map_ub = affine_map<(d0) -> (d0 + 3)> |
| 492 | #map_acc = affine_map<(d0) -> (d0 floordiv 8)> |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 493 | // CHECK-LABEL: func @test_analysis_util |
| 494 | func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<2xf32>) -> (memref<144x9xf32>, memref<2xf32>) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 495 | %c0 = arith.constant 0 : index |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 496 | %0 = memref.alloc() : memref<64x1xf32> |
| 497 | %1 = memref.alloc() : memref<144x4xf32> |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 498 | %2 = arith.constant 0.0 : f32 |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 499 | affine.for %i8 = 0 to 9 step 3 { |
| 500 | affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) { |
| 501 | affine.for %i17 = 0 to 64 { |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 502 | %23 = affine.apply #map_acc(%i9) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 503 | %25 = affine.load %arg2[%23] : memref<2xf32> |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 504 | %26 = affine.apply #map_lb(%i17) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 505 | %27 = affine.load %0[%26, %c0] : memref<64x1xf32> |
| 506 | affine.store %27, %arg2[%23] : memref<2xf32> |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 507 | } |
| 508 | } |
| 509 | } |
| 510 | return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32> |
| 511 | } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 512 | // CHECK: affine.for %{{.*}} = 0 to 9 step 3 { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 513 | // CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<2xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 514 | // CHECK: affine.dma_start %{{.*}}[%{{.*}} floordiv 8], [[BUF]] |
| 515 | // CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
| 516 | // CHECK: affine.for %{{.*}} = |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 517 | |
Diego Caballero | d7058ac | 2020-02-14 21:41:01 | [diff] [blame] | 518 | // ----- |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 519 | |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 520 | #map3 = affine_map<(d0) -> (d0)> |
| 521 | #map12 = affine_map<(d0) -> (d0 + 3)> |
| 522 | #map14 = affine_map<(d0, d1) -> ((d0 + d1 * 72) floordiv 2304 + ((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3)> |
| 523 | #map15 = affine_map<(d0, d1) -> ((d0 + d1 * 72) mod 2304 - (((d0 + d1 * 72) mod 2304) floordiv 1152) * 1151 - ((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 + d1 * 72) mod 2304) mod 1152) mod 9) floordiv 3) * 3)> |
| 524 | #map16 = affine_map<(d0, d1) -> (((((d0 + d1 * 72) mod 2304) mod 1152) floordiv 9) floordiv 8)> |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 525 | // Test for test case in b/128303048 #4. |
Diego Caballero | d7058ac | 2020-02-14 21:41:01 | [diff] [blame] | 526 | // CHECK-LABEL: func @test_memref_bounds |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 527 | func @test_memref_bounds(%arg0: memref<4x4x16x1xvector<8x128xf32>>, %arg1: memref<144x9xvector<8x128xf32>>, %arg2: memref<2xvector<8x128xf32>>) -> (memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>>) { |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 528 | %c0 = arith.constant 0 : index |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 529 | affine.for %i8 = 0 to 9 step 3 { |
| 530 | affine.for %i9 = #map3(%i8) to #map12(%i8) { |
| 531 | affine.for %i10 = 0 to 64 { |
| 532 | %10 = affine.apply #map14(%i9, %i10) |
| 533 | %11 = affine.apply #map15(%i9, %i10) |
| 534 | %12 = affine.apply #map16(%i9, %i10) |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 535 | %13 = affine.load %arg0[%10, %11, %12, %c0] : memref<4x4x16x1xvector<8x128xf32>> |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 536 | } |
| 537 | } |
| 538 | } |
| 539 | return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>> |
| 540 | } |
| 541 | |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 542 | // CHECK: memref.alloc() : memref<4x4x16x1xvector<8x128xf32>, 2> |
| 543 | // CHECK-NEXT: memref.alloc() : memref<1xi32> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 544 | // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32> |
| 545 | // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> |
Andy Davis | 0412bf6 | 2019-05-09 15:36:02 | [diff] [blame] | 546 | |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 547 | // ----- |
| 548 | |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 549 | // Since the fast memory size is 16 KB, DMA generation will happen right under |
| 550 | // %i0. |
| 551 | |
| 552 | // FAST-MEM-16KB-LABEL: func @load_store_same_memref |
| 553 | func @load_store_same_memref(%arg0: memref<256x1024xf32>) { |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 554 | // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 256 step 4 |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 555 | affine.for %i0 = 0 to 256 step 4 { |
Julian Gross | e231070 | 2021-02-10 12:53:11 | [diff] [blame] | 556 | // FAST-MEM-16KB: [[BUF:%[0-9]+]] = memref.alloc() : memref<4x1024xf32, 2> |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 557 | // FAST-MEM-16KB: affine.dma_start %{{.*}} |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 558 | // FAST-MEM-16KB-NEXT: affine.dma_wait |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 559 | // FAST-MEM-16KB: affine.for %{{.*}} |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 560 | affine.for %i1 = 0 to 1024 step 4 { |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 561 | // FAST-MEM-16KB: affine.for %{{.*}} |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 562 | affine.for %i2 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 4)>(%i0) { |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 563 | // FAST-MEM-16KB: affine.for %{{.*}} |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 564 | affine.for %i3 = affine_map<(d0) -> (d0)>(%i1) to affine_map<(d0) -> (d0 + 4)>(%i1) { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 565 | %3 = affine.load %arg0[%i2, %i3] : memref<256x1024xf32> |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 566 | %4 = arith.mulf %3, %3 : f32 |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 567 | affine.store %4, %arg0[%i2, %i3] : memref<256x1024xf32> |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 568 | } // FAST-MEM-16KB: } |
| 569 | } // FAST-MEM-16KB: } |
| 570 | } // FAST-MEM-16KB: } |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 571 | // FAST-MEM-16KB: affine.dma_start [[BUF]] |
| 572 | // FAST-MEM-16KB-NEXT: affine.dma_wait |
Uday Bondhugula | f97c1c5 | 2019-02-16 01:54:49 | [diff] [blame] | 573 | } |
| 574 | return |
| 575 | } |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 576 | |
Uday Bondhugula | 9f2781e | 2019-03-12 17:52:09 | [diff] [blame] | 577 | // ----- |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 578 | |
| 579 | // This is a 3-d loop nest tiled by 4 x 4 x 4. Under %i, %j, %k, the size of a |
| 580 | // tile of arg0, arg1, and arg2 accessed is 4 KB (each), i.e., 12 KB in total. |
| 581 | // With fast mem capacity set to 16 KB, the DMAs if placed under %k will fit. |
| 582 | // However, the region of arg2 accessed is invariant w.r.t the %k loop unlike |
| 583 | // %arg0 and %arg1. So, its DMA can be hoisted one level up and placed under |
Alex Zinenko | 60f443b | 2020-05-13 10:12:30 | [diff] [blame] | 584 | // %j, while the DMAs for arg0 and arg1 appear right under the %k loop. |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 585 | |
River Riddle | 4268e4f | 2020-01-13 21:12:37 | [diff] [blame] | 586 | #map0 = affine_map<(d0) -> (d0)> |
| 587 | #map1 = affine_map<(d0) -> (d0 + 4)> |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 588 | // FAST-MEM-16KB-LABEL: func @simple_matmul |
| 589 | func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> { |
River Riddle | 832567b | 2019-03-25 17:14:34 | [diff] [blame] | 590 | affine.for %i = 0 to 8 step 4 { |
| 591 | affine.for %j = 0 to 8 step 4 { |
| 592 | affine.for %k = 0 to 8 step 4 { |
| 593 | affine.for %ii = #map0(%i) to #map1(%i) { |
| 594 | affine.for %jj = #map0(%j) to #map1(%j) { |
| 595 | affine.for %kk = #map0(%k) to #map1(%k) { |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 596 | %5 = affine.load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>> |
| 597 | %6 = affine.load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>> |
| 598 | %7 = affine.load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>> |
Mogball | a54f4ea | 2021-10-12 23:14:57 | [diff] [blame^] | 599 | %8 = arith.mulf %5, %6 : vector<64xf32> |
| 600 | %9 = arith.addf %7, %8 : vector<64xf32> |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 601 | affine.store %9, %arg2[%ii, %jj] : memref<8x8xvector<64xf32>> |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 602 | } |
| 603 | } |
| 604 | } |
| 605 | } |
| 606 | } |
| 607 | } |
| 608 | return %arg2 : memref<8x8xvector<64xf32>> |
| 609 | } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 610 | // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 { |
| 611 | // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 { |
| 612 | // FAST-MEM-16KB: affine.dma_start %{{.*}} |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 613 | // FAST-MEM-16KB: affine.dma_wait |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 614 | // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 8 step 4 { |
| 615 | // FAST-MEM-16KB: affine.dma_start %{{.*}} |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 616 | // FAST-MEM-16KB: affine.dma_wait |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 617 | // FAST-MEM-16KB: affine.dma_start %{{.*}} |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 618 | // FAST-MEM-16KB: affine.dma_wait |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 619 | // FAST-MEM-16KB: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) { |
| 620 | // FAST-MEM-16KB-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) { |
| 621 | // FAST-MEM-16KB-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to #map{{[0-9]+}}(%{{.*}}) { |
Uday Bondhugula | 5021dc4 | 2019-02-19 18:33:41 | [diff] [blame] | 622 | // FAST-MEM-16KB: } |
| 623 | // FAST-MEM-16KB: } |
| 624 | // FAST-MEM-16KB: } |
| 625 | // FAST-MEM-16KB: } |
River Riddle | 89bc449 | 2019-07-09 17:40:29 | [diff] [blame] | 626 | // FAST-MEM-16KB: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} |
Andy Davis | 2e1187d | 2019-07-03 17:35:03 | [diff] [blame] | 627 | // FAST-MEM-16KB: affine.dma_wait |