@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
109
109
// CHECK: acc.loop {
110
110
// CHECK: acc.yield
111
111
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
112
+
113
+ #pragma acc loop collapse(1) device_type(radeon)
114
+ for (unsigned I = 0 ; I < N; ++I)
115
+ for (unsigned J = 0 ; J < N; ++J)
116
+ for (unsigned K = 0 ; K < N; ++K);
117
+ // CHECK: acc.loop {
118
+ // CHECK: acc.yield
119
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
120
+
121
+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
122
+ for (unsigned I = 0 ; I < N; ++I)
123
+ for (unsigned J = 0 ; J < N; ++J)
124
+ for (unsigned K = 0 ; K < N; ++K);
125
+ // CHECK: acc.loop {
126
+ // CHECK: acc.yield
127
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
128
+
129
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
130
+ for (unsigned I = 0 ; I < N; ++I)
131
+ for (unsigned J = 0 ; J < N; ++J)
132
+ for (unsigned K = 0 ; K < N; ++K);
133
+ // CHECK: acc.loop {
134
+ // CHECK: acc.yield
135
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
136
+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
137
+ for (unsigned I = 0 ; I < N; ++I)
138
+ for (unsigned J = 0 ; J < N; ++J)
139
+ for (unsigned K = 0 ; K < N; ++K);
140
+ // CHECK: acc.loop {
141
+ // CHECK: acc.yield
142
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
143
+
144
+ #pragma acc loop tile(1, 2, 3)
145
+ for (unsigned I = 0 ; I < N; ++I)
146
+ for (unsigned J = 0 ; J < N; ++J)
147
+ for (unsigned K = 0 ; K < N; ++K);
148
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
149
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
150
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
151
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
152
+ // CHECK: acc.yield
153
+ // CHECK-NEXT: } loc
154
+ #pragma acc loop tile(2) device_type(radeon)
155
+ for (unsigned I = 0 ; I < N; ++I)
156
+ for (unsigned J = 0 ; J < N; ++J)
157
+ for (unsigned K = 0 ; K < N; ++K);
158
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
159
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
160
+ // CHECK: acc.yield
161
+ // CHECK-NEXT: } loc
162
+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
163
+ for (unsigned I = 0 ; I < N; ++I)
164
+ for (unsigned J = 0 ; J < N; ++J)
165
+ for (unsigned K = 0 ; K < N; ++K);
166
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
167
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
168
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
169
+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
170
+ // CHECK: acc.yield
171
+ // CHECK-NEXT: } loc
172
+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
173
+ for (unsigned I = 0 ; I < N; ++I)
174
+ for (unsigned J = 0 ; J < N; ++J)
175
+ for (unsigned K = 0 ; K < N; ++K);
176
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
177
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
178
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
179
+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
180
+ // CHECK: acc.yield
181
+ // CHECK-NEXT: } loc
182
+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
183
+ for (unsigned I = 0 ; I < N; ++I)
184
+ for (unsigned J = 0 ; J < N; ++J)
185
+ for (unsigned K = 0 ; K < N; ++K);
186
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
187
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
188
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
189
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
190
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
191
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
192
+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
193
+ // CHECK: acc.yield
194
+ // CHECK-NEXT: } loc
195
+
112
196
}
0 commit comments