Wen-Heng (Jack) Chung | 7bfcb91 | 2020-06-12 00:15:16 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 2 | """A script to generate FileCheck statements for mlir unit tests. |
| 3 | |
| 4 | This script is a utility to add FileCheck patterns to an mlir file. |
| 5 | |
| 6 | NOTE: The input .mlir is expected to be the output from the parser, not a |
| 7 | stripped down variant. |
| 8 | |
| 9 | Example usage: |
| 10 | $ generate-test-checks.py foo.mlir |
| 11 | $ mlir-opt foo.mlir -transformation | generate-test-checks.py |
Tim Shen | b877f33 | 2020-06-16 18:38:26 | [diff] [blame] | 12 | $ mlir-opt foo.mlir -transformation | generate-test-checks.py --source foo.mlir |
| 13 | $ mlir-opt foo.mlir -transformation | generate-test-checks.py --source foo.mlir -i |
| 14 | $ mlir-opt foo.mlir -transformation | generate-test-checks.py --source foo.mlir -i --source_delim_regex='gpu.func @' |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 15 | |
Tim Shen | b877f33 | 2020-06-16 18:38:26 | [diff] [blame] | 16 | The script will heuristically generate CHECK/CHECK-LABEL commands for each line |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 17 | within the file. By default this script will also try to insert string |
Tim Shen | b877f33 | 2020-06-16 18:38:26 | [diff] [blame] | 18 | substitution blocks for all SSA value names. If --source file is specified, the |
| 19 | script will attempt to insert the generated CHECKs to the source file by looking |
| 20 | for line positions matched by --source_delim_regex. |
| 21 | |
| 22 | The script is designed to make adding checks to a test case fast, it is *not* |
| 23 | designed to be authoritative about what constitutes a good test! |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 24 | """ |
| 25 | |
Mehdi Amini | 56222a0 | 2019-12-23 17:35:36 | [diff] [blame] | 26 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 27 | # See https://llvm.org/LICENSE.txt for license information. |
| 28 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 29 | |
| 30 | import argparse |
| 31 | import os # Used to advertise this file's name ("autogenerated_note"). |
| 32 | import re |
| 33 | import sys |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 34 | |
# Opening of the note placed at the top of the generated output. main()
# appends the script's path and a newline to it, then ADVERT_END.
ADVERT_BEGIN = '// NOTE: Assertions have been autogenerated by '
# Remainder of the note: a reminder that the generated checks are only a
# starting point. The leading newline in the literal yields a blank line
# between the first note line and this text.
ADVERT_END = """
// The script is designed to make adding checks to
// a test case fast, it is *not* designed to be authoritative
// about what constitutes a good test! The CHECK should be
// minimized and named to reflect the test intent.
"""
| 42 | |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 43 | |
# Regex command to match an SSA identifier.
# The pattern covers the name only, not the leading '%': either a run of
# digits, or a name that starts with a letter or one of '$', '.', '_', '-'
# and continues with those characters or digits. It is applied with
# `match`, i.e. anchored at the start of the text that followed a '%'.
SSA_RE_STR = '[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*'
SSA_RE = re.compile(SSA_RE_STR)
| 47 | |
| 48 | |
class SSAVariableNamer:
    """Hands out FileCheck substitution-variable names for SSA values.

    Attributes are plain and public:
      scopes: a stack (list) of dicts, innermost scope last, each mapping an
        SSA value name to the variable name generated for it.
      name_counter: numeric suffix the next generated variable will receive.
    """

    def __init__(self):
        self.name_counter = 0
        self.scopes = []

    def generate_name(self, ssa_name):
        """Create the next 'VAL_<n>' variable and bind it to `ssa_name`.

        The binding is stored in the most recently pushed scope, so at least
        one scope must have been pushed; otherwise an IndexError is raised.
        """
        index = self.name_counter
        self.name_counter = index + 1
        fresh_variable = 'VAL_{}'.format(index)
        self.scopes[-1][ssa_name] = fresh_variable
        return fresh_variable

    def push_name_scope(self):
        """Open a new, empty innermost scope."""
        self.scopes.append(dict())

    def pop_name_scope(self):
        """Discard the innermost scope together with its bindings."""
        self.scopes.pop()

    def num_scopes(self):
        """Return the current nesting depth (number of open scopes)."""
        return len(self.scopes)

    def clear_counter(self):
        """Restart variable numbering from zero; existing bindings stay."""
        self.name_counter = 0
| 79 | |
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 80 | |
def process_line(line_chunks, variable_namer):
    """Rewrite the SSA-value chunks of one line as FileCheck substitutions.

    Args:
      line_chunks: the pieces of an input line that followed each '%', i.e.
        the result of splitting the line on '%' without the leading piece.
      variable_namer: object exposing `scopes` (an iterable of dicts mapping
        SSA names to variable names) and `generate_name(ssa_name)`.

    Returns:
      The rewritten text with trailing whitespace removed and a single
      newline appended. The first use of an SSA name becomes
      '%[[VAR:.*]]' (a definition); later uses become '%[[VAR]]'.
    """
    pieces = []

    for chunk in line_chunks:
        m = SSA_RE.match(chunk)
        if m is None:
            # The '%' was not followed by an SSA identifier (for example a
            # '%' inside a string attribute). There is nothing to
            # substitute, so put the text back exactly as it was instead of
            # failing on `m.group`.
            pieces.append('%' + chunk)
            continue
        ssa_name = m.group(0)

        # Check if an existing variable exists for this name.
        variable = None
        for scope in variable_namer.scopes:
            variable = scope.get(ssa_name)
            if variable is not None:
                break

        if variable is not None:
            # Known value: reference the existing variable.
            pieces.append('%[[' + variable + ']]')
        else:
            # First occurrence: define a new variable that captures the name.
            variable = variable_namer.generate_name(ssa_name)
            pieces.append('%[[' + variable + ':.*]]')

        # Append whatever followed the identifier within this chunk.
        pieces.append(chunk[len(ssa_name):])

    return ''.join(pieces).rstrip() + '\n'
River Riddle | a9d4015 | 2019-08-13 23:42:41 | [diff] [blame] | 109 | |
| 110 | |
def process_source_lines(source_lines, note, args):
    """Split the source file into segments and drop previously generated text.

    The source file doesn't have to be .mlir.

    Args:
      source_lines: lines of the source file with trailing whitespace
        (including the newline) already removed.
      note: the autogenerated note, possibly spanning several lines.
      args: parsed options; `source_delim_regex` and `check_prefix` are read.

    Returns:
      A list of segments, each a list of newline-terminated lines. A new
      segment starts at every line matching `args.source_delim_regex`; the
      first segment holds whatever precedes the first match and may be
      empty.
    """
    source_split_re = re.compile(args.source_delim_regex)

    # The note is written to the output followed by one extra newline, so a
    # previous note shows up in the file as this run of consecutive lines.
    # Comparing one source line against the whole multi-line note would never
    # match, so the note is matched and removed as a block instead.
    note_block = [text.rstrip() for text in (note + '\n').splitlines()]
    note_len = len(note_block)
    source_lines = list(source_lines)

    source_segments = [[]]
    index = 0
    while index < len(source_lines):
        # Remove previous note.
        if source_lines[index:index + note_len] == note_block:
            index += note_len
            continue

        line = source_lines[index]
        index += 1

        # Remove previous CHECK lines.
        if args.check_prefix in line:
            continue
        # Segment the file based on --source_delim_regex.
        if source_split_re.search(line):
            source_segments.append([])

        source_segments[-1].append(line + '\n')
    return source_segments
| 129 | |
| 130 | |
def preprocess_line(line):
    """Return `line` with bracket sequences that confuse FileCheck escaped.

    Two rewrites are applied, in this order:
      * every '[[' becomes the regex block '{{\\[\\[}}', because '[[' is how
        FileCheck introduces a variable;
      * every '[%' becomes '{{\\[}}%', because the SSA identifier after the
        bracket will be replaced by a variable, which would recreate the
        '[[' situation above.
    """
    escaped_double_bracket = '{{\\[\\[}}'
    escaped_bracket_before_ssa = '{{\\[}}%'
    return (line
            .replace('[[', escaped_double_bracket)
            .replace('[%', escaped_bracket_before_ssa))
| 144 | |
| 145 | |
def _build_arg_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '--check-prefix', default='CHECK', help='Prefix to use from check file.')
    parser.add_argument(
        '-o',
        '--output',
        nargs='?',
        type=argparse.FileType('w'),
        default=None)
    parser.add_argument(
        'input',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin)
    parser.add_argument(
        '--source', type=str,
        help='Print each CHECK chunk before each delimeter line in the source '
             'file, respectively. The delimeter lines are identified by '
             '--source_delim_regex.')
    parser.add_argument('--source_delim_regex', type=str, default='func @')
    parser.add_argument(
        '--starts_from_scope', type=int, default=1,
        help='Omit the top specified level of content. For example, by default '
             'it omits "module {"')
    parser.add_argument('-i', '--inplace', action='store_true', default=False)
    return parser


def _generate_check_segments(input_lines, args):
    """Convert IR lines into CHECK lines, grouped into segments.

    `input_lines` must already have trailing whitespace removed. A new segment
    is started whenever a line ending in '{' is seen at nesting depth
    `args.starts_from_scope`; lines at a shallower depth are skipped. The
    returned list always begins with a (possibly empty) leading segment.
    """
    output_segments = [[]]
    # Tracks the substitution variables assigned to SSA value names.
    variable_namer = SSAVariableNamer()
    for input_line in input_lines:
        if not input_line:
            continue
        first_char = input_line.lstrip()[0]

        # Lines with blocks begin with a ^. These lines have a trailing
        # comment that needs to be stripped.
        if first_char == '^':
            input_line = input_line.rsplit('//', 1)[0].rstrip()

        cur_level = variable_namer.num_scopes()

        # If the line starts with a '}', pop the last name scope.
        if first_char == '}':
            variable_namer.pop_name_scope()
            cur_level = variable_namer.num_scopes()

        # If the line ends with a '{', push a new name scope.
        if input_line[-1] == '{':
            variable_namer.push_name_scope()
            if cur_level == args.starts_from_scope:
                output_segments.append([])

        # Omit lines at the near top level e.g. "module {".
        if cur_level < args.starts_from_scope:
            continue

        # Variable numbering restarts at the beginning of every segment.
        if not output_segments[-1]:
            variable_namer.clear_counter()

        # Preprocess the input to remove any sequences that may be
        # problematic with FileCheck.
        input_line = preprocess_line(input_line)

        # Split the line at each SSA value name.
        ssa_split = input_line.split('%')

        # The first line of a segment becomes 'CHECK-LABEL' unless it begins
        # with an SSA value; every other line is a plain 'CHECK:'.
        if output_segments[-1] or not ssa_split[0]:
            output_line = '// ' + args.check_prefix + ': '
            # Pad to align with the 'LABEL' statements.
            output_line += ' ' * len('-LABEL')

            # Output the first line chunk that does not contain an SSA name.
            output_line += ssa_split[0]

            # Process the rest of the input line.
            output_line += process_line(ssa_split[1:], variable_namer)
        else:
            # Output the first line chunk that does not contain an SSA name
            # for the label.
            output_line = ('// ' + args.check_prefix + '-LABEL: ' +
                           ssa_split[0] + '\n')

            # Process the rest of the input line on separate check lines.
            for argument in ssa_split[1:]:
                output_line += '// ' + args.check_prefix + '-SAME: '

                # Pad to align with the original position in the line.
                output_line += ' ' * len(ssa_split[0])

                # Process the rest of the line.
                output_line += process_line([argument], variable_namer)

        output_segments[-1].append(output_line)
    return output_segments


def _render_output(note, check_segments, source_segments):
    """Assemble the complete output text.

    With `source_segments`, each CHECK segment is placed before the source
    segment at the same position. Without them, CHECK segments are emitted
    alone, each preceded by a blank line, with a final blank line at the end.
    """
    pieces = [note + '\n']
    if source_segments:
        for check_segment, source_segment in zip(check_segments,
                                                 source_segments):
            pieces.extend(check_segment)
            pieces.extend(source_segment)
    else:
        for segment in check_segments:
            pieces.append('\n')
            pieces.extend(segment)
        pieces.append('\n')
    return ''.join(pieces)


def main():
    """Generate FileCheck lines for the input IR and write them out.

    The whole output is built in memory before any destination is opened for
    writing, so a failure during processing can no longer leave the --source
    file truncated when --inplace is used.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    # Validate option combinations explicitly; an `assert` would be stripped
    # under `python -O`, and --inplace without --source used to crash while
    # opening `None`.
    if args.inplace:
        if args.output is not None:
            parser.error('-i/--inplace cannot be combined with -o/--output')
        if not args.source:
            parser.error('-i/--inplace requires --source')

    # Read the whole input up front (it may be a pipe fed from the very file
    # that is rewritten in place later).
    input_lines = [l.rstrip() for l in args.input]
    args.input.close()

    # Generate a note used for the generated check file.
    script_name = os.path.basename(__file__)
    autogenerated_note = (ADVERT_BEGIN + 'utils/' + script_name + "\n" +
                          ADVERT_END)

    source_segments = None
    if args.source:
        with open(args.source, 'r') as source_file:
            source_lines = [l.rstrip() for l in source_file]
        source_segments = process_source_lines(
            source_lines, autogenerated_note, args)

    output_segments = _generate_check_segments(input_lines, args)

    if source_segments and len(output_segments) != len(source_segments):
        sys.exit(
            'error: generated %d CHECK segment(s) but the source file has %d '
            'segment(s) delimited by --source_delim_regex=%r; nothing was '
            'written' % (len(output_segments), len(source_segments),
                         args.source_delim_regex))

    text = _render_output(autogenerated_note, output_segments, source_segments)

    # Write the output.
    if args.inplace:
        with open(args.source, 'w') as output:
            output.write(text)
    elif args.output is None:
        sys.stdout.write(text)
        sys.stdout.flush()
    else:
        with args.output as output:
            output.write(text)


if __name__ == '__main__':
    main()