Mircea Trofin | a5b7971 | 2024-03-04 22:11:30 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Kirill Bobyrev | 0addd17 | 2018-08-28 09:42:41 | [diff] [blame] | 2 | |
| 3 | """ |
| 4 | strip_asm.py - Cleanup ASM output for the specified file |
| 5 | """ |
| 6 | |
Kirill Bobyrev | 0addd17 | 2018-08-28 09:42:41 | [diff] [blame] | 7 | import os |
| 8 | import re |
Mircea Trofin | a5b7971 | 2024-03-04 22:11:30 | [diff] [blame] | 9 | import sys |
| 10 | from argparse import ArgumentParser |
Kirill Bobyrev | 0addd17 | 2018-08-28 09:42:41 | [diff] [blame] | 11 | |
Tobias Hieta | f98ee40 | 2023-05-17 14:59:41 | [diff] [blame] | 12 | |
def find_used_labels(asm):
    """
    find_used_labels - collect the local labels that are targets of jumps.

    A line counts when, after optional leading whitespace, it holds a
    mnemonic starting with ``j`` followed by lowercase letters, whitespace,
    and then a ``.L``-prefixed label name. Returns the set of those labels,
    each including its ``.L`` prefix.
    """
    jump_target = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    hits = (jump_target.match(text) for text in asm.splitlines())
    # Rebuild the label from the captured name so trailing operand text is
    # never included.
    return {".L%s" % hit.group(1) for hit in hits if hit}
| 21 | |
| 22 | |
def normalize_labels(asm):
    """
    normalize_labels - give dot-less local labels a leading dot.

    Label declarations are lines that start with ``L<name>:`` or
    ``.L<name>:``. When every declaration in the input lacks the dot, each
    declared label (and every whitespace-delimited reference to it) is
    rewritten as ``.L<name>``.

    The input is returned unchanged when it declares no such labels, or when
    at least one declaration already carries the dot. In the latter case the
    text is taken to be in dotted form already, and a dot-less ``L...:``
    line is left alone rather than renamed.
    """
    label_decl = re.compile(r"^[.]?L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))
    # Decide from the whole set, not from an arbitrary element: set iteration
    # order is not stable, so peeking at one member made mixed inputs
    # non-deterministic.
    if not decls or any(ld.startswith(".") for ld in decls):
        return asm

    # One pass over the text instead of one re.sub per label. A token is only
    # rewritten when it starts the input or follows whitespace, and is
    # followed by ':', whitespace, or the end of the input (the last case
    # covers a reference on a final line with no trailing newline).
    label_token = re.compile(r"(^|\s+)(L[a-zA-Z0-9][a-zA-Z0-9_]*)(?=:|\s|$)")

    def add_dot(m):
        name = m.group(2)
        if name in decls:
            return m.group(1) + "." + name
        # Not a declared label: leave the token exactly as it was.
        return m.group(0)

    return label_token.sub(add_dot, asm)
| 38 | |
| 39 | |
def transform_labels(asm):
    """
    transform_labels - normalize local labels and drop the unused ones.

    The text is first passed through normalize_labels. Every line that
    declares a ``.L<name>:`` label which find_used_labels does not report as
    used is then removed. All remaining lines are returned, each terminated
    by a newline.
    """
    normalized = normalize_labels(asm)
    referenced = find_used_labels(normalized)
    local_label = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    def keep(text):
        # Non-declaration lines always survive; declarations only when used.
        hit = local_label.match(text)
        return hit is None or hit.group(0) in referenced

    return "".join(
        text + "\n" for text in normalized.splitlines() if keep(text)
    )
| 51 | |
| 52 | |
def is_identifier(tk):
    """
    is_identifier - report whether a token looks like an identifier.

    A token qualifies when it is non-empty, starts with a letter or an
    underscore, and every following character is alphanumeric or an
    underscore.
    """
    if len(tk) < 1:
        return False
    head, rest = tk[0], tk[1:]
    if head != "_" and not head.isalpha():
        return False
    return all(ch == "_" or ch.isalnum() for ch in rest)
| 64 | |
Tobias Hieta | f98ee40 | 2023-05-17 14:59:41 | [diff] [blame] | 65 | |
def process_identifiers(line):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.

    Exactly one leading underscore is dropped from an identifier that either
    starts with ``__Z``, or starts with ``_`` followed by a letter other than
    ``Z``. Everything else on the line is copied through unchanged.
    """

    def normalized(token):
        if not is_identifier(token):
            return token
        if token.startswith("__Z"):
            return token[1:]
        has_extra_underscore = (
            len(token) > 1
            and token.startswith("_")
            and token[1].isalpha()
            and token[1] != "Z"
        )
        return token[1:] if has_extra_underscore else token

    # The capture group makes re.split return the separators as well, so
    # joining the pieces reproduces the rest of the line verbatim.
    pieces = re.split(r"([a-zA-Z0-9_]+)", line)
    return "".join(normalized(piece) for piece in pieces)
| 88 | |
| 89 | |
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Labels are first cleaned up with transform_labels. Each remaining line
    then has every ``@GOTPCREL`` occurrence removed and is dropped if it
    matches one of the discard patterns (unless a keep pattern matches it).
    Surviving lines go through process_identifiers, and a blank line is
    inserted before every function-label definition except when it would be
    the first output line.
    """
    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    # Patterns that rescue a line even when a discard pattern matched it.
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

    def wanted(text):
        if any(rx.match(text) is not None for rx in keep_regexes):
            return True
        return not any(rx.match(text) is not None for rx in discard_regexes)

    chunks = []
    for raw in transform_labels(asm).splitlines():
        text = raw.replace("@GOTPCREL", "")
        if not wanted(text):
            continue
        # Separate functions with a blank line, but never lead with one.
        if chunks and fn_label_def.match(text):
            chunks.append("\n")
        chunks.append(process_identifiers(text) + "\n")
    return "".join(chunks)
| 130 | |
Tobias Hieta | f98ee40 | 2023-05-17 14:59:41 | [diff] [blame] | 131 | |
def main():
    """
    Command-line entry point: read the input assembly file, strip it with
    process_asm and write the result to the output file.

    Takes two positional arguments, the input path and the output path.
    Prints an error to stderr and exits with status 1 when the input path is
    not an existing file.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # parse_known_args is kept (instead of parse_args) so that extra,
    # unrecognised arguments are still tolerated and ignored.
    args, _unknown_args = parser.parse_known_args()
    # nargs=1 makes each value a one-element list.
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(
            "ERROR: input file '%s' does not exist" % input_path,
            file=sys.stderr,
        )
        sys.exit(1)

    with open(input_path, "r") as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)
| 156 | |
| 157 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
| 160 | |
| 161 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |
| 162 | # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; |
| 163 | # kate: indent-mode python; remove-trailing-spaces modified; |