blob: bc3a774a79320704138c59a04a7f229c2f43f736 [file] [log] [blame]
Mircea Trofina5b79712024-03-04 22:11:301#!/usr/bin/env python3
Kirill Bobyrev0addd172018-08-28 09:42:412
3"""
4strip_asm.py - Cleanup ASM output for the specified file
5"""
6
Kirill Bobyrev0addd172018-08-28 09:42:417import os
8import re
Mircea Trofina5b79712024-03-04 22:11:309import sys
10from argparse import ArgumentParser
Kirill Bobyrev0addd172018-08-28 09:42:4111
Tobias Hietaf98ee402023-05-17 14:59:4112
def find_used_labels(asm):
    """
    find_used_labels - collect the '.L' labels that are targets of jumps.

    A line counts when, after optional leading whitespace, it holds a
    mnemonic starting with 'j' followed by a '.L<name>' operand. Returns the
    set of label names, each including its '.L' prefix.
    """
    jump_target = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    hits = (jump_target.match(text) for text in asm.splitlines())
    return {".L%s" % hit.group(1) for hit in hits if hit}
21
22
def normalize_labels(asm):
    """
    normalize_labels - give every 'L<name>' label a leading '.'.

    Label declarations are lines that start with 'L<name>:' (no dot). Each
    such label is rewritten to '.L<name>' wherever it appears as a whole
    token: preceded by the start of the text or whitespace, and followed by
    ':' or whitespace. Labels that are already declared with a leading dot
    are left alone, so input that mixes both spellings is handled
    deterministically. Returns the (possibly unchanged) assembly text.
    """
    undotted_decl = re.compile(r"^L[a-zA-Z0-9][a-zA-Z0-9_]*(?=:)")
    undotted = set()
    for line in asm.splitlines():
        m = undotted_decl.match(line)
        if m:
            undotted.add(m.group(0))
    # Sorted only to make the sequence of substitutions reproducible; each
    # pattern matches whole tokens, so the result does not depend on order.
    for ld in sorted(undotted):
        pattern = r"(^|\s+)" + re.escape(ld) + r"(?=:|\s)"
        asm = re.sub(pattern, "\\1." + ld, asm)
    return asm
38
39
def transform_labels(asm):
    """
    transform_labels - normalize labels, then drop unreferenced '.L' ones.

    After normalize_labels() has run, any line that begins with a
    '.L<name>:' declaration is kept only if find_used_labels() reported that
    label. Every other line is kept. Each kept line is emitted followed by a
    newline.
    """
    asm = normalize_labels(asm)
    referenced = find_used_labels(asm)
    decl_at_line_start = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    def keep(text):
        decl = decl_at_line_start.match(text)
        return decl is None or decl.group(0) in referenced

    return "".join(text + "\n" for text in asm.splitlines() if keep(text))
51
52
def is_identifier(tk):
    """
    is_identifier - tell whether a token looks like an identifier.

    True when the token is non-empty, starts with a letter or '_', and every
    following character is alphanumeric or '_'.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == "_"):
        return False
    return all(ch.isalnum() or ch == "_" for ch in tail)
64
Tobias Hietaf98ee402023-05-17 14:59:4165
def process_identifiers(line):
    """
    process_identifiers - rewrite identifiers so their names are consistent
    across platforms; specifically across ELF and MachO. MachO inserts an
    additional underscore at the beginning of names, and this function
    removes it.

    The line is split into runs of [a-zA-Z0-9_] and the text between them.
    A run that is an identifier loses its first character when it starts
    with '__Z', or when it starts with '_' followed by a letter other than
    'Z'. Everything else is copied through unchanged.
    """
    pieces = []
    for tk in re.split(r"([a-zA-Z0-9_]+)", line):
        if is_identifier(tk):
            extra_on_mangled = tk.startswith("__Z")
            extra_on_plain = (
                tk.startswith("_")
                and len(tk) > 1
                and tk[1].isalpha()
                and tk[1] != "Z"
            )
            if extra_on_mangled or extra_on_plain:
                tk = tk[1:]
        pieces.append(tk)
    return "".join(pieces)
88
89
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines.

    Labels are first run through transform_labels(). Then, for every line,
    '@GOTPCREL' is removed, the line is dropped if it matches one of the
    discard patterns (unless a keep pattern matches it), and surviving lines
    go through process_identifiers(). A blank line is inserted before each
    function label definition except at the very start of the output.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

    chunks = []
    for raw in asm.splitlines():
        # Remove Mach-O attribute
        line = raw.replace("@GOTPCREL", "")
        wanted = not any(rx.match(line) is not None for rx in discard_regexes)
        # A keep pattern overrides any discard pattern.
        if any(rx.match(line) is not None for rx in keep_regexes):
            wanted = True
        if not wanted:
            continue
        # The label test looks at the line before identifiers are rewritten.
        if fn_label_def.match(line) and chunks:
            chunks.append("\n")
        chunks.append(process_identifiers(line))
        chunks.append("\n")
    return "".join(chunks)
130
Tobias Hietaf98ee402023-05-17 14:59:41131
def main():
    """
    Command-line entry point: read the input assembly file, strip it with
    process_asm(), and write the result to the output file.

    Prints an error and exits with status 1 when the input path is not an
    existing file. Unrecognized command-line arguments are ignored.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # parse_known_args() tolerates extra arguments; they are discarded.
    args, _ignored = parser.parse_known_args()
    (src_path,) = args.input
    (dst_path,) = args.out
    if not os.path.isfile(src_path):
        print("ERROR: input file '%s' does not exist" % src_path)
        sys.exit(1)

    with open(src_path, "r") as src:
        stripped = process_asm(src.read())
    with open(dst_path, "w") as dst:
        dst.write(stripped)


if __name__ == "__main__":
    main()
160
161# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
162# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
163# kate: indent-mode python; remove-trailing-spaces modified;