serge-sans-paille | 515bc8c | 2020-07-15 07:31:13 | [diff] [blame] | 1 | #!/usr/bin/env python |
Pavel Labath | 0e44cff | 2017-06-29 13:02:15 | [diff] [blame] | 2 | |
Zachary Turner | 1d75297 | 2017-03-06 17:41:00 | [diff] [blame] | 3 | import argparse |
Zachary Turner | 4dbf9fa | 2017-03-21 22:46:46 | [diff] [blame] | 4 | import itertools |
Zachary Turner | e030d10 | 2017-03-06 17:40:36 | [diff] [blame] | 5 | import os |
| 6 | import re |
Zachary Turner | 4dbf9fa | 2017-03-21 22:46:46 | [diff] [blame] | 7 | import sys |
Zachary Turner | bbd1722 | 2017-03-22 18:23:14 | [diff] [blame] | 8 | from collections import defaultdict |
Zachary Turner | e030d10 | 2017-03-06 17:40:36 | [diff] [blame] | 9 | |
| 10 | from use_lldb_suite import lldb_root |
| 11 | |
# Command-line interface: both options are plain on/off switches.
parser = argparse.ArgumentParser(
    description="Analyze LLDB project #include dependencies."
)
parser.add_argument(
    "--show-counts",
    default=False,
    action="store_true",
    help="When true, show the number of dependencies from each subproject",
)
parser.add_argument(
    "--discover-cycles",
    default=False,
    action="store_true",
    # Adjacent string literals are joined verbatim, so the first one must end
    # with a space or the help text reads "Note,this option ...".
    help="When true, find and display all project dependency cycles. Note, "
    "this option is very slow",
)

# Parsed once at import time; the rest of the script reads these flags.
args = parser.parse_args()
| 30 | |
# Roots of the two trees that are scanned for #include lines.
src_dir = os.path.join(lldb_root, "source")
inc_dir = os.path.join(lldb_root, "include")

# Maps a normalized project directory to a dict of
# {included directory: number of matching #include lines}.
src_map = dict()

# Group 1 captures the directory part (with trailing slash) of a quoted
# include that starts with lldb, Plugins or clang.
include_regex = re.compile(r'#include "((lldb|Plugins|clang)(.*/)+).*"')
| 37 | |
Zachary Turner | 1d75297 | 2017-03-06 17:41:00 | [diff] [blame] | 38 | |
def is_sublist(small, big):
    """Return True if the items of `small` occur in `big` in the same order.

    The items need not be adjacent in `big`. An empty `small` always matches.
    """
    remaining = iter(big)
    for wanted in small:
        # Membership testing on an iterator consumes it up to and including
        # the match, so each later item is searched for only after this one.
        if wanted not in remaining:
            return False
    return True
| 42 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 43 | |
def normalize_host(str):
    """Map an include/source directory name onto its canonical project name.

    - Anything starting with "lldb/Host" collapses to "lldb/Host".
    - Paths starting with "Plugins" gain an "lldb/" prefix.
    - Paths starting with "lldb/../../source" have that text replaced by
      "lldb".
    - Everything else is returned unchanged.
    """
    path = str
    if path.startswith("lldb/Host"):
        normalized = "lldb/Host"
    elif path.startswith("Plugins"):
        normalized = "lldb/" + path
    elif path.startswith("lldb/../../source"):
        normalized = path.replace("lldb/../../source", "lldb")
    else:
        normalized = path
    return normalized
Zachary Turner | e030d10 | 2017-03-06 17:40:36 | [diff] [blame] | 52 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 53 | |
def scan_deps(this_dir, file):
    """Count the project directories that `file` includes, on behalf of `this_dir`.

    Args:
        this_dir: Directory name the file belongs to; it is passed through
            normalize_host() before being used as the key in `src_map`.
        file: Path of the source or header file to read.

    Every line matching `include_regex` adds one to the counter of the
    (normalized) included directory. Includes that resolve to `this_dir`
    itself are ignored. Counts accumulate across calls for the same
    directory, and a directory is only registered in `src_map` once it has at
    least one dependency.
    """
    this_dir = normalize_host(this_dir)
    # Reuse the counters already registered for this directory, if any.
    deps = src_map.get(this_dir, {})

    # The include lines of interest are plain ASCII, so decode leniently
    # instead of letting a stray byte elsewhere in the file (or a locale
    # codec such as cp1252) abort the whole scan.
    with open(file, encoding="utf-8", errors="replace") as f:
        for line in f:
            m = include_regex.match(line)
            if m is None:
                continue
            relative = m.group(1).rstrip("/")
            if relative == this_dir:
                continue
            # Normalizing can fold the include back onto this directory
            # (e.g. a Host subdirectory), so check again afterwards.
            relative = normalize_host(relative)
            if relative == this_dir:
                continue
            deps[relative] = deps.get(relative, 0) + 1

    if deps and this_dir not in src_map:
        src_map[this_dir] = deps
Zachary Turner | e030d10 | 2017-03-06 17:40:36 | [diff] [blame] | 76 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 77 | |
# Headers under include/ are attributed to their directory relative to
# include/, with Windows separators converted to forward slashes.
for base, _dirs, files in os.walk(inc_dir):
    relative = os.path.relpath(base, inc_dir).replace("\\", "/")
    for name in files:
        if os.path.splitext(name)[1] == ".h":
            scan_deps(relative, os.path.join(base, name))

# Files under source/ are attributed to "lldb/<relative dir>" so that they
# share keys with the matching include/ directories.
for base, _dirs, files in os.walk(src_dir):
    relative = os.path.relpath(base, src_dir)
    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
    norm_base_path = norm_base_path.replace("\\", "/")
    for name in files:
        if os.path.splitext(name)[1] in (".cpp", ".h", ".mm"):
            scan_deps(norm_base_path, os.path.join(base, name))
| 97 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 98 | |
def is_existing_cycle(path, cycles):
    """Return True if some rotation of `path` already contains a known cycle.

    A cycle A -> B -> C (with an implicit -> A at the end) is the same cycle
    as B -> C -> A or C -> A -> B, so every rotation of `path` is compared
    against the known cycles rather than just `path` as given. Recognizing
    rotations is an important optimization that shrinks the search space by
    multiple orders of magnitude.
    """
    rotation = path
    for _ in range(len(path)):
        for known in cycles:
            if is_sublist(known, rotation):
                return True
        # Rotate right by one: the last element moves to the front.
        rotation = [rotation[-1]] + rotation[:-1]
    return False
| 112 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 113 | |
def expand(path_queue, path_lengths, cycles, src_map):
    """Breadth-first search for dependency cycles reachable from `path_queue`.

    Args:
        path_queue: List of starting paths, each a list of directory names.
        path_lengths: List of the lengths of the paths in `path_queue`, in
            the same order.
        cycles: List that newly discovered cycles are appended to. A cycle is
            stored without repeating its first node at the end.
        src_map: Mapping of directory name to its dependencies.

    Paths are visited in order of ascending length so that short cycles are
    discovered first; longer paths containing an already known cycle are then
    discarded instead of growing the search space exponentially.

    Both `path_queue` and `path_lengths` are consumed: they are empty when
    the function returns.
    """
    # Imported here so the function carries its own dependency.
    from collections import deque

    # Keep every path paired with its length so the two can never drift
    # apart, and use a deque so taking from the front is O(1).
    pending = deque(zip(path_queue, path_lengths))
    del path_queue[:]
    del path_lengths[:]

    while pending:
        cur_path, cur_len = pending.popleft()
        if is_existing_cycle(cur_path, cycles):
            continue

        last_component = cur_path[-1]
        for item in src_map.get(last_component, []):
            # Dependencies on clang directories are not followed.
            if item.startswith("clang"):
                continue

            if item in cur_path:
                # This is a cycle. Minimize it to the part starting at the
                # repeated node and record it unless it is already known.
                cycle = cur_path[cur_path.index(item):]
                if not is_existing_cycle(cycle, cycles):
                    cycles.append(cycle)
                continue

            pending.append((cur_path + [item], cur_len + 1))
| 144 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 145 | |
# State handed to expand() when cycle discovery is requested: every known
# directory starts out as a path of length one.
cycles = []

path_queue = [[name] for name in src_map]
path_lens = [1] * len(path_queue)

# Report each directory, alphabetically, followed by its dependencies.
items = sorted(src_map.items(), key=lambda entry: entry[0])

for path, deps in items:
    print(path + ":")
    if args.show_counts:
        # Least-used dependencies first, ties broken by name.
        sorted_deps = sorted(deps.items(), key=lambda entry: (entry[1], entry[0]))
        for name, count in sorted_deps:
            print("\t{} [{}]".format(name, count))
    else:
        sorted_deps = sorted(deps.items(), key=lambda entry: entry[0])
        for name, _count in sorted_deps:
            print("\t{}".format(name))
Zachary Turner | 7e3050c | 2017-03-20 23:54:26 | [diff] [blame] | 165 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 166 | |
def iter_cycles(cycles):
    """Yield per-cycle include counts for every cycle in `cycles`.

    Args:
        cycles: Iterable of cycles, each a non-empty list of directory names
            without the closing node.

    Yields:
        (total, smallest, edges) where `edges` is a list of
        (from_dir, count, to_dir) triples covering the closed cycle, `count`
        is `src_map[from_dir][to_dir]`, `total` is the sum of the counts and
        `smallest` is the lowest single count.

    The input lists are left untouched; the closing node is added to a copy.
    """
    for cycle in cycles:
        # Close the loop on a copy so callers can keep using their lists.
        closed = cycle + [cycle[0]]
        edges = [(src, src_map[src][dst], dst) for src, dst in zip(closed, closed[1:])]
        counts = [count for _src, count, _dst in edges]
        yield (sum(counts), min(counts), edges)
| 179 | |
David Spickett | 602e47c | 2023-09-14 07:54:02 | [diff] [blame] | 180 | |
if args.discover_cycles:
    print("Analyzing cycles...")

    expand(path_queue, path_lens, cycles, src_map)

    # Cycles are stored without their closing node, hence the + 1. A project
    # without any cycle reports an average of 0 instead of dividing by zero.
    if cycles:
        average = sum(len(x) + 1 for x in cycles) / len(cycles)
    else:
        average = 0

    print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average))
    counted = list(iter_cycles(cycles))
    if args.show_counts:
        # Cheapest cycles to break (fewest includes overall) come first.
        counted.sort(key=lambda A: A[0])
        for total, smallest, edges in counted:
            sys.stdout.write("{} deps to break: ".format(total))
            sys.stdout.write(edges[0][0])
            for first, count, last in edges:
                sys.stdout.write(" [{}->] {}".format(count, last))
            sys.stdout.write("\n")
    else:
        # Derive the chain from the edge triples so the output does not
        # depend on whether the cycle lists themselves carry the closing
        # node; each cycle is printed with its start repeated exactly once.
        for total, smallest, edges in counted:
            chain = [edges[0][0]] + [last for first, count, last in edges]
            print(" -> ".join(chain))

    print("Analyzing islands...")
    islands = []
    outgoing_counts = defaultdict(int)
    incoming_counts = defaultdict(int)
    for total, smallest, edges in counted:
        for first, count, last in edges:
            outgoing_counts[first] += count
            incoming_counts[last] += count
    # Merge every cycle with all islands it shares a node with, so that each
    # island ends up as a set of directories connected through cycles.
    for cycle in cycles:
        this_cycle = set(cycle)
        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
        islands = disjoints + [set.union(this_cycle, *overlaps)]
    print("Found {} disjoint cycle islands...".format(len(islands)))
    for island in islands:
        print("Island ({} elements)".format(len(island)))
        ranked = [
            (node, incoming_counts[node], outgoing_counts[node]) for node in island
        ]
        # Least-connected nodes first; the name breaks ties so the listing
        # is stable across runs despite set iteration order.
        ranked.sort(key=lambda x: (x[1] + x[2], x[0]))
        for node, inc, outg in ranked:
            print(" {} [{} in, {} out]".format(node, inc, outg))
    sys.stdout.flush()