#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5
"""This script will search through the target folders specified and try to find
includes that are duplicated between .h and .cc files, and remove them from the
.cc files. The current/working directory needs to be chromium_checkout/src/
when this tool is run.

Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
"""
13
14import argparse
15import collections
16import logging
17import os
18import re
19import sys
20
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a line that consists solely of one #include directive. Leading and
# trailing whitespace sit outside the groups so that comparisons between files
# only look at the captured text: group 1 is the whole directive and group 2
# is the path between the quotes or angle brackets. A raw string is used so
# that \s reaches the regex engine verbatim instead of relying on Python
# passing an unknown string escape through.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
28
def HasSuffix(file_name, suffix):
  """Returns True when the extension of |file_name| is exactly |suffix|.

  |suffix| is expected to include the leading dot (for example '.h'), since it
  is compared against the extension reported by os.path.splitext().
  """
  _, extension = os.path.splitext(file_name)
  return extension == suffix
31
def IsEmpty(line):
  """Returns True when |line| contains nothing besides whitespace."""
  remainder = line.strip()
  return len(remainder) == 0
34
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
  """Looks up the include set of the header that belongs to a .cc file.

  Only the first include found in |input_lines| is considered; it is assumed
  to name the corresponding .h file. Its path is resolved against the current
  working directory and used as the key into |h_path_to_include_set|.

  Returns the mapped include set, or None when the file has no include at all
  or when its first include is not a known .h file (a message mentioning
  |cc_file_name| is printed in the latter case).
  """
  all_matches = (INCLUDE_REGEX.search(line) for line in input_lines)
  first_include = next((m for m in all_matches if m), None)
  if first_include is None:
    return None

  header_path = os.path.join(os.getcwd(), first_include.group(2))
  if header_path in h_path_to_include_set:
    return h_path_to_include_set[header_path]

  # The first include should be the corresponding .h file, else skip.
  print('First include did not match to a known .h file, skipping ' +
        cc_file_name + ', line: ' + first_include.group(1))
  return None
52
def WithoutDuplicates(input_lines, include_set, cc_file_name):
  """Filters out the include lines that |include_set| already covers.

  Every line of |input_lines| whose included path is a member of |include_set|
  is dropped. When the dropped lines were preceded by a kept blank line, blank
  lines directly following them are dropped too, so that removing an entire
  section of includes does not leave consecutive blank lines behind. Each
  removal is reported on stdout together with |cc_file_name|.

  Returns the list of lines that should become the new file contents.
  """
  kept_lines = []
  previous_kept_was_blank = False
  previous_was_dropped = False
  for current in input_lines:
    include = INCLUDE_REGEX.search(current)
    if include is not None and include.group(2) in include_set:
      print('Removed ' + include.group(1) + ' from ' + cc_file_name)
      previous_was_dropped = True
      continue

    if previous_kept_was_blank and previous_was_dropped and IsEmpty(current):
      # |previous_was_dropped| is already True here, so it stays untouched and
      # any further blank lines in this run keep being dropped.
      print('Removed empty line from ' + cc_file_name)
      continue

    previous_kept_was_blank = IsEmpty(current)
    previous_was_dropped = False
    kept_lines.append(current)
  return kept_lines
77
def main():
  """Removes includes from .cc files that their own .h file already has.

  Walks every target folder given on the command line, records the includes of
  each .h file, and then filters each .cc file against the include set of the
  header named by its first include. With --dry-run the removals are only
  reported and no file is modified.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dry-run', action='store_true',
                      help='Does not actually remove lines when specified.')
  parser.add_argument('targets', nargs='+',
                      help='Relative path to folders to search for duplicate '
                           'includes in.')
  args = parser.parse_args()

  # A map of header file paths to the includes they contain.
  h_path_to_include_set = {}

  # Simply collects the path of all cc files present.
  cc_file_path_set = set()

  for relative_root in args.targets:
    # abspath() anchors the target at the current directory and also
    # normalizes it, so spellings such as './components/foo' produce the same
    # keys that FindIncludeSet() builds from the include paths.
    absolute_root = os.path.abspath(relative_root)
    for dir_path, _, file_name_list in os.walk(absolute_root):
      for file_name in file_name_list:
        file_path = os.path.join(dir_path, file_name)
        if HasSuffix(file_name, H_FILE_SUFFIX):
          # By manually adding the set instead of using defaultdict we can
          # avoid warning about missing .h files when the .h file has no
          # includes.
          header_includes = set()
          h_path_to_include_set[file_path] = header_includes
          with open(file_path) as fh:
            for line in fh:
              match = INCLUDE_REGEX.search(line)
              if match:
                header_includes.add(match.group(2))
        elif HasSuffix(file_name, CC_FILE_SUFFIX):
          cc_file_path_set.add(file_path)

  # Sorted so that the reported removals come out in a stable order.
  for cc_file_path in sorted(cc_file_path_set):
    cc_file_name = os.path.basename(cc_file_path)
    # Reading never needs write access; the file is only reopened for writing
    # further down when there is something to change.
    with open(cc_file_path) as fh:
      input_lines = fh.readlines()

    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                 cc_file_name)
    if not include_set:
      continue

    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
    # Leave untouched files alone so their modification time does not change.
    if args.dry_run or output_lines == input_lines:
      continue

    with open(cc_file_path, 'w') as fh:
      fh.writelines(output_lines)
122
if __name__ == '__main__':
  # Only run the tool when executed as a script; main()'s result is handed to
  # sys.exit() as the exit status.
  exit_status = main()
  sys.exit(exit_status)