blob: e4a07b6ec66a084cb8041e1758700cd57cfcda19 [file] [log] [blame]
agrieve142e2752016-09-12 14:36:211#!/usr/bin/env python
smaierb6dc58c2016-06-13 22:14:442# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
"""Tool to diff 2 dex files that have been proguarded.

To use this tool, first get dextra: http://newandroidbook.com/tools/dextra.html
Then invoke the script like:

  PATH=$PATH:/path/to/dextra dexdiffer.py --old classes1.dex --new classes2.dex

apk files may be used as well.
"""
14
15import argparse
agrieve07bf88a2016-09-30 00:13:4516import errno
smaierb6dc58c2016-06-13 22:14:4417import re
agrieve07bf88a2016-09-30 00:13:4518import subprocess
smaierb6dc58c2016-06-13 22:14:4419import sys
agrieve07bf88a2016-09-30 00:13:4520import tempfile
21import zipfile
smaierb6dc58c2016-06-13 22:14:4422
23
24_QUALIFIERS = set(['public', 'protected', 'private', 'final', 'static',
25 'abstract', 'volatile', 'native', 'enum'])
26
27
28def _IsNewClass(line):
29 return line.endswith(':')
30
31
32# Expects lines like one of these 3:
33# 'android.support.v8.MenuPopupHelper -> android.support.v8.v:'
34# ' android.view.LayoutInflater mInflater -> d'
35# ' 117:118:void setForceShowIcon(boolean) -> b'
36# Those three examples would return
37# 'android.support.v8.MenuPopupHelper', 'android.support.v8.v'
38# 'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'
39# 'void setForceShowIcon(boolean)', 'void b(boolean)'
40def _ParseMappingLine(line):
41 line = line.rstrip(':')
42
43 # Stripping any line number denotations
44 line = re.sub(r'\d+:\d+:', '', line)
45 line = re.sub(r'\):\d+', ')', line)
46
47 original_name, new_name = line.split(' -> ')
48
49 type_string = ''
50 if ' ' in original_name:
51 type_string = original_name[:original_name.find(' ') + 1]
52
53 arguments_string = ''
54 match = re.search(r'(\(.*?\))', original_name)
55 if match:
56 arguments_string = match.group(1)
57
58 return original_name, type_string + new_name + arguments_string
59
60
def _ReadMappingDict(mapping_file):
  """Parses a proguard mapping file into a lookup keyed by renamed names.

  Args:
    mapping_file: Iterable of mapping-file lines (e.g. an open file object).

  Returns:
    Dict mapping each renamed class name to a two-element list
    [original_class_name, member_mappings], where member_mappings maps each
    renamed member identifier to its original identifier (both as produced
    by _ParseMappingLine).

  Raises:
    ValueError: If a member line appears before any class line.
  """
  mapping = {}
  member_mappings = None
  for line in mapping_file:
    line = line.strip()
    # Blank lines and '#' comment lines carry no mapping; without this guard
    # they would fail to unpack in _ParseMappingLine.
    if not line or line.startswith('#'):
      continue

    if _IsNewClass(line):
      original_class_name, renamed_class_name = _ParseMappingLine(line)
      member_mappings = {}
      # The entry shares |member_mappings|, so members parsed on later lines
      # show up in it without any further bookkeeping.
      if renamed_class_name:
        mapping[renamed_class_name] = [original_class_name, member_mappings]
    else:
      if member_mappings is None:
        raise ValueError(
            'Member mapping found before any class mapping: %r' % line)
      original_member_name, renamed_member_name = _ParseMappingLine(line)
      member_mappings[renamed_member_name] = original_member_name

  return mapping
82
83
84def _StripComments(string):
85 # Remove all occurances of multiline comments (/*COMMENT*/)
86 string = re.sub(r'/\*.*?\*/', "", string, flags=re.DOTALL)
87 # Remove all occurances of single line comments (//COMMENT)
88 string = re.sub(r'//.*?$', "", string)
89 return string
90
91
92def _StripQuotes(string):
93 return re.sub(r'([\'"]).*?\1', '', string)
94
95
def _RemoveQualifiers(string_tokens):
  """Drops the leading run of tokens that are listed in _QUALIFIERS.

  Args:
    string_tokens: Sequence of tokens for one line.

  Returns:
    The tokens from the first non-qualifier onwards. The input is returned
    unchanged when it is empty or does not start with a qualifier.
  """
  if not string_tokens:
    return string_tokens

  first_kept = 0
  while (first_kept < len(string_tokens)
         and string_tokens[first_kept] in _QUALIFIERS):
    first_kept += 1

  if not first_kept:
    return string_tokens
  return string_tokens[first_kept:]
100
101
def _GetLineTokens(line):
  """Tokenizes one line after removing comments and leading qualifiers.

  Args:
    line: One line of text.

  Returns:
    The tokens found in |line| once comments are removed, minus any leading
    qualifiers.
  """
  code = _StripComments(line)
  # A token is a run of \w characters (alphanumerics and underscore) plus any
  # of: '$', '<', '>', '{', '}', '[', ']', and '.'
  token_pattern = r'[\w\$\.<>\{\}\[\]]+'
  return _RemoveQualifiers(re.findall(token_pattern, code))
108
109
110def _IsClassDefinition(line_tokens):
111 return line_tokens and line_tokens[0] == 'class'
112
113
114def _IsEndOfClass_definition(line_tokens):
115 return line_tokens and line_tokens[-1] == '{'
116
117
118def _IsEndOfClass(line_tokens):
119 return line_tokens and line_tokens[-1] == '}'
120
121
122def _TypeLookup(renamed_type, mapping_dict):
123 renamed_type_stripped = renamed_type.strip('[]')
124 postfix = renamed_type.replace(renamed_type_stripped, '')
125
126 if renamed_type_stripped in mapping_dict:
127 real_type = mapping_dict[renamed_type_stripped][0]
128 else:
129 real_type = renamed_type_stripped
130
131 return real_type + postfix
132
133
def _GetMemberIdentifier(line_tokens, mapping_dict, renamed_class_name,
                         is_function):
  """Builds a member's identifier and maps it back to the original one.

  Args:
    line_tokens: Tokens of the member line; index 0 is read as the type,
        index 1 as the name, and any remaining tokens as argument types.
    mapping_dict: Dict of renamed class name -> [original name, member dict];
        may be empty, in which case no translation of the member happens.
    renamed_class_name: Renamed name of the class that owns the member.
    is_function: Whether to append a parenthesized argument list.

  Returns:
    With an empty |mapping_dict|, the identifier assembled from the tokens.
    Otherwise the value stored under that identifier in the owning class's
    member dict.

  Raises:
    KeyError: If the assembled identifier is missing from the member dict
        (diagnostics are printed first).
  """
  assert len(line_tokens) > 1
  if mapping_dict:
    assert renamed_class_name in mapping_dict
    member_lookup = mapping_dict[renamed_class_name][1]

  real_type = _TypeLookup(line_tokens[0], mapping_dict)

  # Anything from '=' onwards in the name token is discarded.
  renamed_name = line_tokens[1].partition('=')[0]

  if is_function:
    translated_args = ''.join(
        _TypeLookup(token, mapping_dict) + ',' for token in line_tokens[2:])
    # Remove trailing ',' before closing the argument list.
    function_args = ('(' + translated_args).rstrip(',') + ')'
  else:
    function_args = ''

  renamed_member_identifier = real_type + ' ' + renamed_name + function_args

  if not mapping_dict:
    return renamed_member_identifier

  if renamed_member_identifier not in member_lookup:
    print('Proguarded class which caused the issue:' + ' ' +
          str(renamed_class_name))
    print('Key supposed to be in this dict:' + ' ' + str(member_lookup))
    print('Definition line tokens:' + ' ' + str(line_tokens))

  # This will be the real type + real_identifier + any real function args (if
  # applicable).
  return member_lookup[renamed_member_identifier]
170
171
172def _GetClassNames(line_tokens, mapping_dict):
173 assert len(line_tokens) > 1
smaier58d015042016-08-05 21:18:16174 if not mapping_dict:
175 return line_tokens[1], line_tokens[1]
smaierb6dc58c2016-06-13 22:14:44176 assert line_tokens[1] in mapping_dict
177 return line_tokens[1], mapping_dict[line_tokens[1]][0]
178
179
def _IsLineFunctionDefinition(line):
  """Returns True when |line| looks like it declares a function.

  After comments and quoted sections are removed, the first '(' and the first
  ')' must both exist and neither may be the first character.
  """
  code = _StripQuotes(_StripComments(line))
  first_open = code.find('(')
  first_close = code.find(')')
  return first_open > 0 and first_close > 0
184
185
def _BuildMappedDexDict(dextra_file, mapping_dict):
  """Builds a dictionary mapping class name to list of members.

  Expects data from dextra -j -m -f.

  Args:
    dextra_file: Iterable of dextra output lines.
    mapping_dict: Mapping as returned by _ReadMappingDict, or an empty dict.
        When non-empty it gains a 'bool' entry as a side effect.

  Returns:
    Dict mapping each class name (translated through |mapping_dict| when it
    is non-empty) to the list of its member identifiers.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
  # dextra shortens boolean to bool.
  if mapping_dict:
    mapping_dict['bool'] = ['boolean', {}]

  classes = {}
  members = []
  in_class_header = True
  inside_multiline_string = False

  for raw_line in dextra_file:
    # Accounting for multi line strings: a line with an odd number of '"'
    # toggles the state and is skipped, as is every line while it is set.
    if raw_line.count('"') % 2 != 0:
      inside_multiline_string = not inside_multiline_string
      continue
    if inside_multiline_string:
      continue

    tokens = _GetLineTokens(raw_line)

    if _IsClassDefinition(tokens):
      in_class_header = True
      renamed_class_name, real_class_name = _GetClassNames(tokens,
                                                           mapping_dict)

    # A trailing '{' ends the header; this can be the class line itself.
    if _IsEndOfClass_definition(tokens):
      in_class_header = False
      continue

    if _IsEndOfClass(tokens):
      classes[real_class_name] = members
      members = []
      continue

    if in_class_header or not tokens:
      continue

    is_function = _IsLineFunctionDefinition(raw_line)
    members.append(_GetMemberIdentifier(tokens, mapping_dict,
                                        renamed_class_name, is_function))

  return classes
226
227
228def _DiffDexDicts(dex_base, dex_new):
229 diffs = []
230 for key, base_class_members in dex_base.iteritems():
231 if key in dex_new:
232 # Class in both
233 base_class_members_set = set(base_class_members)
234 # Removing from dex_new to have just those which only appear in dex_new
235 # left over.
236 new_class_members_set = set(dex_new.pop(key))
237 if base_class_members_set == new_class_members_set:
238 continue
239 else:
240 # They are not equal
241 diff_string = key
242 for diff in base_class_members_set.difference(new_class_members_set):
243 # Base has stuff the new one doesn't
244 diff_string += '\n' + '- ' + diff
245 for diff in new_class_members_set.difference(base_class_members_set):
246 # New has stuff the base one doesn't
247 diff_string += '\n' + '+ ' + diff
248 diffs.append(diff_string)
249 else:
250 # Class not found in new
251 diff_string = '-class ' + key
252 diffs.append(diff_string)
253 if dex_new:
254 # Classes in new that have yet to be hit by base
255 for key in dex_new:
256 diff_string = '+class ' + key
257 diffs.append(diff_string)
258
259 return diffs
260
261
def _RunDextraOnDex(dex_path):
  """Runs dextra.ELF64 -j -f -m on |dex_path| and returns its output lines.

  Args:
    dex_path: Path handed to dextra as its last argument.

  Returns:
    The captured standard output, split into lines.

  Raises:
    Exception: If launching the command fails with ENOENT.
    OSError: For any other failure to launch the command.
    subprocess.CalledProcessError: If dextra exits with a non-zero status.
  """
  command = ['dextra.ELF64', '-j', '-f', '-m', dex_path]
  try:
    output = subprocess.check_output(command)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    raise Exception('Ensure dextra.ELF64 is in your PATH')
  return output.splitlines()
270 raise
271
272
def _RunDextra(dex_or_apk_path):
  """Runs dextra on a .dex file, or on classes.dex taken out of an archive.

  Args:
    dex_or_apk_path: Path ending in '.dex', or a path to a zip archive (such
        as an apk) that contains a 'classes.dex' entry.

  Returns:
    The dextra output lines, as returned by _RunDextraOnDex.
  """
  if dex_or_apk_path.endswith('.dex'):
    return _RunDextraOnDex(dex_or_apk_path)

  # dextra needs a file on disk, so copy classes.dex into a temporary file
  # that lives only for the duration of the run.
  with tempfile.NamedTemporaryFile(suffix='.dex') as extracted_dex:
    with zipfile.ZipFile(dex_or_apk_path) as archive:
      dex_bytes = archive.read('classes.dex')
    extracted_dex.write(dex_bytes)
    extracted_dex.flush()
    return _RunDextraOnDex(extracted_dex.name)
281 return _RunDextraOnDex(tmp_file.name)
282
283
def main():
  """Parses arguments, diffs the two dex files and prints the result.

  Each side (base / new) is read either from a saved dextra output file or by
  running dextra on the given dex / apk, and is translated through that side's
  own mapping file when one is given.

  Exits with status 1 when differences are found, and with an argparse usage
  error when neither input was supplied for one of the sides.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--base-mapping-file',
                      help='Mapping file from proguard output for base dex')
  parser.add_argument('--base-dextra-output',
                      help='dextra -j -f -m output for base dex')
  parser.add_argument('--new-mapping-file',
                      help='Mapping file from proguard output for new dex')
  parser.add_argument('--new-dextra-output',
                      help='dextra -j -f -m output for new dex')
  parser.add_argument('--old',
                      help='Path to base apk / classes.dex')
  parser.add_argument('--new',
                      help='Path to new apk / classes.dex')
  args = parser.parse_args()

  # Validate up front with a usage error; asserts vanish under python -O.
  if not (args.base_dextra_output or args.old):
    parser.error('Must pass either --old or --base-dextra-output')
  if not (args.new_dextra_output or args.new):
    parser.error('Must pass either --new or --new-dextra-output')

  mapping_base = {}
  mapping_new = {}
  if args.base_mapping_file:
    with open(args.base_mapping_file) as f:
      mapping_base = _ReadMappingDict(f)
  if args.new_mapping_file:
    with open(args.new_mapping_file) as f:
      mapping_new = _ReadMappingDict(f)

  if args.base_dextra_output:
    with open(args.base_dextra_output) as f:
      dex_base = _BuildMappedDexDict(f, mapping_base)
  else:
    print('Running dextra #1')
    lines = _RunDextra(args.old)
    dex_base = _BuildMappedDexDict(lines, mapping_base)

  if args.new_dextra_output:
    with open(args.new_dextra_output) as f:
      dex_new = _BuildMappedDexDict(f, mapping_new)
  else:
    print('Running dextra #2')
    lines = _RunDextra(args.new)
    # The new dex must be translated with the new mapping, not the base one.
    dex_new = _BuildMappedDexDict(lines, mapping_new)

  print('Analyzing...')
  diffs = _DiffDexDicts(dex_base, dex_new)
  if diffs:
    for diff in diffs:
      print(diff)
    sys.exit(1)

  class_count = len(dex_base)
  method_count = sum(len(members) for members in dex_base.values())
  print('No meaningful differences: '
        'both have the same %d classes and %d methods.' %
        (class_count, method_count))


if __name__ == '__main__':
  main()
342