cygprofile: Improve orderfile comparison.
This CL adds the "average fractional distance" metric to examine ordering stability.
It also makes the script compatible with orderfiles that list symbols in
addition to sections: symbol lines are skipped and only sections are compared.
For instance, for a regular roll:
$ tools/cygprofile/compare_orderfiles.py --from-commit f9062ad5c6383ae959f1c80d20099375a2be745e
[...]
Symbols count:
first: 417918
second: 418657
New symbols = 1004
Removed symbols = 265
Average fractional distance = 0.15%
And for a roll with a significant change:
$ ./tools/cygprofile/compare_orderfiles.py --from-commit 4c1e7a85c8bac01731203990fd4b66a7b5b19dd9
[...]
Symbols count:
first: 428730
second: 190074
New symbols = 16952
Removed symbols = 255608
Average fractional distance = 26.53%
Note that the distance is insensitive to the relative sizes of the
orderfiles: it depends only on the relative ordering of the symbols
present in both.
Change-Id: I188684ef7a0bb545e6a55bc335d432dd6690a203
Reviewed-on: https://chromium-review.googlesource.com/876088
Commit-Queue: Benoit L <[email protected]>
Reviewed-by: Matthew Cary <[email protected]>
Cr-Commit-Position: refs/heads/master@{#539814}
diff --git a/tools/cygprofile/compare_orderfiles.py b/tools/cygprofile/compare_orderfiles.py
index 3cf2934..b110d8e 100755
--- a/tools/cygprofile/compare_orderfiles.py
+++ b/tools/cygprofile/compare_orderfiles.py
@@ -10,6 +10,7 @@
"""
import argparse
+import collections
import logging
import os
import subprocess
@@ -32,6 +33,10 @@
lines = [line.strip() for line in f]
for entry in lines:
+ # Keep only sections, not symbols (symbols don't contain '.').
+ # We could only keep symbols, but then old orderfiles would not be parsed.
+ if '.' not in entry:
+ continue
# Example: .text.startup.BLA
symbol_name = entry[entry.rindex('.'):]
if symbol_name in already_seen or symbol_name == '*' or entry == '.text':
@@ -41,12 +46,32 @@
return symbols
+def CommonSymbolsToOrder(symbols, common_symbols):
+ """Returns s -> index for all s in common_symbols."""
+ result = {}
+ index = 0
+ for s in symbols:
+ if s not in common_symbols:
+ continue
+ result[s] = index
+ index += 1
+ return result
+
+
+CompareResult = collections.namedtuple(
+ 'CompareResult', ('first_count', 'second_count',
+ 'new_count', 'removed_count',
+ 'average_fractional_distance'))
+
def Compare(first_filename, second_filename):
"""Outputs a comparison of two orderfiles to stdout.
Args:
first_filename: (str) First orderfile.
second_filename: (str) Second orderfile.
+
+ Returns:
+ An instance of CompareResult.
"""
first_symbols = ParseOrderfile(first_filename)
second_symbols = ParseOrderfile(second_filename)
@@ -56,8 +81,22 @@
second_symbols = set(second_symbols)
new_symbols = second_symbols - first_symbols
removed_symbols = first_symbols - second_symbols
+ common_symbols = first_symbols & second_symbols
+ # Distance between orderfiles.
+ first_to_ordering = CommonSymbolsToOrder(first_symbols, common_symbols)
+ second_to_ordering = CommonSymbolsToOrder(second_symbols, common_symbols)
+ total_distance = sum(abs(first_to_ordering[s] - second_to_ordering[s])\
+ for s in first_to_ordering)
+ # Each distance is in [0, len(common_symbols)] and there are
+ # len(common_symbols) entries, hence the normalization.
+ average_fractional_distance = float(total_distance) / (len(common_symbols)**2)
print 'New symbols = %d' % len(new_symbols)
print 'Removed symbols = %d' % len(removed_symbols)
+ print 'Average fractional distance = %.2f%%' % (
+ 100. * average_fractional_distance)
+ return CompareResult(len(first_symbols), len(second_symbols),
+ len(new_symbols), len(removed_symbols),
+ average_fractional_distance)
def CheckOrderfileCommit(commit_hash, clank_path):
@@ -70,7 +109,8 @@
output = subprocess.check_output(
['git', 'show', r'--format=%an %s', commit_hash], cwd=clank_path)
first_line = output.split('\n')[0]
- assert first_line == 'clank-autoroller Update Orderfile.', (
+ # Capitalization changed at some point.
+ assert first_line.upper() == 'clank-autoroller Update Orderfile.'.upper(), (
'Not an orderfile commit')
@@ -129,6 +169,7 @@
parser.add_argument('--second', help='Second orderfile')
parser.add_argument('--from-commit', help='Analyze the difference in the '
'orderfile from an orderfile bot commit.')
+ parser.add_argument('--csv-output', help='Appends the result to a CSV file.')
return parser
@@ -143,7 +184,11 @@
first, second = GetOrderfilesFromCommit(args.from_commit)
try:
logging.info('Comparing the orderfiles')
- Compare(first, second)
+ result = Compare(first, second)
+ if args.csv_output:
+ with open(args.csv_output, 'a') as f:
+ f.write('%s,%d,%d,%d,%d,%f\n' % tuple(
+ [args.from_commit] + list(result)))
finally:
os.remove(first)
os.remove(second)