Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 1 | #!/usr/bin/env python |
Tien Mai | f9a36f3a | 2020-06-19 21:28:25 | [diff] [blame] | 2 | # Copyright 2020 The Chromium Authors. All rights reserved. |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 5 | """Transform CBCM Takeout API Data (Python2).""" |
| 6 | |
| 7 | from __future__ import print_function |
| 8 | |
| 9 | import argparse |
| 10 | import csv |
| 11 | import json |
| 12 | import sys |
| 13 | |
| 14 | import google_auth_httplib2 |
| 15 | |
| 16 | from httplib2 import Http |
| 17 | from google.oauth2.service_account import Credentials |
| 18 | |
| 19 | |
def ComputeExtensionsList(extensions_list, data):
  """Aggregates, per extension, the machines on which it was reported.

  Walks every device -> browser -> profile -> extension entry of |data| and
  records the device's machine name in |extensions_list|. Entries are keyed by
  the extension id, followed by ' @ <version>' when the extension reports a
  version. Devices, browsers and profiles lacking the nested key are skipped.

  Args:
    extensions_list: dict that is filled in place. Each value is a dict holding
      the 'name' and 'permissions' of the first occurrence seen, plus three
      sets of machine names: 'installed', 'disabled' and 'forced'.
    data: the data fetched from the Takeout API; must have a 'browsers' key.
  """

  def _ReportedExtensions():
    """Yields a (device, extension) pair for every extension in |data|."""
    for machine in data['browsers']:
      for browser in machine.get('browsers', ()):
        for profile in browser.get('profiles', ()):
          for reported in profile.get('extensions', ()):
            yield machine, reported

  for machine, reported in _ReportedExtensions():
    entry_key = reported['extensionId']
    if 'version' in reported:
      entry_key += ' @ ' + reported['version']

    try:
      record = extensions_list[entry_key]
    except KeyError:
      # First time this extension (at this version) is seen.
      record = {
          'name': reported.get('name', ''),
          'permissions': reported.get('permissions', ''),
          'installed': set(),
          'disabled': set(),
          'forced': set(),
      }

    host = machine['machineName']
    record['installed'].add(host)
    # Extensions with install type 'ADMIN' are tracked as forced.
    if reported.get('installType', '') == 'ADMIN':
      record['forced'].add(host)
    if reported.get('disabled', False):
      record['disabled'].add(host)

    extensions_list[entry_key] = record
| 63 | |
Tien Mai | f9a36f3a | 2020-06-19 21:28:25 | [diff] [blame] | 64 | |
def ToUtf8(data):
  """Ensures all the values in |data| are encoded as UTF-8.

  Expects |data| to be an iterable of dict objects. Each dict is updated in
  place: every value is converted to text and then encoded, so the values of
  the yielded dicts are UTF-8 byte strings.

  Args:
    data: the data to be converted to UTF-8.

  Yields:
    The same dict objects found in |data|, with their values encoded as UTF-8.
  """
  # |unicode| only exists on Python 2; on Python 3 the text type is |str|.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  for entry in data:
    # Iterate over a snapshot of the keys so that the dict is never assigned to
    # while an iterator over it is still live.
    for prop in list(entry):
      entry[prop] = text_type(entry[prop]).encode('utf-8')
    yield entry
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 80 | |
Tien Mai | f9a36f3a | 2020-06-19 21:28:25 | [diff] [blame] | 81 | |
def DictToList(data, key_name='id'):
  """Turns a dict of dicts into a stream of its values.

  Every value of |data| must itself be a dict. Before a value is yielded, the
  key it was stored under is written into it under |key_name|, replacing any
  entry already present under that name. The values are modified in place.

  Args:
    data: a dict where every value is a dict
    key_name: the name under which the original key is stored in each value

  Yields:
    Each value of |data|, now also carrying its original key.
  """
  assert isinstance(data, dict), '|data| must be a dict'
  for original_key, record in data.items():
    assert isinstance(record, dict), '|value| must contain dict items'
    record[key_name] = original_key
    yield record
| 100 | |
| 101 | |
def Flatten(data, all_columns, max_cell_length=32700):
  """Flattens lists inside |data|, one level deep.

  This function will flatten each dictionary key in |data| into a single row
  so that it can be written to a CSV file. A list or set stored under |prop| is
  sorted and joined with ', ' into the |prop| column, and its element count is
  stored under 'num_' + |prop|. When the joined text is longer than
  |max_cell_length| it is split at separators and the overflow is stored in
  the extra columns |prop|_1, |prop|_2, and so on.

  Args:
    data: the data to be flattened (an iterable of dicts).
    all_columns: set of all columns that are found in the result (this will be
      filled by the function).
    max_cell_length: maximum number of characters per flattened cell; expected
      to be positive. Max length of a cell in Excel is technically 32767
      characters but if we get too close to this limit Excel seems to create
      weird results when we open the CSV file, hence the slightly smaller
      default.

  Yields:
    A list of dict objects whose lists or sets have been flattened.
  """
  SEPARATOR = ', '

  # |unicode| only exists on Python 2; leave it out of the check elsewhere.
  try:
    cell_types = (int, bool, str, unicode)
  except NameError:
    cell_types = (int, bool, str)

  for item in data:
    added_item = {}
    for prop, value in item.items():
      # Non-container properties can be added directly.
      if not isinstance(value, (list, set)):
        added_item[prop] = value
        continue

      # Otherwise join the container together into a single cell.
      added_item['num_' + prop] = len(value)

      # For long lists, the cell contents may go over |max_cell_length|, so
      # split the list into chunks that will fit into |max_cell_length|.
      remaining = SEPARATOR.join(sorted(value))
      overflow_prop_index = 0
      current_column = prop
      while len(remaining) > max_cell_length:
        # Cut at the last separator lying entirely within the first
        # |max_cell_length| characters.
        last_separator = remaining.rfind(SEPARATOR, 0, max_cell_length)
        if last_separator == -1:
          # A single element is longer than the limit; it cannot be split.
          break
        added_item[current_column] = remaining[:last_separator]
        remaining = remaining[last_separator + len(SEPARATOR):]
        overflow_prop_index += 1
        current_column = prop + '_' + str(overflow_prop_index)

      # No more splitting is needed or possible, this is the last cell to add
      # for this list.
      added_item[current_column] = remaining

      assert isinstance(added_item[prop], cell_types), (
          'unexpected type for item: %s' % type(added_item[prop]).__name__)

    all_columns.update(added_item.keys())
    yield added_item
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 167 | |
| 168 | |
def _OverflowColumnSortKey(column, base):
  """Sorts |base| first, then |base|_1, |base|_2, ... in numeric order."""
  suffix = column[len(base):]
  if not suffix:
    return (0, 0, column)
  if suffix.startswith('_') and suffix[1:].isdigit():
    return (1, int(suffix[1:]), column)
  # Any other column that merely shares the prefix goes last.
  return (2, 0, column)


def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
  """Saves an extensions list to a CSV file.

  The columns follow a fixed preferred order; the overflow columns of a
  flattened list (e.g. 'installed_1', 'installed_2') are placed right after
  the column they continue, in numeric order. Preferred columns that no row
  provides and columns outside the preferred order are appended at the end.

  Args:
    extensions_list: an extensions list as returned by ComputeExtensionsList
    csv_filename: the name of the CSV file to save
    sort_column: the name of the column by which to sort the data
  """
  all_columns = set()
  flattened = Flatten(DictToList(extensions_list), all_columns)

  if sys.version_info[0] < 3:
    # The Python 2 csv module writes byte strings and expects a file opened in
    # binary mode; text mode yields blank rows on platforms that translate
    # line endings.
    rows = list(ToUtf8(flattened))
    open_kwargs = {'mode': 'wb'}
  else:
    # The Python 3 csv module writes text: let the file object do the UTF-8
    # encoding and disable newline translation as the csv documentation asks.
    rows = list(flattened)
    open_kwargs = {'mode': 'w', 'newline': '', 'encoding': 'utf-8'}

  desired_column_order = [
      'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
      'num_forced', 'permissions', 'installed', 'disabled', 'forced'
  ]

  # Order the columns as desired, keeping overflow columns next to the column
  # they belong to. |all_columns| is only complete here because the rows were
  # fully materialized above.
  ordered_fieldnames = []
  placed = set()
  for wanted in desired_column_order:
    matching = [
        c for c in all_columns if c.startswith(wanted) and c not in placed
    ]
    matching.sort(key=lambda c: _OverflowColumnSortKey(c, wanted))
    ordered_fieldnames.extend(matching)
    placed.update(matching)

  # Desired columns that no row provided still get a header.
  for wanted in desired_column_order:
    if wanted not in placed:
      ordered_fieldnames.append(wanted)
      placed.add(wanted)

  # Any other column goes last; without this DictWriter would reject rows
  # containing a column that is missing from |fieldnames|.
  ordered_fieldnames.extend(sorted(all_columns - placed))

  with open(csv_filename, **open_kwargs) as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
    writer.writeheader()
    for row in sorted(rows, key=lambda ext: ext[sort_column]):
      writer.writerow(row)
| 203 | |
| 204 | |
def main(args):
  """Downloads the browser list and writes the extension report as CSV.

  Pages through the Takeout API, feeding every page to ComputeExtensionsList.
  Whatever was collected is written to the CSV file even when a request
  fails part-way through.

  Args:
    args: the parsed command line arguments. Reads service_account_key_path,
      admin_email, extension_list_csv and max_browsers_to_process.

  Raises:
    RuntimeError: if a request still has no 'browsers' entry after all
      attempts.
  """
  # Imported locally: the retry delay below needs |time|, which the file's
  # top-level imports do not provide.
  import time

  MAX_ATTEMPTS = 5
  RETRY_DELAY_SECONDS = 3

  # Load the json format key that you downloaded from the Google API
  # Console when you created your service account. For p12 keys, use the
  # from_p12_keyfile method of ServiceAccountCredentials and specify the
  # service account email address, p12 keyfile, and scopes.
  service_credentials = Credentials.from_service_account_file(
      args.service_account_key_path,
      scopes=[
          'https://www.googleapis.com/auth/'
          'admin.directory.device.chromebrowsers.readonly'
      ],
      subject=args.admin_email)

  # Bound before the try block so the finally clause can always use it.
  extensions_list = {}
  try:
    http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
    base_request_url = (
        'https://admin.googleapis.com/admin/directory/v1.1beta1/'
        'customer/my_customer/devices/chromebrowsers')
    request_parameters = ''
    browsers_processed = 0
    while True:
      print('Making request to server ...')
      for attempt in range(1, MAX_ATTEMPTS + 1):
        data = json.loads(
            http.request(base_request_url + '?' + request_parameters, 'GET')[1])
        if 'browsers' in data:
          break
        if attempt == MAX_ATTEMPTS:
          # Give up with the server's answer instead of a bare KeyError.
          raise RuntimeError('No browsers in response after %d attempts: %s' %
                             (MAX_ATTEMPTS, data))
        print('Response error, retrying...')
        time.sleep(RETRY_DELAY_SECONDS)

      browsers_in_data = len(data['browsers'])
      print('Request returned %s results, analyzing ...' % (browsers_in_data))
      ComputeExtensionsList(extensions_list, data)
      browsers_processed += browsers_in_data

      # A missing or empty page token means this was the last page.
      if not data.get('nextPageToken'):
        break

      print('%s browsers processed.' % (browsers_processed))

      if (args.max_browsers_to_process is not None and
          args.max_browsers_to_process <= browsers_processed):
        print('Stopping at %s browsers processed.' % (browsers_processed))
        break

      request_parameters = ('pageToken={}').format(data['nextPageToken'])
  finally:
    # Best effort: save what was gathered, also after an error.
    print('Analyze results ...')
    ExtensionListAsCsv(extensions_list, args.extension_list_csv)
    print("Results written to '%s'" % (args.extension_list_csv))
| 257 | |
| 258 | |
if __name__ == '__main__':
  # Command line definition: (flags, keyword options) per argument, registered
  # in this order.
  argument_specs = (
      (('-k', '--service_account_key_path'), {
          'metavar': 'FILENAME',
          'required': True,
          'help': 'The service account key file used to make API requests.',
      }),
      (('-a', '--admin_email'), {
          'required': True,
          'help': 'The admin user used to make the API requests.',
      }),
      (('-x', '--extension_list_csv'), {
          'metavar': 'FILENAME',
          'default': './extension_list.csv',
          'help': 'Generate an extension list to the specified CSV file',
      }),
      (('-m', '--max_browsers_to_process'), {
          'type': int,
          'help': 'Maximum number of browsers to process. (Must be > 0).',
      }),
  )

  parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')
  for flags, options in argument_specs:
    parser.add_argument(*flags, **options)
  args = parser.parse_args()

  # The browser limit is optional, but when given it has to be positive.
  browser_limit = args.max_browsers_to_process
  if browser_limit is not None and browser_limit <= 0:
    print('max_browsers_to_process must be > 0.')
    parser.print_help()
    sys.exit(1)

  main(args)