blob: 597a5647aa10811054b5baf1253d202b8c33d64c [file] [log] [blame]
Tien Mai597a0d1f2020-02-18 16:01:251#!/usr/bin/env python
Tien Maif9a36f3a2020-06-19 21:28:252# Copyright 2020 The Chromium Authors. All rights reserved.
Tien Mai597a0d1f2020-02-18 16:01:253# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
Tien Mai597a0d1f2020-02-18 16:01:255"""Transform CBCM Takeout API Data (Python2)."""
6
7from __future__ import print_function
8
9import argparse
10import csv
11import json
12import sys
13
14import google_auth_httplib2
15
16from httplib2 import Http
17from google.oauth2.service_account import Credentials
18
19
def ComputeExtensionsList(extensions_list, data):
  """Records, for every extension in |data|, the machines that have it.

  Walks the device -> browser -> profile -> extension hierarchy of |data| and
  updates |extensions_list| in place. Entries are keyed by the extension id,
  followed by ' @ <version>' when the extension reports a version. Devices,
  browsers and profiles that lack the next level of the hierarchy are skipped.

  Args:
    extensions_list: the extension list dictionary to fill. Each value is a
      dict with the extension's 'name' and 'permissions' and three sets of
      machine names: 'installed', 'disabled' and 'forced' (extensions whose
      'installType' is 'ADMIN').
    data: the data fetched from the Takeout API. It must have a 'browsers'
      entry.
  """
  for machine in data['browsers']:
    for browser in machine.get('browsers', ()):
      for profile in browser.get('profiles', ()):
        for ext in profile.get('extensions', ()):
          ext_key = ext['extensionId']
          if 'version' in ext:
            ext_key = ' @ '.join((ext_key, ext['version']))

          if ext_key in extensions_list:
            record = extensions_list[ext_key]
          else:
            record = {
                'name': ext.get('name', ''),
                'permissions': ext.get('permissions', ''),
                'installed': set(),
                'disabled': set(),
                'forced': set(),
            }

          host = machine['machineName']
          record['installed'].add(host)
          if ext.get('installType', '') == 'ADMIN':
            record['forced'].add(host)
          if ext.get('disabled', False):
            record['disabled'].add(host)

          # Only store the record once it has been fully updated.
          extensions_list[ext_key] = record
63
Tien Maif9a36f3a2020-06-19 21:28:2564
def ToUtf8(data):
  """Converts all the values in |data| to strings the csv module can write.

  Expects |data| to be an iterable of dict objects. Each dict is updated in
  place and then yielded.

  On Python 2 every value is converted to unicode and encoded as a UTF-8 byte
  string. On Python 3, where there is no separate unicode type and text is
  encoded when it is written to the file, every value is converted to str.

  Args:
    data: the data to be converted.

  Yields:
    The dict objects from |data|, whose values have been converted to strings.
  """
  try:
    text_type = unicode  # Only defined on Python 2.
  except NameError:
    text_type = None

  for entry in data:
    # Iterate over a snapshot so the entry can be safely updated in the loop.
    for prop, value in list(entry.items()):
      if text_type is None:
        entry[prop] = str(value)
      else:
        entry[prop] = text_type(value).encode('utf-8')
    yield entry
Tien Mai597a0d1f2020-02-18 16:01:2580
Tien Maif9a36f3a2020-06-19 21:28:2581
def DictToList(data, key_name='id'):
  """Streams the values of a dict of dicts, tagging each with its own key.

  Every value of |data| must itself be a dict. The key a value is stored under
  is written into that value (in place) under |key_name| before the value is
  yielded.

  Args:
    data: a dict where every value is a dict
    key_name: the name given to the key that is inlined into the dict's values

  Yields:
    The values from |data|, with each value's key inlined into the value.
  """
  assert isinstance(data, dict), '|data| must be a dict'
  for entry_key, entry in data.items():
    assert isinstance(entry, dict), '|value| must contain dict items'
    entry[key_name] = entry_key
    yield entry
100
101
def Flatten(data, all_columns):
  """Flattens lists inside |data|, one level deep.

  Every list or set found in an item is sorted and joined into a single string
  so that the item can be written as one row of a CSV file. For a list stored
  under 'prop', the number of elements is stored under 'num_prop'. If the
  joined string is too long for a single cell, it is split at element
  boundaries over the columns 'prop', 'prop_1', 'prop_2', ...

  Args:
    data: the data to be flattened, an iterable of dict objects. The lists and
      sets in it must only contain strings.
    all_columns: set of all columns that are found in the result (this will be
      filled by the function, as each item is produced).

  Yields:
    A new dict for each item in |data|, in which lists and sets have been
    flattened. The items in |data| themselves are left untouched.
  """
  SEPARATOR = ', '

  # Max length of a cell in Excel is technically 32767 characters but if we get
  # too close to this limit Excel seems to create weird results when we open
  # the CSV file. To protect against this, give a little more buffer to the max
  # characters.
  MAX_CELL_LENGTH = 32700

  # |unicode| only exists on Python 2.
  try:
    string_types = (str, unicode)
  except NameError:
    string_types = (str,)

  for item in data:
    added_item = {}
    for prop, value in item.items():
      # Non-container properties can be added directly.
      if not isinstance(value, (list, set)):
        added_item[prop] = value
        continue

      # Otherwise join the container together into a single cell.
      added_item['num_' + prop] = len(value)

      # For long lists, the cell contents may go over MAX_CELL_LENGTH, so
      # split the list into chunks that will fit into MAX_CELL_LENGTH.
      flat_list = SEPARATOR.join(sorted(value))
      overflow_prop_index = 0
      while True:
        current_column = prop
        if overflow_prop_index:
          current_column = prop + '_' + str(overflow_prop_index)

        if len(flat_list) > MAX_CELL_LENGTH:
          # Cut at the last separator that lies entirely within the limit.
          last_separator = flat_list.rfind(SEPARATOR, 0, MAX_CELL_LENGTH)
          if last_separator != -1:
            added_item[current_column] = flat_list[:last_separator]
            flat_list = flat_list[last_separator + len(SEPARATOR):]
            overflow_prop_index += 1
            continue

        # Fall-through case where no more splitting is possible, this is the
        # last cell to add for this list.
        added_item[current_column] = flat_list
        break

      assert isinstance(
          added_item[prop],
          (int, bool) + string_types), ('unexpected type for item: %s' %
                                        type(added_item[prop]).__name__)

    all_columns.update(added_item.keys())
    yield added_item
Tien Mai597a0d1f2020-02-18 16:01:25167
168
def _OverflowIndex(column, base):
  """Returns the position of |column| among the columns holding |base|.

  A list that is too long for one cell is spread by Flatten() over the columns
  |base|, |base|_1, |base|_2, ...

  Args:
    column: the name of the column to classify.
    base: the name of the column the list was originally stored under.

  Returns:
    0 if |column| is |base| itself, N if |column| is '<base>_N', and None if
    |column| does not belong to |base|.
  """
  if column == base:
    return 0
  prefix = base + '_'
  suffix = column[len(prefix):]
  if column.startswith(prefix) and suffix.isdigit():
    return int(suffix)
  return None


def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
  """Saves an extensions list to a CSV file.

  The header starts with the well-known columns in a fixed order, each one
  directly followed by its overflow columns in numeric order. Well-known
  columns that are absent from the data come next, so the header is complete
  even for an empty list, and any other column is appended at the end.

  Args:
    extensions_list: an extensions list as returned by ComputeExtensionsList
    csv_filename: the name of the CSV file to save
    sort_column: the name of the column by which to sort the data
  """
  all_columns = set()
  flattened_list = list(
      ToUtf8(Flatten(DictToList(extensions_list), all_columns)))

  desired_column_order = [
      'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
      'num_forced', 'permissions', 'installed', 'disabled', 'forced'
  ]

  # Order the columns as desired, keeping the overflow columns of a list next
  # to each other and sorted by their index (so that 'x_2' precedes 'x_10').
  ordered_fieldnames = []
  for base in desired_column_order:
    matching_columns = []
    for column in all_columns:
      index = _OverflowIndex(column, base)
      if index is not None:
        matching_columns.append((index, column))
    ordered_fieldnames.extend(column for _, column in sorted(matching_columns))

  ordered_fieldnames.extend(
      [x for x in desired_column_order if x not in ordered_fieldnames])

  # Columns other than those in |desired_column_order| come last. They must be
  # part of the header, since DictWriter rejects rows with unknown columns.
  ordered_fieldnames.extend(
      sorted(x for x in all_columns if x not in ordered_fieldnames))

  # The csv module needs a binary file on Python 2 and a text file that does
  # not translate newlines on Python 3; otherwise rows end in '\r\r\n' on
  # Windows.
  if sys.version_info[0] < 3:
    csv_file = open(csv_filename, mode='wb')
  else:
    csv_file = open(csv_filename, mode='w', newline='', encoding='utf-8')

  with csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
    writer.writeheader()
    for row in sorted(flattened_list, key=lambda ext: ext[sort_column]):
      writer.writerow(row)
203
204
def main(args):
  """Downloads the browser data and writes the extension list CSV file.

  Requests the list of browsers page by page, passing every page to
  ComputeExtensionsList(). Whatever has been collected is always written to
  |args.extension_list_csv|, even if a request fails part-way through.

  Args:
    args: the parsed command line arguments. The attributes read are
      service_account_key_path, admin_email, extension_list_csv and
      max_browsers_to_process (None means there is no limit).

  Raises:
    RuntimeError: if the server still returns a response without a 'browsers'
      entry after the last attempt.
  """
  # Imported locally because |time| is not imported at the top of this file.
  import time

  MAX_ATTEMPTS = 5
  RETRY_DELAY_SECONDS = 3

  # Load the json format key that you downloaded from the Google API
  # Console when you created your service account. For p12 keys, use the
  # from_p12_keyfile method of ServiceAccountCredentials and specify the
  # service account email address, p12 keyfile, and scopes.
  service_credentials = Credentials.from_service_account_file(
      args.service_account_key_path,
      scopes=[
          'https://www.googleapis.com/auth/admin.directory.device.chromebrowsers.readonly'
      ],
      subject=args.admin_email)

  # Created before the try block so that the finally clause can always use it.
  extensions_list = {}
  try:
    http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
    base_request_url = 'https://admin.googleapis.com/admin/directory/v1.1beta1/customer/my_customer/devices/chromebrowsers'
    request_parameters = ''
    browsers_processed = 0
    while True:
      print('Making request to server ...')
      attempt = 0
      while True:
        attempt += 1
        data = json.loads(
            http.request(base_request_url + '?' + request_parameters, 'GET')[1])
        if 'browsers' in data:
          break

        if attempt >= MAX_ATTEMPTS:
          raise RuntimeError(
              "No 'browsers' in the server response after %d attempts: %s" %
              (MAX_ATTEMPTS, data))

        print('Response error, retrying...')
        time.sleep(RETRY_DELAY_SECONDS)

      browsers_in_data = len(data['browsers'])
      print('Request returned %s results, analyzing ...' % (browsers_in_data))
      ComputeExtensionsList(extensions_list, data)
      browsers_processed += browsers_in_data

      # A missing or empty page token means that this was the last page.
      if 'nextPageToken' not in data or not data['nextPageToken']:
        break

      print('%s browsers processed.' % (browsers_processed))

      if (args.max_browsers_to_process is not None and
          args.max_browsers_to_process <= browsers_processed):
        print('Stopping at %s browsers processed.' % (browsers_processed))
        break

      request_parameters = ('pageToken={}').format(data['nextPageToken'])
  finally:
    # Save what was collected, including partial results after a failure.
    print('Analyze results ...')
    ExtensionListAsCsv(extensions_list, args.extension_list_csv)
    print("Results written to '%s'" % (args.extension_list_csv))
257
258
if __name__ == '__main__':
  # Command line entry point: parse the options, validate them, run main().
  parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')
  parser.add_argument(
      '-k', '--service_account_key_path',
      required=True,
      metavar='FILENAME',
      help='The service account key file used to make API requests.')
  parser.add_argument(
      '-a', '--admin_email',
      required=True,
      help='The admin user used to make the API requests.')
  parser.add_argument(
      '-x', '--extension_list_csv',
      default='./extension_list.csv',
      metavar='FILENAME',
      help='Generate an extension list to the specified CSV file')
  parser.add_argument(
      '-m', '--max_browsers_to_process',
      type=int,
      help='Maximum number of browsers to process. (Must be > 0).')
  args = parser.parse_args()

  # The limit is optional, but when it is given it has to be positive.
  browser_limit = args.max_browsers_to_process
  if browser_limit is not None and browser_limit <= 0:
    print('max_browsers_to_process must be > 0.')
    parser.print_help()
    sys.exit(1)

  main(args)
292 main(args)