Blame - docs/enterprise/extension_query_py2.py - chromium/src.git

blob: 597a5647aa10811054b5baf1253d202b8c33d64c [file] [log] [blame]

Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	1	#!/usr/bin/env python
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	2	# Copyright 2020 The Chromium Authors. All rights reserved.
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	5	"""Transform CBCM Takeout API Data (Python2)."""
				6
				7	from __future__ import print_function
				8
				9	import argparse
				10	import csv
				11	import json
				12	import sys
				13
				14	import google_auth_httplib2
				15
				16	from httplib2 import Http
				17	from google.oauth2.service_account import Credentials
				18
				19
				20	def ComputeExtensionsList(extensions_list, data):
				21	"""Computes list of machines that have an extension.
				22
				23	This sample function processes the \|data\| retrieved from the Takeout API and
				24	calculates the list of machines that have installed each extension listed in
				25	the data.
				26
				27	Args:
				28	extensions_list: the extension list dictionary to fill.
				29	data: the data fetched from the Takeout API.
				30	"""
				31	for device in data['browsers']:
				32	if 'browsers' not in device:
				33	continue
				34	for browser in device['browsers']:
				35	if 'profiles' not in browser:
				36	continue
				37	for profile in browser['profiles']:
				38	if 'extensions' not in profile:
				39	continue
				40	for extension in profile['extensions']:
				41	key = extension['extensionId']
				42	if 'version' in extension:
				43	key = key + ' @ ' + extension['version']
				44	if key not in extensions_list:
				45	current_extension = {
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	46	'name': extension.get('name', ''),
				47	'permissions': extension.get('permissions', ''),
				48	'installed': set(),
				49	'disabled': set(),
				50	'forced': set()
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	51	}
				52	else:
				53	current_extension = extensions_list[key]
				54
				55	machine_name = device['machineName']
				56	current_extension['installed'].add(machine_name)
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	57	if extension.get('installType', '') == 'ADMIN':
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	58	current_extension['forced'].add(machine_name)
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	59	if extension.get('disabled', False):
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	60	current_extension['disabled'].add(machine_name)
				61
				62	extensions_list[key] = current_extension
				63
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	64
def ToUtf8(data):
  """Re-encodes every value of every row in |data| as a UTF-8 byte string.

  Each row is updated in place and then yielded, so the objects produced are
  the same dict objects that were passed in. Values are first converted with
  unicode(), which means non-string values (e.g. integers) end up as their
  encoded text representation. This function relies on Python 2 only APIs
  (dict.iteritems and unicode).

  Args:
    data: an iterable of dict objects.

  Yields:
    The dict objects from |data|, with all their values encoded as UTF-8.
  """
  for row in data:
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for column, raw_value in row.iteritems():
      text = unicode(raw_value)
      row[column] = text.encode('utf-8')
    yield row
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	80
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	81
def DictToList(data, key_name='id'):
  """Converts a dict of dicts into a sequence of dicts.

  The value of each member of |data| must also be a dict. The original key for
  the value is inlined into the value, under the |key_name| key. Note that the
  values are updated in place: the dicts that are yielded are the very same
  objects stored in |data|.

  Args:
    data: a dict where every value is a dict.
    key_name: the name given to the key that is inlined into the dict's values.

  Yields:
    The values from |data|, with each value's key inlined into the value.

  Raises:
    AssertionError: if |data| or any of its values is not a dict.
  """
  assert isinstance(data, dict), '|data| must be a dict'
  for key, value in data.items():
    assert isinstance(value, dict), '|value| must contain dict items'
    value[key_name] = key
    yield value
				100
				101
def Flatten(data, all_columns):
  """Flattens lists and sets inside |data|, one level deep.

  Every list or set value of a row is sorted and joined into a single string
  so that the row can be written to a CSV file. For such a property 'foo' the
  resulting row contains:
    - 'num_foo': the number of elements in the container.
    - 'foo': the joined elements.
    - 'foo_1', 'foo_2', ...: overflow cells, only present when the joined
      string does not fit into a single cell.
  Values that are neither a list nor a set are copied unchanged.

  Args:
    data: an iterable of dict objects to be flattened. The elements of the
      contained lists/sets must be strings.
    all_columns: set of all columns that are found in the result (this will be
      filled by the function as rows are produced).

  Yields:
    A new dict for each row of |data|, whose lists or sets have been flattened.
  """
  SEPARATOR = ', '

  # Max length of a cell in Excel is technically 32767 characters but if we get
  # too close to this limit Excel seems to create weird results when we open
  # the CSV file. To protect against this, give a little more buffer to the max
  # characters.
  MAX_CELL_LENGTH = 32700

  for item in data:
    added_item = {}
    for prop, value in item.items():
      # Non-container properties can be added directly.
      if not isinstance(value, (list, set)):
        added_item[prop] = value
        continue

      # Otherwise join the container together into a single cell.
      added_item['num_' + prop] = len(value)

      # For long lists, the cell contents may go over MAX_CELL_LENGTH, so
      # split the list into chunks that will fit into MAX_CELL_LENGTH.
      flat_list = SEPARATOR.join(sorted(value))
      overflow_prop_index = 0
      while True:
        current_column = prop
        if overflow_prop_index:
          current_column = prop + '_' + str(overflow_prop_index)

        if len(flat_list) > MAX_CELL_LENGTH:
          # Split at the last separator that lies entirely within the first
          # MAX_CELL_LENGTH characters, so no element is cut in half.
          last_separator = flat_list.rfind(SEPARATOR, 0, MAX_CELL_LENGTH)
          if last_separator != -1:
            added_item[current_column] = flat_list[:last_separator]
            flat_list = flat_list[last_separator + len(SEPARATOR):]
            overflow_prop_index += 1
            continue

        # Fall-through case where no more splitting is possible, this is the
        # last cell to add for this list.
        added_item[current_column] = flat_list
        break

      assert isinstance(
          added_item[prop],
          (int, bool, str, unicode)), ('unexpected type for item: %s' %
                                       type(added_item[prop]).__name__)

    all_columns.update(added_item.keys())
    yield added_item
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	167
				168
				169	def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
				170	"""Saves an extensions list to a CSV file.
				171
				172	Args:
				173	extensions_list: an extensions list as returned by ComputeExtensionsList
				174	csv_filename: the name of the CSV file to save
				175	sort_column: the name of the column by which to sort the data
				176	"""
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	177	all_columns = set()
				178	flattened_list = [
				179	x for x in ToUtf8(Flatten(DictToList(extensions_list), all_columns))
				180	]
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	181	desired_column_order = [
				182	'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
				183	'num_forced', 'permissions', 'installed', 'disabled', 'forced'
				184	]
				185
				186	# Order the columns as desired. Columns other than those in
				187	# \|desired_column_order\| will be in an unspecified order after these columns.
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	188	ordered_fieldnames = []
				189	for c in desired_column_order:
				190	matching_columns = []
				191	for f in all_columns:
				192	if f == c or f.startswith(c):
				193	matching_columns.append(f)
				194	ordered_fieldnames.extend(sorted(matching_columns))
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	195
				196	ordered_fieldnames.extend(
				197	[x for x in desired_column_order if x not in ordered_fieldnames])
				198	with open(csv_filename, mode='w') as csv_file:
				199	writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
				200	writer.writeheader()
				201	for row in sorted(flattened_list, key=lambda ext: ext[sort_column]):
				202	writer.writerow(row)
				203
				204
				205	def main(args):
				206	# Load the json format key that you downloaded from the Google API
				207	# Console when you created your service account. For p12 keys, use the
				208	# from_p12_keyfile method of ServiceAccountCredentials and specify the
				209	# service account email address, p12 keyfile, and scopes.
				210	service_credentials = Credentials.from_service_account_file(
				211	args.service_account_key_path,
				212	scopes=[
Tien Mai	41367aa	2020-03-12 14:24:24	[diff] [blame]	213	'https://www.googleapis.com/auth/admin.directory.device.chromebrowsers.readonly'
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	214	],
				215	subject=args.admin_email)
				216
				217	try:
				218	http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
				219	extensions_list = {}
				220	base_request_url = 'https://admin.googleapis.com/admin/directory/v1.1beta1/customer/my_customer/devices/chromebrowsers'
				221	request_parameters = ''
				222	browsers_processed = 0
				223	while True:
				224	print('Making request to server ...')
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	225	retrycount = 0
				226	while retrycount < 5:
				227	data = json.loads(
				228	http.request(base_request_url + '?' + request_parameters, 'GET')[1])
				229
				230	if 'browsers' not in data:
				231	print('Response error, retrying...')
				232	time.sleep(3)
				233	retrycount += 1
				234	else:
				235	break
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	236
				237	browsers_in_data = len(data['browsers'])
				238	print('Request returned %s results, analyzing ...' % (browsers_in_data))
				239	ComputeExtensionsList(extensions_list, data)
				240	browsers_processed += browsers_in_data
				241
				242	if 'nextPageToken' not in data or not data['nextPageToken']:
				243	break
				244
				245	print('%s browsers processed.' % (browsers_processed))
				246
				247	if (args.max_browsers_to_process is not None and
				248	args.max_browsers_to_process <= browsers_processed):
				249	print('Stopping at %s browsers processed.' % (browsers_processed))
				250	break
				251
				252	request_parameters = ('pageToken={}').format(data['nextPageToken'])
				253	finally:
				254	print('Analyze results ...')
				255	ExtensionListAsCsv(extensions_list, args.extension_list_csv)
				256	print("Results written to '%s'" % (args.extension_list_csv))
				257
				258
				259	if __name__ == '__main__':
				260	parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')
				261	parser.add_argument(
				262	'-k',
				263	'--service_account_key_path',
				264	metavar='FILENAME',
				265	required=True,
				266	help='The service account key file used to make API requests.')
				267	parser.add_argument(
				268	'-a',
				269	'--admin_email',
				270	required=True,
				271	help='The admin user used to make the API requests.')
				272	parser.add_argument(
				273	'-x',
				274	'--extension_list_csv',
				275	metavar='FILENAME',
				276	default='./extension_list.csv',
				277	help='Generate an extension list to the specified CSV '
				278	'file')
				279	parser.add_argument(
				280	'-m',
				281	'--max_browsers_to_process',
				282	type=int,
				283	help='Maximum number of browsers to process. (Must be > 0).')
				284	args = parser.parse_args()
				285
				286	if (args.max_browsers_to_process is not None and
				287	args.max_browsers_to_process <= 0):
				288	print('max_browsers_to_process must be > 0.')
				289	parser.print_help()
				290	sys.exit(1)
				291
				292	main(args)