Skip to content

Commit e86a4da

Browse files
authored
chore: add a new metric (#1345)
* chore: add a new metric
* update print
1 parent 6408f84 commit e86a4da

File tree

2 files changed: +36 -8 lines changed

bigframes/session/metrics.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,31 @@ class ExecutionMetrics:
3030
slot_millis: int = 0
3131
bytes_processed: int = 0
3232
execution_secs: float = 0
33+
query_char_count: int = 0
3334

3435
def count_job_stats(self, query_job: bq_job.QueryJob):
3536
stats = get_performance_stats(query_job)
3637
if stats is not None:
37-
bytes_processed, slot_millis, execution_secs = stats
38+
bytes_processed, slot_millis, execution_secs, query_char_count = stats
3839
self.execution_count += 1
3940
self.bytes_processed += bytes_processed
4041
self.slot_millis += slot_millis
4142
self.execution_secs += execution_secs
43+
self.query_char_count += query_char_count
4244
if LOGGING_NAME_ENV_VAR in os.environ:
4345
# when running notebooks via pytest nbmake
44-
write_stats_to_disk(bytes_processed, slot_millis, execution_secs)
46+
write_stats_to_disk(
47+
bytes_processed, slot_millis, execution_secs, query_char_count
48+
)
4549

4650

4751
def get_performance_stats(
4852
query_job: bigquery.QueryJob,
49-
) -> Optional[Tuple[int, int, float]]:
53+
) -> Optional[Tuple[int, int, float, int]]:
5054
"""Parse the query job for performance stats.
5155
5256
Return None if the stats do not reflect real work done in bigquery.
5357
"""
54-
5558
if (
5659
query_job.configuration.dry_run
5760
or query_job.created is None
@@ -68,12 +71,13 @@ def get_performance_stats(
6871
return None # filter out mocks
6972

7073
execution_secs = (query_job.ended - query_job.created).total_seconds()
74+
query_char_count = len(query_job.query)
7175

72-
return bytes_processed, slot_millis, execution_secs
76+
return bytes_processed, slot_millis, execution_secs, query_char_count
7377

7478

7579
def write_stats_to_disk(
76-
bytes_processed: int, slot_millis: int, exec_seconds: Optional[float]
80+
bytes_processed: int, slot_millis: int, exec_seconds: float, query_char_count: int
7781
):
7882
"""For pytest runs only, log information about the query job
7983
to a file in order to create a performance report.
@@ -103,3 +107,10 @@ def write_stats_to_disk(
103107
)
104108
with open(exec_time_file, "a") as f:
105109
f.write(str(exec_seconds) + "\n")
110+
111+
# store length of query
112+
query_char_count_file = os.path.join(
113+
current_directory, test_name + ".query_char_count"
114+
)
115+
with open(query_char_count_file, "a") as f:
116+
f.write(str(query_char_count) + "\n")

scripts/run_and_publish_benchmark.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -88,22 +88,27 @@ def collect_benchmark_result(
8888
millis_files = sorted(path.rglob("*.slotmillis"))
8989
bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds"))
9090
local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds"))
91+
query_char_count_files = sorted(path.rglob("*.query_char_count"))
92+
9193
error_files = sorted(path.rglob("*.error"))
9294

9395
if not (
9496
len(bytes_files)
9597
== len(millis_files)
9698
== len(local_seconds_files)
9799
== len(bq_seconds_files)
100+
== len(query_char_count_files)
98101
):
99102
raise ValueError(
100-
"Mismatch in the number of report files for bytes, millis, and seconds."
103+
"Mismatch in the number of report files for bytes, millis, seconds and query char count."
101104
)
102105

103106
for idx in range(len(bytes_files)):
104107
bytes_file = bytes_files[idx]
105108
millis_file = millis_files[idx]
106109
bq_seconds_file = bq_seconds_files[idx]
110+
query_char_count_file = query_char_count_files[idx]
111+
107112
filename = bytes_file.relative_to(path).with_suffix("")
108113

109114
if filename != millis_file.relative_to(path).with_suffix(
@@ -136,19 +141,25 @@ def collect_benchmark_result(
136141
lines = file.read().splitlines()
137142
bq_seconds = sum(float(line) for line in lines) / iterations
138143

144+
with open(query_char_count_file, "r") as file:
145+
lines = file.read().splitlines()
146+
query_char_count = sum(int(line) for line in lines) / iterations
147+
139148
results_dict[str(filename)] = [
140149
query_count,
141150
total_bytes,
142151
total_slot_millis,
143152
local_seconds,
144153
bq_seconds,
154+
query_char_count,
145155
]
146156
finally:
147157
for files_to_remove in (
148158
path.rglob("*.bytesprocessed"),
149159
path.rglob("*.slotmillis"),
150160
path.rglob("*.local_exec_time_seconds"),
151161
path.rglob("*.bq_exec_time_seconds"),
162+
path.rglob("*.query_char_count"),
152163
path.rglob("*.error"),
153164
):
154165
for log_file in files_to_remove:
@@ -160,6 +171,7 @@ def collect_benchmark_result(
160171
"Slot_Millis",
161172
"Local_Execution_Time_Sec",
162173
"BigQuery_Execution_Time_Sec",
174+
"Query_Char_Count",
163175
]
164176

165177
benchmark_metrics = pd.DataFrame.from_dict(
@@ -182,15 +194,19 @@ def collect_benchmark_result(
182194
)
183195
print(
184196
f"{index} - query count: {row['Query_Count']},"
197+
f" query char count: {row['Query_Char_Count']},",
185198
f" bytes processed sum: {row['Bytes_Processed']},"
186199
f" slot millis sum: {row['Slot_Millis']},"
187200
f" local execution time: {formatted_local_exec_time} seconds,"
188-
f" bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds"
201+
f" bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds",
189202
)
190203

191204
geometric_mean_queries = geometric_mean_excluding_zeros(
192205
benchmark_metrics["Query_Count"]
193206
)
207+
geometric_mean_query_char_count = geometric_mean_excluding_zeros(
208+
benchmark_metrics["Query_Char_Count"]
209+
)
194210
geometric_mean_bytes = geometric_mean_excluding_zeros(
195211
benchmark_metrics["Bytes_Processed"]
196212
)
@@ -206,6 +222,7 @@ def collect_benchmark_result(
206222

207223
print(
208224
f"---Geometric mean of queries: {geometric_mean_queries}, "
225+
f"Geometric mean of queries char counts: {geometric_mean_query_char_count}, "
209226
f"Geometric mean of bytes processed: {geometric_mean_bytes}, "
210227
f"Geometric mean of slot millis: {geometric_mean_slot_millis}, "
211228
f"Geometric mean of local execution time: {geometric_mean_local_seconds} seconds, "

0 commit comments

Comments
 (0)