@@ -88,22 +88,27 @@ def collect_benchmark_result(
88
88
millis_files = sorted (path .rglob ("*.slotmillis" ))
89
89
bq_seconds_files = sorted (path .rglob ("*.bq_exec_time_seconds" ))
90
90
local_seconds_files = sorted (path .rglob ("*.local_exec_time_seconds" ))
91
+ query_char_count_files = sorted (path .rglob ("*.query_char_count" ))
92
+
91
93
error_files = sorted (path .rglob ("*.error" ))
92
94
93
95
if not (
94
96
len (bytes_files )
95
97
== len (millis_files )
96
98
== len (local_seconds_files )
97
99
== len (bq_seconds_files )
100
+ == len (query_char_count_files )
98
101
):
99
102
raise ValueError (
100
- "Mismatch in the number of report files for bytes, millis, and seconds ."
103
+ "Mismatch in the number of report files for bytes, millis, seconds and query char count ."
101
104
)
102
105
103
106
for idx in range (len (bytes_files )):
104
107
bytes_file = bytes_files [idx ]
105
108
millis_file = millis_files [idx ]
106
109
bq_seconds_file = bq_seconds_files [idx ]
110
+ query_char_count_file = query_char_count_files [idx ]
111
+
107
112
filename = bytes_file .relative_to (path ).with_suffix ("" )
108
113
109
114
if filename != millis_file .relative_to (path ).with_suffix (
@@ -136,19 +141,25 @@ def collect_benchmark_result(
136
141
lines = file .read ().splitlines ()
137
142
bq_seconds = sum (float (line ) for line in lines ) / iterations
138
143
144
+ with open (query_char_count_file , "r" ) as file :
145
+ lines = file .read ().splitlines ()
146
+ query_char_count = sum (int (line ) for line in lines ) / iterations
147
+
139
148
results_dict [str (filename )] = [
140
149
query_count ,
141
150
total_bytes ,
142
151
total_slot_millis ,
143
152
local_seconds ,
144
153
bq_seconds ,
154
+ query_char_count ,
145
155
]
146
156
finally :
147
157
for files_to_remove in (
148
158
path .rglob ("*.bytesprocessed" ),
149
159
path .rglob ("*.slotmillis" ),
150
160
path .rglob ("*.local_exec_time_seconds" ),
151
161
path .rglob ("*.bq_exec_time_seconds" ),
162
+ path .rglob ("*.query_char_count" ),
152
163
path .rglob ("*.error" ),
153
164
):
154
165
for log_file in files_to_remove :
@@ -160,6 +171,7 @@ def collect_benchmark_result(
160
171
"Slot_Millis" ,
161
172
"Local_Execution_Time_Sec" ,
162
173
"BigQuery_Execution_Time_Sec" ,
174
+ "Query_Char_Count" ,
163
175
]
164
176
165
177
benchmark_metrics = pd .DataFrame .from_dict (
@@ -182,15 +194,19 @@ def collect_benchmark_result(
182
194
)
183
195
print (
184
196
f"{ index } - query count: { row ['Query_Count' ]} ,"
197
+ f" query char count: { row ['Query_Char_Count' ]} ," ,
185
198
f" bytes processed sum: { row ['Bytes_Processed' ]} ,"
186
199
f" slot millis sum: { row ['Slot_Millis' ]} ,"
187
200
f" local execution time: { formatted_local_exec_time } seconds,"
188
- f" bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds"
201
+ f" bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds" ,
189
202
)
190
203
191
204
geometric_mean_queries = geometric_mean_excluding_zeros (
192
205
benchmark_metrics ["Query_Count" ]
193
206
)
207
+ geometric_mean_query_char_count = geometric_mean_excluding_zeros (
208
+ benchmark_metrics ["Query_Char_Count" ]
209
+ )
194
210
geometric_mean_bytes = geometric_mean_excluding_zeros (
195
211
benchmark_metrics ["Bytes_Processed" ]
196
212
)
@@ -206,6 +222,7 @@ def collect_benchmark_result(
206
222
207
223
print (
208
224
f"---Geometric mean of queries: { geometric_mean_queries } , "
225
+ f"Geometric mean of queries char counts: { geometric_mean_query_char_count } , "
209
226
f"Geometric mean of bytes processed: { geometric_mean_bytes } , "
210
227
f"Geometric mean of slot millis: { geometric_mean_slot_millis } , "
211
228
f"Geometric mean of local execution time: { geometric_mean_local_seconds } seconds, "
0 commit comments