Skip to content

Commit d6fbb5b

Browse files
authored
chore: add experimental blob.image_normalize function (#1388)
1 parent 64b5ff1 commit d6fbb5b

File tree

2 files changed

+104
-0
lines changed

2 files changed

+104
-0
lines changed

bigframes/blob/_functions.py

+48
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,54 @@ def image_resize_func(
174174
)
175175

176176

177+
def image_normalize_func(
    src_obj_ref_rt: str, dst_obj_ref_rt: str, alpha: float, beta: float, norm_type: str
) -> str:
    """Normalize one image and upload the result as a JPEG.

    Downloads the bytes behind the source object's read URL, decodes them,
    runs ``cv2.normalize`` on the decoded image, re-encodes it as JPEG and
    PUTs the bytes to the destination object's write URL.

    All imports stay inside the body. NOTE(review): presumably the function
    is shipped and executed standalone via ``FunctionDef`` -- confirm before
    moving imports or helpers to module level.

    Args:
        src_obj_ref_rt: JSON string containing ``access_urls.read_url``.
        dst_obj_ref_rt: JSON string containing ``access_urls.write_url``.
        alpha: Forwarded to ``cv2.normalize`` as ``alpha``.
        beta: Forwarded to ``cv2.normalize`` as ``beta``.
        norm_type: One of "inf", "l1", "l2" or "minmax".

    Returns:
        ``dst_obj_ref_rt``, unchanged.

    Raises:
        KeyError: If ``norm_type`` is not a supported value, or an expected
            key is missing from either JSON document.
        requests.HTTPError: If the download or the upload returns an error
            status.
        ValueError: If the downloaded bytes cannot be decoded as an image or
            the normalized image cannot be encoded as JPEG.
    """
    import json

    import cv2 as cv  # type: ignore
    import numpy as np
    import requests

    # Seconds allowed per HTTP call. Without a timeout requests waits
    # indefinitely on a stalled connection.
    http_timeout = 30

    norm_type_mapping = {
        "inf": cv.NORM_INF,
        "l1": cv.NORM_L1,
        "l2": cv.NORM_L2,
        "minmax": cv.NORM_MINMAX,
    }
    # Resolve the mode before any network I/O so a bad value fails fast.
    cv_norm_type = norm_type_mapping[norm_type]

    src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
    dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt)

    src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
    dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"]

    response = requests.get(src_url, timeout=http_timeout)
    # Do not try to decode an HTTP error page as an image.
    response.raise_for_status()

    nparr = np.frombuffer(response.content, np.uint8)
    img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals undecodable data by returning None, not raising.
        raise ValueError("Source object could not be decoded as an image.")

    img_normalized = cv.normalize(
        img, None, alpha=alpha, beta=beta, norm_type=cv_norm_type
    )

    encoded_ok, encoded = cv.imencode(".jpeg", img_normalized)
    if not encoded_ok:
        raise ValueError("Normalized image could not be encoded as JPEG.")

    upload = requests.put(
        url=dst_url,
        data=encoded.tobytes(),
        headers={
            "Content-Type": "image/jpeg",
        },
        timeout=http_timeout,
    )
    # A rejected upload must not be reported as success to the caller.
    upload.raise_for_status()

    return dst_obj_ref_rt
218+
219+
220+
# Pairs image_normalize_func with the pip packages that provide the modules
# it imports at call time (cv2, numpy, requests).
image_normalize_def = FunctionDef(
    image_normalize_func,
    ["opencv-python", "numpy", "requests"],
)
223+
224+
177225
# Extracts all text from a PDF url
178226
def pdf_extract_func(src_obj_ref_rt: str) -> str:
179227
import io

bigframes/operations/blob.py

+56
Original file line number | Diff line number | Diff line change
@@ -384,6 +384,62 @@ def image_resize(
384384

385385
return dst
386386

387+
def image_normalize(
    self,
    *,
    alpha: float = 1.0,
    beta: float = 0.0,
    norm_type: str = "l2",
    dst: Union[str, bigframes.series.Series],
    connection: Optional[str] = None,
) -> bigframes.series.Series:
    """Normalize images.

    .. note::
        BigFrames Blob is still experimental. It may not work and is subject to change in the future.

    Args:
        alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization.
        beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization.
        norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
        dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
        connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.

    Returns:
        BigFrames Blob Series

    Raises:
        ValueError: If ``norm_type`` is not one of the accepted values.
    """
    # Reject unsupported modes before any remote work; otherwise the mistake
    # only surfaces as a KeyError raised inside the UDF at execution time.
    if norm_type not in ("inf", "l1", "l2", "minmax"):
        raise ValueError(
            f"Unsupported norm_type {norm_type!r}; "
            "expected one of: 'inf', 'l1', 'l2', 'minmax'."
        )

    import posixpath

    import bigframes.blob._functions as blob_func

    connection = self._resolve_connection(connection)

    if isinstance(dst, str):
        # Ensure a trailing "/". GCS URIs always use forward slashes, so use
        # posixpath rather than os.path, which would append "\\" on Windows.
        dst = posixpath.join(dst, "")
        src_uri = bigframes.series.Series(self._block).struct.explode()["uri"]
        # Replace src folder with dst folder, keep the file names.
        dst_uri = src_uri.str.replace(r"^.*\/(.*)$", rf"{dst}\1", regex=True)
        dst = cast(
            bigframes.series.Series, dst_uri.str.to_blob(connection=connection)
        )

    image_normalize_udf = blob_func.TransformFunction(
        blob_func.image_normalize_def,
        session=self._block.session,
        connection=connection,
    ).udf()

    src_rt = self._get_runtime_json_str(mode="R")
    dst_rt = dst.blob._get_runtime_json_str(mode="RW")

    # One row per image: source runtime, destination runtime, then the
    # scalar options broadcast to every row as UDF arguments.
    df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
    df["alpha"] = alpha
    df["beta"] = beta
    df["norm_type"] = norm_type

    res = df.apply(image_normalize_udf, axis=1)
    res.cache()  # to execute the udf

    return dst
442+
387443
def pdf_extract(
388444
self, *, connection: Optional[str] = None
389445
) -> bigframes.series.Series:

0 commit comments

Comments
 (0)