Skip to content

Commit 3c3e14c

Browse files
arwas11sycaiGenesis929
authored
feat: add GeoSeries.from_xy() (#1364)
* feat: add GeoSeries.from_xy * add from_xy test and update ibis types * update geoseries notebook with from_xy * Update docstring example * fix docstring lint error * return GeometryDtype() for all ibis geo types * chore: support timestamp subtractions (#1346) * chore: support timestamp subtractions * Fix format * use tree rewrites to dispatch timestamp_diff operator * add TODO for more node updates * polish the code and fix typos * fix comment * add rewrites to compile_raw and compile_peek_sql * chore: add a tool to upload tpcds data to bigquery. (#1367) * chore: add a tool to upload tpcds data to bigquery. * update error type * update docstring --------- Co-authored-by: Shenyang Cai <[email protected]> Co-authored-by: Huan Chen <[email protected]>
1 parent 1970031 commit 3c3e14c

File tree

9 files changed

+199
-45
lines changed

9 files changed

+199
-45
lines changed

bigframes/core/compile/ibis_types.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
ibis_dtypes.JSON,
4848
]
4949

50+
IBIS_GEO_TYPE = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True)
51+
5052

5153
BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, bigframes.dtypes.Dtype]] = (
5254
(ibis_dtypes.boolean, pd.BooleanDtype()),
@@ -70,7 +72,7 @@
7072
pd.ArrowDtype(pa.decimal256(76, 38)),
7173
),
7274
(
73-
ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True),
75+
IBIS_GEO_TYPE,
7476
gpd.array.GeometryDtype(),
7577
),
7678
(ibis_dtypes.json, db_dtypes.JSONDtype()),
@@ -177,6 +179,14 @@ def cast_ibis_value(
177179
ibis_dtypes.timestamp,
178180
),
179181
ibis_dtypes.binary: (ibis_dtypes.string,),
182+
ibis_dtypes.point: (IBIS_GEO_TYPE,),
183+
ibis_dtypes.geometry: (IBIS_GEO_TYPE,),
184+
ibis_dtypes.geography: (IBIS_GEO_TYPE,),
185+
ibis_dtypes.linestring: (IBIS_GEO_TYPE,),
186+
ibis_dtypes.polygon: (IBIS_GEO_TYPE,),
187+
ibis_dtypes.multilinestring: (IBIS_GEO_TYPE,),
188+
ibis_dtypes.multipoint: (IBIS_GEO_TYPE,),
189+
ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,),
180190
}
181191

182192
value = ibis_value_to_canonical_type(value)
@@ -282,6 +292,9 @@ def ibis_dtype_to_bigframes_dtype(
282292
if isinstance(ibis_dtype, ibis_dtypes.JSON):
283293
return bigframes.dtypes.JSON_DTYPE
284294

295+
if isinstance(ibis_dtype, ibis_dtypes.GeoSpatial):
296+
return gpd.array.GeometryDtype()
297+
285298
if ibis_dtype in IBIS_TO_BIGFRAMES:
286299
return IBIS_TO_BIGFRAMES[ibis_dtype]
287300
elif isinstance(ibis_dtype, ibis_dtypes.Decimal):

bigframes/core/compile/scalar_op_compiler.py

+7
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,13 @@ def geo_area_op_impl(x: ibis_types.Value):
10031003
return typing.cast(ibis_types.GeoSpatialValue, x).area()
10041004

10051005

1006+
@scalar_op_compiler.register_binary_op(ops.geo_st_geogpoint_op, pass_op=False)
1007+
def geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value):
1008+
return typing.cast(ibis_types.NumericValue, x).point(
1009+
typing.cast(ibis_types.NumericValue, y)
1010+
)
1011+
1012+
10061013
# Parameterized ops
10071014
@scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True)
10081015
def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):

bigframes/geopandas/geoseries.py

+9
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,12 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore
6666
raise NotImplementedError(
6767
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
6868
)
69+
70+
@classmethod
71+
def from_xy(cls, x, y, index=None, session=None, **kwargs) -> GeoSeries:
72+
# TODO: if either x or y is local and the other is remote. Use the
73+
# session from the remote object.
74+
series_x = bigframes.series.Series(x, index=index, session=session, **kwargs)
75+
series_y = bigframes.series.Series(y, index=index, session=session, **kwargs)
76+
77+
return cls(series_x._apply_binary_op(series_y, ops.geo_st_geogpoint_op))

bigframes/operations/__init__.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,12 @@
8585
SqlScalarOp,
8686
where_op,
8787
)
88-
from bigframes.operations.geo_ops import geo_area_op, geo_x_op, geo_y_op
88+
from bigframes.operations.geo_ops import (
89+
geo_area_op,
90+
geo_st_geogpoint_op,
91+
geo_x_op,
92+
geo_y_op,
93+
)
8994
from bigframes.operations.json_ops import (
9095
JSONExtract,
9196
JSONExtractArray,
@@ -337,6 +342,7 @@
337342
"geo_x_op",
338343
"geo_y_op",
339344
"geo_area_op",
345+
"geo_st_geogpoint_op",
340346
# Numpy ops mapping
341347
"NUMPY_TO_BINOP",
342348
"NUMPY_TO_OP",

bigframes/operations/geo_ops.py

+4
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,7 @@
3636
dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
3737
),
3838
)
39+
40+
geo_st_geogpoint_op = base_ops.create_binary_op(
41+
name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo()
42+
)

bigframes/operations/type.py

+14
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,20 @@ def output_type(
121121
return bigframes.dtypes.coerce_to_common(left_type, right_type)
122122

123123

124+
@dataclasses.dataclass
125+
class BinaryNumericGeo(BinaryTypeSignature):
126+
"""Type signature for geo functions like from_xy that map two numeric inputs to a geography value."""
127+
128+
def output_type(
129+
self, left_type: ExpressionType, right_type: ExpressionType
130+
) -> ExpressionType:
131+
if (left_type is not None) and not bigframes.dtypes.is_numeric(left_type):
132+
raise TypeError(f"Type {left_type} is not numeric")
133+
if (right_type is not None) and not bigframes.dtypes.is_numeric(right_type):
134+
raise TypeError(f"Type {right_type} is not numeric")
135+
return bigframes.dtypes.GEO_DTYPE
136+
137+
124138
@dataclasses.dataclass
125139
class BinaryRealNumeric(BinaryTypeSignature):
126140
"""Type signature for real-valued functions like divide, arctan2, pow."""

notebooks/geo/geoseries.ipynb

+82-43
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"cell_type": "markdown",
4545
"metadata": {},
4646
"source": [
47-
"### Load the Counties table from the Census Bureau US Boundaries dataset"
47+
"### 1. Load the Counties table from the Census Bureau US Boundaries dataset"
4848
]
4949
},
5050
{
@@ -56,7 +56,7 @@
5656
"name": "stderr",
5757
"output_type": "stream",
5858
"text": [
59-
"/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n",
59+
"/usr/local/google/home/arwas/src/bigframes3/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n",
6060
" warnings.warn(msg, category=bfe.DefaultIndexWarning)\n"
6161
]
6262
}
@@ -69,7 +69,7 @@
6969
"cell_type": "markdown",
7070
"metadata": {},
7171
"source": [
72-
"### Create a series from the int_point_geom column"
72+
"### 2. Create a series from the int_point_geom column"
7373
]
7474
},
7575
{
@@ -103,11 +103,11 @@
103103
{
104104
"data": {
105105
"text/plain": [
106-
"37 POINT (-91.19496 39.98605)\n",
107-
"406 POINT (-84.86717 33.92103)\n",
108-
"926 POINT (-82.47974 35.33641)\n",
109-
"940 POINT (-75.50298 39.09709)\n",
110-
"996 POINT (-92.56434 39.8298)\n",
106+
"171 POINT (-95.50742 42.39186)\n",
107+
"219 POINT (-105.42894 37.27755)\n",
108+
"402 POINT (-93.34905 32.10121)\n",
109+
"526 POINT (-84.60469 43.29233)\n",
110+
"677 POINT (-89.5681 37.04779)\n",
111111
"Name: int_point_geom, dtype: geometry"
112112
]
113113
},
@@ -136,11 +136,11 @@
136136
{
137137
"data": {
138138
"text/plain": [
139-
"0 POINT (-91.19496 39.98605)\n",
140-
"1 POINT (-84.86717 33.92103)\n",
141-
"2 POINT (-82.47974 35.33641)\n",
142-
"3 POINT (-75.50298 39.09709)\n",
143-
"4 POINT (-92.56434 39.8298)\n",
139+
"0 POINT (-95.50742 42.39186)\n",
140+
"1 POINT (-105.42894 37.27755)\n",
141+
"2 POINT (-93.34905 32.10121)\n",
142+
"3 POINT (-84.60469 43.29233)\n",
143+
"4 POINT (-89.5681 37.04779)\n",
144144
"dtype: geometry"
145145
]
146146
},
@@ -185,11 +185,11 @@
185185
{
186186
"data": {
187187
"text/plain": [
188-
"0 -91.194961\n",
189-
"1 -84.867169\n",
190-
"2 -82.479741\n",
191-
"3 -75.502982\n",
192-
"4 -92.56434\n",
188+
"0 -95.507421\n",
189+
"1 -105.42894\n",
190+
"2 -93.34905\n",
191+
"3 -84.60469\n",
192+
"4 -89.568097\n",
193193
"dtype: Float64"
194194
]
195195
},
@@ -217,11 +217,11 @@
217217
{
218218
"data": {
219219
"text/plain": [
220-
"0 39.986053\n",
221-
"1 33.92103\n",
222-
"2 35.336415\n",
223-
"3 39.097088\n",
224-
"4 39.829795\n",
220+
"0 42.39186\n",
221+
"1 37.277547\n",
222+
"2 32.101213\n",
223+
"3 43.292326\n",
224+
"4 37.047793\n",
225225
"dtype: Float64"
226226
]
227227
},
@@ -367,11 +367,11 @@
367367
{
368368
"data": {
369369
"text/plain": [
370-
"10 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n",
371-
"127 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n",
372-
"253 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n",
373-
"261 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n",
374-
"303 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n",
370+
"54 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n",
371+
"256 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n",
372+
"266 POLYGON ((-104.19381 39.56523, -104.19464 39.5...\n",
373+
"485 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n",
374+
"765 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n",
375375
"Name: county_geom, dtype: geometry"
376376
]
377377
},
@@ -389,7 +389,7 @@
389389
"cell_type": "markdown",
390390
"metadata": {},
391391
"source": [
392-
"### Convert the geometry collection to `bigframes.gopandas.GeoSeries`"
392+
"### 2. Convert the geometry collection to `bigframes.geopandas.GeoSeries`"
393393
]
394394
},
395395
{
@@ -400,11 +400,11 @@
400400
{
401401
"data": {
402402
"text/plain": [
403-
"0 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n",
404-
"1 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n",
405-
"2 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n",
406-
"3 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n",
407-
"4 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n",
403+
"0 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n",
404+
"1 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n",
405+
"2 POLYGON ((-104.19381 39.56523, -104.19464 39.5...\n",
406+
"3 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n",
407+
"4 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n",
408408
"dtype: geometry"
409409
]
410410
},
@@ -442,14 +442,14 @@
442442
"outputs": [
443443
{
444444
"ename": "NotImplementedError",
445-
"evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0",
445+
"evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0",
446446
"output_type": "error",
447447
"traceback": [
448448
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
449449
"\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
450450
"Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n",
451-
"File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n",
452-
"\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0"
451+
"File \u001b[0;32m~/src/bigframes3/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n",
452+
"\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0"
453453
]
454454
}
455455
],
@@ -461,7 +461,7 @@
461461
"cell_type": "markdown",
462462
"metadata": {},
463463
"source": [
464-
"## Use `bigframes.bigquery.st_area` to retirive the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area"
464+
"### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area"
465465
]
466466
},
467467
{
@@ -481,11 +481,11 @@
481481
{
482482
"data": {
483483
"text/plain": [
484-
"0 2382382043.48891\n",
485-
"1 1977633097.26862\n",
486-
"2 939388839.499466\n",
487-
"3 3269015229.381782\n",
488-
"4 2678752241.321673\n",
484+
"0 1567505274.453911\n",
485+
"1 1511436852.079554\n",
486+
"2 4789800692.948824\n",
487+
"3 1686877416.586061\n",
488+
"4 740944862.916908\n",
489489
"dtype: Float64"
490490
]
491491
},
@@ -498,6 +498,45 @@
498498
"geom_area = bbq.st_area(five_geom)\n",
499499
"geom_area"
500500
]
501+
},
502+
{
503+
"cell_type": "markdown",
504+
"metadata": {},
505+
"source": [
506+
"## Use `bigframes.geopandas.GeoSeries.from_xy()` to create a GeoSeries of `Point` geometries. "
507+
]
508+
},
509+
{
510+
"cell_type": "markdown",
511+
"metadata": {},
512+
"source": [
513+
"### 1. Reuse the `geo_points.x` and `geo_points.y` results by passing them to `.from_xy()` "
514+
]
515+
},
516+
{
517+
"cell_type": "code",
518+
"execution_count": 16,
519+
"metadata": {},
520+
"outputs": [
521+
{
522+
"data": {
523+
"text/plain": [
524+
"0 POINT (-95.50742 42.39186)\n",
525+
"1 POINT (-105.42894 37.27755)\n",
526+
"2 POINT (-93.34905 32.10121)\n",
527+
"3 POINT (-84.60469 43.29233)\n",
528+
"4 POINT (-89.5681 37.04779)\n",
529+
"dtype: geometry"
530+
]
531+
},
532+
"execution_count": 16,
533+
"metadata": {},
534+
"output_type": "execute_result"
535+
}
536+
],
537+
"source": [
538+
"bigframes.geopandas.GeoSeries.from_xy(geo_points.x, geo_points.y)"
539+
]
501540
}
502541
],
503542
"metadata": {

tests/system/small/geopandas/test_geoseries.py

+20
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,23 @@ def test_geo_area_not_supported():
8787
),
8888
):
8989
bf_series.area
90+
91+
92+
def test_geo_from_xy():
93+
x = [2.5, 5, -3.0]
94+
y = [0.5, 1, 1.5]
95+
bf_result = (
96+
bigframes.geopandas.GeoSeries.from_xy(x, y)
97+
.astype(geopandas.array.GeometryDtype())
98+
.to_pandas()
99+
)
100+
pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype(
101+
geopandas.array.GeometryDtype()
102+
)
103+
104+
pd.testing.assert_series_equal(
105+
bf_result,
106+
pd_result,
107+
check_series_type=False,
108+
check_index=False,
109+
)

0 commit comments

Comments
 (0)