From 931285ff85842ab07a0ef2ff9db808181ea3c5e4 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Mon, 14 Nov 2022 16:26:37 -0600
Subject: [PATCH 1/5] feat: add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig (#1399)

* feat: add 'reference_file_schema_uri' to LoadJobConfig and ExternalConfig
---
 google/cloud/bigquery/external_config.py |  14 ++
 google/cloud/bigquery/job/load.py        |  21 +++
 testing/constraints-3.7.txt              |   2 +-
 tests/system/test_client.py              | 203 +++++++++++++++++++++++
 tests/unit/job/test_base.py              |   5 +-
 tests/unit/job/test_load.py              |  12 ++
 tests/unit/test_external_config.py       |   6 +
 7 files changed, 258 insertions(+), 5 deletions(-)

diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index 640b2d16b..bd60e4ef1 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -756,6 +756,20 @@ def hive_partitioning(self, value):
         prop = value.to_api_repr() if value is not None else None
         self._properties["hivePartitioningOptions"] = prop
 
+    @property
+    def reference_file_schema_uri(self):
+        """Optional[str]:
+        When creating an external table, the user can provide a reference file with the
+        table schema. This is enabled for the following formats:
+
+        AVRO, PARQUET, ORC
+        """
+        return self._properties.get("referenceFileSchemaUri")
+
+    @reference_file_schema_uri.setter
+    def reference_file_schema_uri(self, value):
+        self._properties["referenceFileSchemaUri"] = value
+
     @property
     def ignore_unknown_values(self):
         """bool: If :data:`True`, extra values that are not represented in the
diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py
index e4b44395e..5c7f26841 100644
--- a/google/cloud/bigquery/job/load.py
+++ b/google/cloud/bigquery/job/load.py
@@ -379,6 +379,20 @@ def range_partitioning(self, value):
             )
         self._set_sub_prop("rangePartitioning", resource)
 
+    @property
+    def reference_file_schema_uri(self):
+        """Optional[str]:
+        When creating an external table, the user can provide a reference file with the
+        table schema. This is enabled for the following formats:
+
+        AVRO, PARQUET, ORC
+        """
+        return self._get_sub_prop("referenceFileSchemaUri")
+
+    @reference_file_schema_uri.setter
+    def reference_file_schema_uri(self, value):
+        return self._set_sub_prop("referenceFileSchemaUri", value)
+
     @property
     def schema(self):
         """Optional[Sequence[Union[ \
@@ -651,6 +665,13 @@ def quote_character(self):
         """
         return self._configuration.quote_character
 
+    @property
+    def reference_file_schema_uri(self):
+        """See:
+        attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`.
+        """
+        return self._configuration.reference_file_schema_uri
+
     @property
     def skip_leading_rows(self):
         """See
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
index 57928714f..2c5b169db 100644
--- a/testing/constraints-3.7.txt
+++ b/testing/constraints-3.7.txt
@@ -25,4 +25,4 @@ python-dateutil==2.7.3
 requests==2.21.0
 Shapely==1.6.4.post2
 six==1.13.0
-tqdm==4.7.4
+tqdm==4.7.4
\ No newline at end of file
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index c99ee1c72..152bb8144 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -97,6 +97,20 @@
     ),
 ]
 
+SOURCE_URIS_AVRO = [
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
+]
+SOURCE_URIS_PARQUET = [
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet",
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet",
+    "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet",
+]
+REFERENCE_FILE_SCHEMA_URI_AVRO = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro"
+REFERENCE_FILE_SCHEMA_URI_PARQUET = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet"
+
+
 # The VPC-SC team maintains a mirror of the GCS bucket used for code
 # samples. The public bucket crosses the configured security boundary.
 # See: https://github.com/googleapis/google-cloud-python/issues/8550
@@ -1052,6 +1066,195 @@ def test_load_table_from_file_w_explicit_location(self):
             table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US"
         ).result()
 
+    def test_create_external_table_with_reference_file_schema_uri_avro(self):
+        client = Config.CLIENT
+        dataset_id = _make_dataset_id("external_reference_file_avro")
+        self.temp_dataset(dataset_id)
+        dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+        table_id = "test_ref_file_avro"
+        table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+        expected_schema = [
+            bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+        ]
+
+        # By default, the table would have the c-twitter schema because it is lexicographically last
+        # in the `SOURCE_URIS_AVRO` list:
+        # a-twitter schema: (username, tweet, timestamp, likes)
+        # b-twitter schema: (username, tweet, timestamp)
+        # c-twitter schema: (username, tweet)
+
+        # Because `referenceFileSchemaUri` is set to a-twitter, the table will have the a-twitter schema
+
+        # Create external data configuration
+        external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
+        external_config.source_uris = SOURCE_URIS_AVRO
+        external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+
+        table = bigquery.Table(table_ref)
+        table.external_data_configuration = external_config
+
+        table = client.create_table(table)
+
+        # Get table created by the create_table API call
+        generated_table = client.get_table(table_ref)
+
+        self.assertEqual(generated_table.schema, expected_schema)
+        self.assertEqual(
+            generated_table.external_data_configuration._properties[
+                "referenceFileSchemaUri"
+            ],
+            REFERENCE_FILE_SCHEMA_URI_AVRO,
+        )
+
+        # Clean up test
+        self.to_delete.insert(0, generated_table)
+
+    def test_load_table_from_uri_with_reference_file_schema_uri_avro(self):
+        dataset_id = _make_dataset_id("test_reference_file_avro")
+        self.temp_dataset(dataset_id)
+        client = Config.CLIENT
+        dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+        table_id = "test_ref_file_avro"
+        table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+        expected_schema = [
+            bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+        ]
+
+        # By default, the table would have the c-twitter schema because it is lexicographically last
+        # in the `SOURCE_URIS_AVRO` list:
+        # a-twitter schema: (username, tweet, timestamp, likes)
+        # b-twitter schema: (username, tweet, timestamp)
+        # c-twitter schema: (username, tweet)
+
+        # Because `referenceFileSchemaUri` is set to a-twitter, the table will have the a-twitter schema
+
+        # Create load job configuration
+        load_job_config = bigquery.LoadJobConfig(
+            source_format=bigquery.SourceFormat.AVRO
+        )
+        load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+
+        load_job = client.load_table_from_uri(
+            source_uris=SOURCE_URIS_AVRO,
+            destination=table_ref,
+            job_config=load_job_config,
+        )
+        # Wait for load job to complete
+        result = load_job.result()
+
+        # Get table created by the load job
+        generated_table = client.get_table(table_ref)
+        self.assertEqual(generated_table.schema, expected_schema)
+        self.assertEqual(
+            result._properties["configuration"]["load"]["referenceFileSchemaUri"],
+            REFERENCE_FILE_SCHEMA_URI_AVRO,
+        )
+
+        # Clean up test
+        self.to_delete.insert(0, generated_table)
+
+    def test_create_external_table_with_reference_file_schema_uri_parquet(self):
+        client = Config.CLIENT
+        dataset_id = _make_dataset_id("external_table_ref_file_parquet")
+        self.temp_dataset(dataset_id)
+        dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+        table_id = "test_ref_file_parquet"
+        table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+        expected_schema = [
+            bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+        ]
+
+        # By default, the table would have the c-twitter schema because it is lexicographically last
+        # in the `SOURCE_URIS_PARQUET` list:
+        # a-twitter schema: (username, tweet, timestamp, likes)
+        # b-twitter schema: (username, tweet, timestamp)
+        # c-twitter schema: (username, tweet)
+
+        # Because `referenceFileSchemaUri` is set to a-twitter, the table will have the a-twitter schema
+
+        # Create external data configuration
+        external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.PARQUET)
+        external_config.source_uris = SOURCE_URIS_PARQUET
+        external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET
+
+        table = bigquery.Table(table_ref)
+        table.external_data_configuration = external_config
+
+        table = client.create_table(table)
+
+        # Get table created by the create_table API call
+        generated_table = client.get_table(table_ref)
+        self.assertEqual(generated_table.schema, expected_schema)
+        self.assertEqual(
+            generated_table.external_data_configuration._properties[
+                "referenceFileSchemaUri"
+            ],
+            REFERENCE_FILE_SCHEMA_URI_PARQUET,
+        )
+
+        # Clean up test
+        self.to_delete.insert(0, generated_table)
+
+    def test_load_table_from_uri_with_reference_file_schema_uri_parquet(self):
+        dataset_id = _make_dataset_id("test_reference_file_parquet")
+        self.temp_dataset(dataset_id)
+        client = Config.CLIENT
+        dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+        table_id = "test_ref_file_parquet"
+        table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+        expected_schema = [
+            bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+        ]
+
+        # By default, the table would have the c-twitter schema because it is lexicographically last
+        # in the `SOURCE_URIS_PARQUET` list:
+        # a-twitter schema: (username, tweet, timestamp, likes)
+        # b-twitter schema: (username, tweet, timestamp)
+        # c-twitter schema: (username, tweet)
+
+        # Because `referenceFileSchemaUri` is set to a-twitter, the table will have the a-twitter schema
+
+        # Create load job configuration
+        load_job_config = bigquery.LoadJobConfig(
+            source_format=bigquery.SourceFormat.PARQUET
+        )
+        load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET
+
+        load_job = client.load_table_from_uri(
+            source_uris=SOURCE_URIS_PARQUET,
+            destination=table_ref,
+            job_config=load_job_config,
+        )
+        # Wait for load job to complete
+        result = load_job.result()
+
+        # Get table created by the load job
+        generated_table = client.get_table(table_ref)
+        self.assertEqual(generated_table.schema, expected_schema)
+        self.assertEqual(
+            result._properties["configuration"]["load"]["referenceFileSchemaUri"],
+            REFERENCE_FILE_SCHEMA_URI_PARQUET,
+        )
+
+        # Clean up test
+        self.to_delete.insert(0, generated_table)
+
     def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows):
         from google.cloud._testing import _NamedTemporaryFile
 
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py
index f0525c22a..ed0dc731b 100644
--- a/tests/unit/job/test_base.py
+++ b/tests/unit/job/test_base.py
@@ -943,7 +943,6 @@ def test_result_default_wo_state(self):
         conn = make_connection(
             _make_retriable_exception(),
             begun_job_resource,
-            _make_retriable_exception(),
             done_job_resource,
         )
         client = _make_client(project=self.PROJECT, connection=conn)
@@ -963,9 +962,7 @@ def test_result_default_wo_state(self):
             query_params={"location": "US"},
             timeout=None,
         )
-        conn.api_request.assert_has_calls(
-            [begin_call, begin_call, reload_call, reload_call]
-        )
+        conn.api_request.assert_has_calls([begin_call, begin_call, reload_call])
 
     def test_result_w_retry_wo_state(self):
         begun_job_resource = _make_job_resource(
diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py
index cf2096b8b..143e1da59 100644
--- a/tests/unit/job/test_load.py
+++ b/tests/unit/job/test_load.py
@@ -37,6 +37,7 @@ def _setUpConstants(self):
         self.INPUT_BYTES = 12345
         self.OUTPUT_BYTES = 23456
         self.OUTPUT_ROWS = 345
+        self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference"
 
     def _make_resource(self, started=False, ended=False):
         resource = super(TestLoadJob, self)._make_resource(started, ended)
@@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False):
             "datasetId": self.DS_ID,
             "tableId": self.TABLE_ID,
         }
+        config["referenceFileSchemaUri"] = self.REFERENCE_FILE_SCHEMA_URI
 
         if ended:
             resource["status"] = {"state": "DONE"}
@@ -136,6 +138,12 @@ def _verifyResourceProperties(self, job, resource):
             self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"])
         else:
             self.assertIsNone(job.skip_leading_rows)
+        if "referenceFileSchemaUri" in config:
+            self.assertEqual(
+                job.reference_file_schema_uri, config["referenceFileSchemaUri"]
+            )
+        else:
+            self.assertIsNone(job.reference_file_schema_uri)
 
         if "destinationEncryptionConfiguration" in config:
             self.assertIsNotNone(job.destination_encryption_configuration)
@@ -186,6 +194,7 @@ def test_ctor(self):
         self.assertIsNone(job.use_avro_logical_types)
         self.assertIsNone(job.clustering_fields)
         self.assertIsNone(job.schema_update_options)
+        self.assertIsNone(job.reference_file_schema_uri)
 
     def test_ctor_w_config(self):
         from google.cloud.bigquery.schema import SchemaField
@@ -461,6 +470,7 @@ def test_begin_w_bound_client(self):
                         "datasetId": self.DS_ID,
                         "tableId": self.TABLE_ID,
                     },
+                    "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI,
                 }
             },
         },
@@ -503,6 +513,7 @@ def test_begin_w_autodetect(self):
                         "datasetId": self.DS_ID,
                         "tableId": self.TABLE_ID,
                     },
+                    "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI,
                     "autodetect": True,
                 }
             },
@@ -585,6 +596,7 @@ def test_begin_w_alternate_client(self):
         config.use_avro_logical_types = True
         config.write_disposition = WriteDisposition.WRITE_TRUNCATE
         config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]
+        config.reference_file_schema_uri = "gs://path/to/reference"
         with mock.patch(
             "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
         ) as final_attributes:
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 3ef61d738..72fe2761a 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -99,6 +99,12 @@ def test_connection_id(self):
         ec.connection_id = "path/to/connection"
         self.assertEqual(ec.connection_id, "path/to/connection")
 
+    def test_reference_file_schema_uri(self):
+        ec = external_config.ExternalConfig("")
+        self.assertIsNone(ec.reference_file_schema_uri)
+        ec.reference_file_schema_uri = "path/to/reference"
+        self.assertEqual(ec.reference_file_schema_uri, "path/to/reference")
+
     def test_schema_None(self):
         ec = external_config.ExternalConfig("")
         ec.schema = None
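Usage note for patch 1: the sketch below shows the new `reference_file_schema_uri` property driving a load job, mirroring the system tests above. It is illustrative only: the bucket, dataset, and table names are hypothetical, and it assumes application-default credentials. The schema is read from the referenced file instead of being inferred from the lexicographically last source URI.

    from google.cloud import bigquery

    client = bigquery.Client()  # assumes application-default credentials

    # Take the table schema from one known-good file rather than letting
    # BigQuery infer it from the lexicographically last source URI.
    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)
    job_config.reference_file_schema_uri = "gs://my-bucket/data/a-twitter.parquet"  # hypothetical URI

    load_job = client.load_table_from_uri(
        source_uris=[
            "gs://my-bucket/data/a-twitter.parquet",  # hypothetical URIs
            "gs://my-bucket/data/b-twitter.parquet",
        ],
        destination="my-project.my_dataset.my_table",  # hypothetical table ID
        job_config=job_config,
    )
    load_job.result()  # block until the load job finishes

The same property exists on `ExternalConfig`, so an external table definition can point at the same reference file, as the `test_create_external_table_*` tests above demonstrate.
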
From 207aa506ab634bdb13256fa5bd8745ec9de23290 Mon Sep 17 00:00:00 2001
From: aribray <45905583+aribray@users.noreply.github.com>
Date: Tue, 15 Nov 2022 14:57:17 -0600
Subject: [PATCH 2/5] feat: add default value expression (#1408)

* feat: Adds default_value_expression to SchemaField
---
 google/cloud/bigquery/schema.py | 38 +++++++++++++++++++-
 google/cloud/bigquery/table.py  |  2 +-
 tests/system/test_client.py     | 62 +++++++++++++++++++++++++++++++++
 tests/unit/test_client.py       | 40 ++++++++++++++------
 tests/unit/test_schema.py       |  9 +++--
 5 files changed, 135 insertions(+), 16 deletions(-)

diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
index 1df78424d..ebf34e4cd 100644
--- a/google/cloud/bigquery/schema.py
+++ b/google/cloud/bigquery/schema.py
@@ -93,6 +93,30 @@ class SchemaField(object):
             Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
 
         max_length: Maximum length of fields with STRING or BYTES type.
+
+        default_value_expression: str, Optional
+            Used to specify the default value of a field using a SQL expression. It can only be set for
+            top level fields (columns).
+
+            You can use a struct or array expression to specify default value for the entire struct or
+            array. The valid SQL expressions are:
+
+            - Literals for all data types, including STRUCT and ARRAY.
+
+            - The following functions:
+
+                `CURRENT_TIMESTAMP`
+                `CURRENT_TIME`
+                `CURRENT_DATE`
+                `CURRENT_DATETIME`
+                `GENERATE_UUID`
+                `RAND`
+                `SESSION_USER`
+                `ST_GEOGPOINT`
+
+            - Struct or array composed with the above allowed functions, for example:
+
+                "[CURRENT_DATE(), DATE '2020-01-01']"
     """
 
     def __init__(
@@ -100,6 +124,7 @@ def __init__(
         name: str,
         field_type: str,
         mode: str = "NULLABLE",
+        default_value_expression: str = None,
         description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
         fields: Iterable["SchemaField"] = (),
         policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
@@ -115,6 +140,8 @@ def __init__(
             self._properties["mode"] = mode.upper()
         if description is not _DEFAULT_VALUE:
             self._properties["description"] = description
+        if default_value_expression is not None:
+            self._properties["defaultValueExpression"] = default_value_expression
         if precision is not _DEFAULT_VALUE:
             self._properties["precision"] = precision
         if scale is not _DEFAULT_VALUE:
@@ -154,6 +181,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
         fields = api_repr.get("fields", ())
         policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
 
+        default_value_expression = api_repr.get("defaultValueExpression", None)
+
         if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
             policy_tags = PolicyTagList.from_api_repr(policy_tags)
 
@@ -161,6 +190,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
             field_type=field_type,
             fields=[cls.from_api_repr(f) for f in fields],
             mode=mode.upper(),
+            default_value_expression=default_value_expression,
             description=description,
             name=api_repr["name"],
             policy_tags=policy_tags,
@@ -197,6 +227,11 @@ def is_nullable(self):
         """bool: whether 'mode' is 'nullable'."""
         return self.mode == "NULLABLE"
 
+    @property
+    def default_value_expression(self):
+        """Optional[str] default value of a field, using an SQL expression"""
+        return self._properties.get("defaultValueExpression")
+
     @property
     def description(self):
         """Optional[str]: description for the field."""
@@ -260,7 +295,7 @@ def _key(self):
         field_type = self.field_type.upper() if self.field_type is not None else None
 
         # Type can temporarily be set to None if the code needs a SchemaField instance,
-        # but has npt determined the exact type of the field yet.
+        # but has not determined the exact type of the field yet.
         if field_type is not None:
             if field_type == "STRING" or field_type == "BYTES":
                 if self.max_length is not None:
@@ -281,6 +316,7 @@ def _key(self):
             field_type,
             # Mode is always str, if not given it defaults to a str value
             self.mode.upper(),  # pytype: disable=attribute-error
+            self.default_value_expression,
             self.description,
             self._fields,
             policy_tags,
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index 4fd77dd21..96888d62d 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any:
         >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
         None
 
-        The default value can be overrided with the ``default`` parameter.
+        The default value can be overridden with the ``default`` parameter.
 
         >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
         ''
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index 152bb8144..25edc18e1 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self):
             list(table.schema[1].policy_tags.names), [child_policy_tag.name]
         )
 
+    def test_create_table_with_default_value_expression(self):
+        dataset = self.temp_dataset(
+            _make_dataset_id("create_table_with_default_value_expression")
+        )
+
+        table_id = "test_table"
+        timestamp_field_name = "timestamp_field_with_default_value_expression"
+
+        string_default_val_expression = "'FOO'"
+        timestamp_default_val_expression = "CURRENT_TIMESTAMP"
+
+        schema = [
+            bigquery.SchemaField(
+                "username",
+                "STRING",
+                default_value_expression=string_default_val_expression,
+            ),
+            bigquery.SchemaField(
+                timestamp_field_name,
+                "TIMESTAMP",
+                default_value_expression=timestamp_default_val_expression,
+            ),
+        ]
+        table_arg = Table(dataset.table(table_id), schema=schema)
+        self.assertFalse(_table_exists(table_arg))
+
+        table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+        self.to_delete.insert(0, table)
+
+        self.assertTrue(_table_exists(table))
+
+        # Fetch the created table and its metadata to verify that the default
+        # value expression is assigned to fields
+        remote_table = Config.CLIENT.get_table(table)
+        remote_schema = remote_table.schema
+        self.assertEqual(remote_schema, schema)
+
+        for field in remote_schema:
+            if field.name == "username":
+                self.assertEqual("'FOO'", field.default_value_expression)
+            if field.name == timestamp_field_name:
+                self.assertEqual("CURRENT_TIMESTAMP", field.default_value_expression)
+
+        # Insert rows into the created table to verify default values are populated
+        # when value is not provided
+        NOW_SECONDS = 1448911495.484366
+        NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC)
+
+        # Rows to insert. Row #1 will get the default `TIMESTAMP` defaultValueExpression CURRENT_TIMESTAMP
+        # Row #2 will get the default `STRING` defaultValueExpression "'FOO'"
+        ROWS = [{"username": "john_doe"}, {timestamp_field_name: NOW}]
+
+        errors = Config.CLIENT.insert_rows(table, ROWS)
+        self.assertEqual(len(errors), 0)
+
+        # Get list of inserted rows
+        row_1, row_2 = [row for row in list(Config.CLIENT.list_rows(table))]
+
+        # Assert that row values are populated with default value expression
+        self.assertIsInstance(row_1.get(timestamp_field_name), datetime.datetime)
+        self.assertEqual("FOO", row_2.get("username"))
+
     def test_create_table_w_time_partitioning_w_clustering_fields(self):
         from google.cloud.bigquery.table import TimePartitioning
         from google.cloud.bigquery.table import TimePartitioningType
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 30bab8fa9..f4552cda2 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self):
         ]"""
 
         expected = [
-            SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
-            SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
-            SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+            SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+            SchemaField(
+                "rep",
+                "STRING",
+                "NULLABLE",
+                description="sales representative",
+            ),
+            SchemaField(
+                "sales",
+                "FLOAT",
+                "NULLABLE",
+                description="total sales",
+            ),
         ]
 
         client = self._make_client()
@@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self):
         ]"""
 
         expected = [
-            SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
-            SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
-            SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+            SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+            SchemaField(
+                "rep", "STRING", "NULLABLE", description="sales representative"
+            ),
+            SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
         ]
 
         client = self._make_client()
@@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self):
         ]
 
         schema_list = [
-            SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
-            SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
-            SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+            SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+            SchemaField(
+                "rep", "STRING", "NULLABLE", description="sales representative"
+            ),
+            SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
         ]
 
         client = self._make_client()
@@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self):
         ]
 
         schema_list = [
-            SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
-            SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
-            SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+            SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+            SchemaField(
+                "rep", "STRING", "NULLABLE", description="sales representative"
+            ),
+            SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
         ]
 
         fake_file = io.StringIO()
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 6a547cb13..c6593e1b4 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -45,8 +45,10 @@ def test_constructor_defaults(self):
         self.assertIsNone(field.description)
         self.assertEqual(field.fields, ())
         self.assertIsNone(field.policy_tags)
+        self.assertIsNone(field.default_value_expression)
 
     def test_constructor_explicit(self):
+        FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
         field = self._make_one(
             "test",
             "STRING",
             mode="REQUIRED",
             description="Testing",
             policy_tags=PolicyTagList(
                 names=(
                     "projects/f/locations/g/taxonomies/h/policyTags/i",
                 )
             ),
+            default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
         )
         self.assertEqual(field.name, "test")
         self.assertEqual(field.field_type, "STRING")
         self.assertEqual(field.mode, "REQUIRED")
+        self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION)
         self.assertEqual(field.description, "Testing")
         self.assertEqual(field.fields, ())
         self.assertEqual(
@@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self):
         self.assertEqual(field.field_type, "RECORD")
         self.assertEqual(field.mode, "NULLABLE")
         self.assertEqual(len(field.fields), 0)
+        self.assertEqual(field.default_value_expression, None)
 
         # Keys not present in API representation shouldn't be included in
         # _properties.
@@ -527,12 +532,12 @@ def test___hash__not_equals(self):
 
     def test___repr__(self):
         field1 = self._make_one("field1", "STRING")
-        expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
+        expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)"
         self.assertEqual(repr(field1), expected)
 
     def test___repr__type_not_set(self):
         field1 = self._make_one("field1", field_type=None)
-        expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)"
+        expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)"
         self.assertEqual(repr(field1), expected)
 
     def test___repr__evaluable_no_policy_tags(self):
From eb49873176dee478617eb50472d44703abca53b5 Mon Sep 17 00:00:00 2001
From: Walt Askew
Date: Wed, 16 Nov 2022 05:44:06 -0800
Subject: [PATCH 3/5] feat: Add More Specific Type Annotations for Row Dictionaries (#1295)

The keys must be strings as they represent column names. Update type
annotations to reflect this.

Co-authored-by: aribray <45905583+aribray@users.noreply.github.com>
---
 google/cloud/bigquery/client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 1200d78f9..b72505a15 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -3349,10 +3349,10 @@ def query(
     def insert_rows(
         self,
         table: Union[Table, TableReference, str],
-        rows: Union[Iterable[Tuple], Iterable[Dict]],
+        rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]],
         selected_fields: Sequence[SchemaField] = None,
         **kwargs,
-    ) -> Sequence[dict]:
+    ) -> Sequence[Dict[str, Any]]:
         """Insert rows into a table via the streaming API.
 
         See
@@ -3470,7 +3470,7 @@ def insert_rows_from_dataframe(
     def insert_rows_json(
         self,
         table: Union[Table, TableReference, TableListItem, str],
-        json_rows: Sequence[Dict],
+        json_rows: Sequence[Mapping[str, Any]],
         row_ids: Union[
             Iterable[Optional[str]], AutoRowIDs, None
         ] = AutoRowIDs.GENERATE_UUID,

From cbab5acf971e67ca74ad9df1f62716903d234a1c Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Thu, 17 Nov 2022 15:08:37 -0500
Subject: [PATCH 4/5] chore(setup.py): remove python upper bound (#1413)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c8bf640c2..5fc694c6f 100644
--- a/setup.py
+++ b/setup.py
@@ -124,7 +124,7 @@
     namespace_packages=namespaces,
     install_requires=dependencies,
     extras_require=extras,
-    python_requires=">=3.7, <3.11",
+    python_requires=">=3.7",
     include_package_data=True,
     zip_safe=False,
 )

From 4e6cc67409ceb58eec44c9ff1fbeeaf5de341fd8 Mon Sep 17 00:00:00 2001
From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com>
Date: Fri, 18 Nov 2022 10:56:42 -0600
Subject: [PATCH 5/5] chore(main): release 3.4.0 (#1407)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md                     | 9 +++++++++
 google/cloud/bigquery/version.py | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 869d063e5..294e5b42f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,15 @@
 
 [1]: https://pypi.org/project/google-cloud-bigquery/#history
 
+## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17)
+
+
+### Features
+
+* Add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig ([#1399](https://github.com/googleapis/python-bigquery/issues/1399)) ([931285f](https://github.com/googleapis/python-bigquery/commit/931285ff85842ab07a0ef2ff9db808181ea3c5e4))
+* Add default value expression ([#1408](https://github.com/googleapis/python-bigquery/issues/1408)) ([207aa50](https://github.com/googleapis/python-bigquery/commit/207aa506ab634bdb13256fa5bd8745ec9de23290))
+* Add More Specific Type Annotations for Row Dictionaries ([#1295](https://github.com/googleapis/python-bigquery/issues/1295)) ([eb49873](https://github.com/googleapis/python-bigquery/commit/eb49873176dee478617eb50472d44703abca53b5))
+
 ## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02)
 
 
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index 43360a201..6b822f0c1 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "3.3.6"
+__version__ = "3.4.0"
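Closing note on patch 3: `insert_rows` accepts rows either as tuples, whose values are matched positionally against the table schema, or as mappings keyed by column name, which is why the annotations narrow dictionary keys to `str`. A short sketch, assuming a hypothetical table whose first column is a STRING named "username":

    from google.cloud import bigquery

    client = bigquery.Client()  # assumes application-default credentials
    table = client.get_table("my-project.my_dataset.users")  # hypothetical table ID

    # Mapping rows: keys are column names, hence Mapping[str, Any].
    errors = client.insert_rows(table, [{"username": "ada"}, {"username": "grace"}])
    assert not errors

    # Tuple rows: values are paired with the table's schema fields in order.
    errors = client.insert_rows(table, [("ada",), ("grace",)])
    assert not errors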