
Commit 978d4a1

feat(bigquery): json support on managedwriter/adapt pkg (#10542)
1 parent fd53d47 commit 978d4a1

8 files changed, +230 -56 lines changed

bigquery/storage/managedwriter/adapt/protoconversion.go (+2)

@@ -63,6 +63,7 @@ var bqTypeToFieldTypeMap = map[storagepb.TableFieldSchema_Type]descriptorpb.Fiel
     storagepb.TableFieldSchema_TIME:      descriptorpb.FieldDescriptorProto_TYPE_INT64,
     storagepb.TableFieldSchema_TIMESTAMP: descriptorpb.FieldDescriptorProto_TYPE_INT64,
     storagepb.TableFieldSchema_RANGE:     descriptorpb.FieldDescriptorProto_TYPE_MESSAGE,
+    storagepb.TableFieldSchema_JSON:      descriptorpb.FieldDescriptorProto_TYPE_STRING,
 }
 
 var allowedRangeTypes = []storagepb.TableFieldSchema_Type{
@@ -107,6 +108,7 @@ var bqTypeToWrapperMap = map[storagepb.TableFieldSchema_Type]string{
     storagepb.TableFieldSchema_STRING:    ".google.protobuf.StringValue",
     storagepb.TableFieldSchema_TIME:      ".google.protobuf.Int64Value",
     storagepb.TableFieldSchema_TIMESTAMP: ".google.protobuf.Int64Value",
+    storagepb.TableFieldSchema_JSON:      ".google.protobuf.StringValue",
 }
 
 // filename used by well known types proto
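As a rough illustration of what the new bqTypeToFieldTypeMap entry enables (this sketch is not part of the commit), converting a storage schema that contains a JSON column should now yield a proto2 string field; NULLABLE JSON columns pick up the StringValue wrapper via bqTypeToWrapperMap. The column name "payload" and the "root" scope below are illustrative assumptions; the adapt helpers used are the package's existing exported API.

package main

import (
    "fmt"
    "log"

    storagepb "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
    "cloud.google.com/go/bigquery/storage/managedwriter/adapt"
    "google.golang.org/protobuf/reflect/protoreflect"
)

func main() {
    // Storage schema with a single JSON column; "payload" is a hypothetical name.
    schema := &storagepb.TableSchema{
        Fields: []*storagepb.TableFieldSchema{
            {Name: "payload", Type: storagepb.TableFieldSchema_JSON, Mode: storagepb.TableFieldSchema_NULLABLE},
        },
    }
    // Convert the storage schema into a proto2 descriptor; with this commit the
    // JSON column is expected to come back as a string field.
    desc, err := adapt.StorageSchemaToProto2Descriptor(schema, "root")
    if err != nil {
        log.Fatalf("StorageSchemaToProto2Descriptor: %v", err)
    }
    md, ok := desc.(protoreflect.MessageDescriptor)
    if !ok {
        log.Fatalf("expected a message descriptor, got %T", desc)
    }
    f := md.Fields().ByName("payload")
    fmt.Println(f.Kind()) // expected: string
}

Because JSON maps to TYPE_STRING (and to .google.protobuf.StringValue for wrapper-based conversions), callers supply JSON values as serialized JSON strings rather than structured messages.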

bigquery/storage/managedwriter/adapt/protoconversion_test.go (+66)

@@ -100,6 +100,65 @@ func TestSchemaToProtoConversion(t *testing.T) {
                 },
             },
         },
+        {
+            description: "json type",
+            bq: &storagepb.TableSchema{
+                Fields: []*storagepb.TableFieldSchema{
+                    {Name: "json_required", Type: storagepb.TableFieldSchema_JSON, Mode: storagepb.TableFieldSchema_REQUIRED},
+                    {Name: "json_optional", Type: storagepb.TableFieldSchema_JSON, Mode: storagepb.TableFieldSchema_NULLABLE},
+                }},
+            wantProto2: &descriptorpb.DescriptorProto{
+                Name: proto.String("root"),
+                Field: []*descriptorpb.FieldDescriptorProto{
+                    {
+                        Name:   proto.String("json_required"),
+                        Number: proto.Int32(1),
+                        Type:   descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                        Label:  descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum()},
+                    {
+                        Name:   proto.String("json_optional"),
+                        Number: proto.Int32(2),
+                        Type:   descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                        Label:  descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
+                    },
+                },
+            },
+            wantProto2Normalized: &descriptorpb.DescriptorProto{
+                Name: proto.String("root"),
+                Field: []*descriptorpb.FieldDescriptorProto{
+                    {
+                        Name:   proto.String("json_required"),
+                        Number: proto.Int32(1),
+                        Type:   descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                        Label:  descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
+                    },
+                    {
+                        Name:   proto.String("json_optional"),
+                        Number: proto.Int32(2),
+                        Type:   descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                        Label:  descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
+                    },
+                },
+            },
+            wantProto3: &descriptorpb.DescriptorProto{
+                Name: proto.String("root"),
+                Field: []*descriptorpb.FieldDescriptorProto{
+                    {
+                        Name:   proto.String("json_required"),
+                        Number: proto.Int32(1),
+                        Type:   descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                        Label:  descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
+                    },
+                    {
+                        Name:     proto.String("json_optional"),
+                        Number:   proto.Int32(2),
+                        Type:     descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
+                        TypeName: proto.String(".google.protobuf.StringValue"),
+                        Label:    descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
+                    },
+                },
+            },
+        },
         {
             // exercise construct of a submessage
             description: "nested",
@@ -1236,6 +1295,13 @@ func TestNormalizeDescriptor(t *testing.T) {
                     Type:  descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
                     Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
                 },
+                {
+                    Name:     proto.String("json_type"),
+                    JsonName: proto.String("jsonType"),
+                    Number:   proto.Int32(4),
+                    Type:     descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
+                    Label:    descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
+                },
             },
             NestedType: []*descriptorpb.DescriptorProto{
                 {

bigquery/storage/managedwriter/adapt/schemaconversion.go (+1)

@@ -36,6 +36,7 @@ var fieldTypeMap = map[bigquery.FieldType]storagepb.TableFieldSchema_Type{
     bigquery.BigNumericFieldType: storagepb.TableFieldSchema_BIGNUMERIC,
     bigquery.GeographyFieldType:  storagepb.TableFieldSchema_GEOGRAPHY,
     bigquery.RangeFieldType:      storagepb.TableFieldSchema_RANGE,
+    bigquery.JSONFieldType:       storagepb.TableFieldSchema_JSON,
 }
 
 func bqFieldToProto(in *bigquery.FieldSchema) (*storagepb.TableFieldSchema, error) {
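A minimal sketch of the schema-conversion side (not part of the commit): with the new fieldTypeMap entry, a bigquery.JSONFieldType field should convert to a storagepb.TableFieldSchema_JSON field. The field name "attributes" is an illustrative assumption.

package main

import (
    "fmt"
    "log"

    "cloud.google.com/go/bigquery"
    "cloud.google.com/go/bigquery/storage/managedwriter/adapt"
)

func main() {
    // BigQuery schema with one JSON column; "attributes" is a hypothetical name.
    schema := bigquery.Schema{
        {Name: "attributes", Type: bigquery.JSONFieldType},
    }
    converted, err := adapt.BQSchemaToStorageTableSchema(schema)
    if err != nil {
        log.Fatalf("BQSchemaToStorageTableSchema: %v", err)
    }
    // Expect the field to come back as TableFieldSchema_JSON with NULLABLE mode.
    fmt.Println(converted.GetFields()[0].GetType(), converted.GetFields()[0].GetMode())
}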

bigquery/storage/managedwriter/adapt/schemaconversion_test.go (+26)

@@ -95,6 +95,21 @@ func TestFieldConversions(t *testing.T) {
            },
        },
    },
+   {
+       desc: "json type",
+       bq: &bigquery.FieldSchema{
+           Name:        "name",
+           Type:        bigquery.JSONFieldType,
+           Description: "description",
+           Required:    true,
+       },
+       proto: &storagepb.TableFieldSchema{
+           Name:        "name",
+           Type:        storagepb.TableFieldSchema_JSON,
+           Description: "description",
+           Mode:        storagepb.TableFieldSchema_REQUIRED,
+       },
+   },
    {
        desc: "range type",
        bq: &bigquery.FieldSchema{
@@ -200,6 +215,17 @@ func TestSchemaConversion(t *testing.T) {
            },
        },
    },
+   {
+       description: "json type",
+       bqSchema: bigquery.Schema{
+           {Name: "json", Type: bigquery.JSONFieldType},
+       },
+       storageSchema: &storagepb.TableSchema{
+           Fields: []*storagepb.TableFieldSchema{
+               {Name: "json", Type: storagepb.TableFieldSchema_JSON, Mode: storagepb.TableFieldSchema_NULLABLE},
+           },
+       },
+   },
    {
        description: "range types",
        bqSchema: bigquery.Schema{

bigquery/storage/managedwriter/integration_test.go (+63)

@@ -229,6 +229,10 @@ func TestIntegration_ManagedWriter(t *testing.T) {
        t.Parallel()
        testDefaultStreamDynamicJSON(ctx, t, mwClient, bqClient, dataset)
    })
+   t.Run("DefaultStreamJSONData", func(t *testing.T) {
+       t.Parallel()
+       testDefaultStreamJSONData(ctx, t, mwClient, bqClient, dataset)
+   })
    t.Run("CommittedStream", func(t *testing.T) {
        t.Parallel()
        testCommittedStream(ctx, t, mwClient, bqClient, dataset)
@@ -455,6 +459,63 @@ func testDefaultStreamDynamicJSON(ctx context.Context, t *testing.T, mwClient *C
        withDistinctValues("public", int64(2)))
 }
 
+func testDefaultStreamJSONData(ctx context.Context, t *testing.T, mwClient *Client, bqClient *bigquery.Client, dataset *bigquery.Dataset) {
+   testTable := dataset.Table(tableIDs.New())
+   if err := testTable.Create(ctx, &bigquery.TableMetadata{Schema: testdata.ComplexTypeSchema}); err != nil {
+       t.Fatalf("failed to create test table %s: %v", testTable.FullyQualifiedName(), err)
+   }
+
+   md, descriptorProto := setupDynamicDescriptors(t, testdata.ComplexTypeSchema)
+
+   ms, err := mwClient.NewManagedStream(ctx,
+       WithDestinationTable(TableParentFromParts(testTable.ProjectID, testTable.DatasetID, testTable.TableID)),
+       WithType(DefaultStream),
+       WithSchemaDescriptor(descriptorProto),
+   )
+   if err != nil {
+       t.Fatalf("NewManagedStream: %v", err)
+   }
+   validateTableConstraints(ctx, t, bqClient, testTable, "before send",
+       withExactRowCount(0))
+
+   sampleJSONData := [][]byte{
+       []byte(`{"json_type":"{\"foo\": \"bar\"}"}`),
+       []byte(`{"json_type":"{\"key\": \"value\"}"}`),
+       []byte(`{"json_type":"\"a string\""}`),
+   }
+
+   var result *AppendResult
+   for k, v := range sampleJSONData {
+       message := dynamicpb.NewMessage(md)
+
+       // First, json->proto message
+       err = protojson.Unmarshal(v, message)
+       if err != nil {
+           t.Fatalf("failed to Unmarshal json message for row %d: %v", k, err)
+       }
+       // Then, proto message -> bytes.
+       b, err := proto.Marshal(message)
+       if err != nil {
+           t.Fatalf("failed to marshal proto bytes for row %d: %v", k, err)
+       }
+       result, err = ms.AppendRows(ctx, [][]byte{b})
+       if err != nil {
+           t.Errorf("single-row append %d failed: %v", k, err)
+       }
+   }
+
+   // Wait for the result to indicate ready, then validate.
+   o, err := result.GetResult(ctx)
+   if err != nil {
+       t.Errorf("result error for last send: %v", err)
+   }
+   if o != NoStreamOffset {
+       t.Errorf("offset mismatch, got %d want %d", o, NoStreamOffset)
+   }
+   validateTableConstraints(ctx, t, bqClient, testTable, "after send",
+       withExactRowCount(int64(len(sampleJSONData))))
+}
+
 func testBufferedStream(ctx context.Context, t *testing.T, mwClient *Client, bqClient *bigquery.Client, dataset *bigquery.Dataset) {
    testTable := dataset.Table(tableIDs.New())
    if err := testTable.Create(ctx, &bigquery.TableMetadata{Schema: testdata.SimpleMessageSchema}); err != nil {
@@ -1389,6 +1450,7 @@ func TestIntegration_ProtoNormalization(t *testing.T) {
    t.Run("ComplexType", func(t *testing.T) {
        t.Parallel()
        schema := testdata.ComplexTypeSchema
+       jsonData := "{\"foo\": \"bar\"}"
        mesg := &testdata.ComplexType{
            NestedRepeatedType: []*testdata.NestedType{
                {
@@ -1404,6 +1466,7 @@ func TestIntegration_ProtoNormalization(t *testing.T) {
            RangeType: &testdata.RangeTypeTimestamp{
                End: proto.Int64(time.Now().UnixNano()),
            },
+           JsonType: &jsonData,
        }
        b, err := proto.Marshal(mesg)
        if err != nil {
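Putting the pieces together, a hedged end-to-end sketch (not from this commit) of writing a row that contains a JSON column through the default stream, following the same dynamic-message pattern as testDefaultStreamJSONData above. The project, dataset, and table IDs are placeholders, and the destination table is assumed to already exist with a single JSON column named json_type.

package main

import (
    "context"
    "log"

    "cloud.google.com/go/bigquery"
    "cloud.google.com/go/bigquery/storage/managedwriter"
    "cloud.google.com/go/bigquery/storage/managedwriter/adapt"
    "google.golang.org/protobuf/encoding/protojson"
    "google.golang.org/protobuf/proto"
    "google.golang.org/protobuf/reflect/protoreflect"
    "google.golang.org/protobuf/types/dynamicpb"
)

func main() {
    ctx := context.Background()
    project, dataset, table := "my-project", "my_dataset", "my_table" // placeholders

    client, err := managedwriter.NewClient(ctx, project)
    if err != nil {
        log.Fatalf("NewClient: %v", err)
    }
    defer client.Close()

    // Describe the destination table: a single JSON column named json_type (assumed).
    bqSchema := bigquery.Schema{
        {Name: "json_type", Type: bigquery.JSONFieldType},
    }
    storageSchema, err := adapt.BQSchemaToStorageTableSchema(bqSchema)
    if err != nil {
        log.Fatalf("BQSchemaToStorageTableSchema: %v", err)
    }
    desc, err := adapt.StorageSchemaToProto2Descriptor(storageSchema, "root")
    if err != nil {
        log.Fatalf("StorageSchemaToProto2Descriptor: %v", err)
    }
    md, ok := desc.(protoreflect.MessageDescriptor)
    if !ok {
        log.Fatalf("expected a message descriptor, got %T", desc)
    }
    dp, err := adapt.NormalizeDescriptor(md)
    if err != nil {
        log.Fatalf("NormalizeDescriptor: %v", err)
    }

    ms, err := client.NewManagedStream(ctx,
        managedwriter.WithDestinationTable(managedwriter.TableParentFromParts(project, dataset, table)),
        managedwriter.WithType(managedwriter.DefaultStream),
        managedwriter.WithSchemaDescriptor(dp),
    )
    if err != nil {
        log.Fatalf("NewManagedStream: %v", err)
    }
    defer ms.Close()

    // The JSON column maps to a proto string field, so the JSON document is
    // supplied as an escaped string value.
    row := []byte(`{"json_type":"{\"foo\": \"bar\"}"}`)
    msg := dynamicpb.NewMessage(md)
    if err := protojson.Unmarshal(row, msg); err != nil {
        log.Fatalf("protojson.Unmarshal: %v", err)
    }
    b, err := proto.Marshal(msg)
    if err != nil {
        log.Fatalf("proto.Marshal: %v", err)
    }
    res, err := ms.AppendRows(ctx, [][]byte{b})
    if err != nil {
        log.Fatalf("AppendRows: %v", err)
    }
    if _, err := res.GetResult(ctx); err != nil {
        log.Fatalf("append result: %v", err)
    }
}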

bigquery/storage/managedwriter/testdata/schemas.go (+4)

@@ -99,6 +99,10 @@ var (
            Type: bigquery.TimestampFieldType,
        },
    },
+   {
+       Name: "json_type",
+       Type: bigquery.JSONFieldType,
+   },
 }
 
 // We currently follow proto2 rules here, hence the well known types getting treated as records.
