Skip to content

Commit 0bf80d7

Browse files
authored
fix(bigquery): fetch dst table for jobs when reading with Storage API (#7325)
Pay the cost of re-fetching job information when using the Storage API, to guarantee that the destination table is available. Some cases were not covered before, such as when the job takes a long time to run. Fixes #7322
1 parent 2f45776 commit 0bf80d7

File tree

6 files changed

+29
-16
lines changed

6 files changed

+29
-16
lines changed

bigquery/bigquery.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func NewClient(ctx context.Context, projectID string, opts ...option.ClientOptio
102102
// large datasets from tables, jobs or queries.
103103
// Calling this method twice will return an error.
104104
func (c *Client) EnableStorageReadClient(ctx context.Context, opts ...option.ClientOption) error {
105-
if c.rc != nil {
105+
if c.isStorageReadAvailable() {
106106
return fmt.Errorf("failed: storage read client already set up")
107107
}
108108
rc, err := newReadClient(ctx, c.projectID, opts...)
@@ -113,6 +113,10 @@ func (c *Client) EnableStorageReadClient(ctx context.Context, opts ...option.Cli
113113
return nil
114114
}
115115

116+
func (c *Client) isStorageReadAvailable() bool {
117+
return c.rc != nil
118+
}
119+
116120
// Project returns the project ID or number for this instance of the client, which may have
117121
// either been explicitly specified or autodetected.
118122
func (c *Client) Project() string {
@@ -123,7 +127,7 @@ func (c *Client) Project() string {
123127
// Close should be called when the client is no longer needed.
124128
// It need not be called at program exit.
125129
func (c *Client) Close() error {
126-
if c.rc != nil {
130+
if c.isStorageReadAvailable() {
127131
err := c.rc.close()
128132
if err != nil {
129133
return err

bigquery/job.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ func (j *Job) read(ctx context.Context, waitForQuery func(context.Context, strin
322322
return nil, err
323323
}
324324
var it *RowIterator
325-
if j.c.rc != nil {
325+
if j.c.isStorageReadAvailable() {
326326
it, err = newStorageRowIteratorFromJob(ctx, j)
327327
if err != nil {
328328
it = nil

bigquery/query.go

+3-8
Original file line numberDiff line numberDiff line change
@@ -402,13 +402,8 @@ func (q *Query) Read(ctx context.Context) (it *RowIterator, err error) {
402402

403403
if resp.JobComplete {
404404
// If more pages are available, discard and use the Storage API instead
405-
if resp.PageToken != "" && q.client.rc != nil {
406-
// Needed to fetch destination table
407-
job, err := q.client.JobFromID(ctx, resp.JobReference.JobId)
408-
if err != nil {
409-
return nil, err
410-
}
411-
it, err = newStorageRowIteratorFromJob(ctx, job)
405+
if resp.PageToken != "" && q.client.isStorageReadAvailable() {
406+
it, err = newStorageRowIteratorFromJob(ctx, minimalJob)
412407
if err == nil {
413408
return it, nil
414409
}
@@ -439,7 +434,7 @@ func (q *Query) Read(ctx context.Context) (it *RowIterator, err error) {
439434
// user's Query configuration. If all the options set on the job are supported on the
440435
// faster query path, this method returns a QueryRequest suitable for execution.
441436
func (q *Query) probeFastPath() (*bq.QueryRequest, error) {
442-
if q.forceStorageAPI && q.client.rc != nil {
437+
if q.forceStorageAPI && q.client.isStorageReadAvailable() {
443438
return nil, fmt.Errorf("force Storage API usage")
444439
}
445440
// This is a denylist of settings which prevent us from composing an equivalent

bigquery/storage_integration_test.go

+12-3
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ func TestIntegration_StorageReadQueryMorePages(t *testing.T) {
272272
sql := fmt.Sprintf(`SELECT repository_url as url, repository_owner as owner, repository_forks as forks FROM %s`, table)
273273
// Don't forceStorageAPI usage and still see internally Storage API is selected
274274
q := storageOptimizedClient.Query(sql)
275+
q.DisableQueryCache = true
275276
it, err := q.Read(ctx)
276277
if err != nil {
277278
t.Fatal(err)
@@ -304,12 +305,13 @@ func TestIntegration_StorageReadCancel(t *testing.T) {
304305
t.Skip("Integration tests skipped")
305306
}
306307
ctx := context.Background()
307-
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
308+
ctx, cancel := context.WithCancel(ctx)
308309
defer cancel()
309310
table := "`bigquery-public-data.samples.github_timeline`"
310311
sql := fmt.Sprintf(`SELECT repository_url as url, repository_owner as owner, repository_forks as forks FROM %s`, table)
311312
storageOptimizedClient.rc.settings.maxWorkerCount = 1
312313
q := storageOptimizedClient.Query(sql)
314+
q.DisableQueryCache = true
313315
q.forceStorageAPI = true
314316
it, err := q.Read(ctx)
315317
if err != nil {
@@ -319,21 +321,28 @@ func TestIntegration_StorageReadCancel(t *testing.T) {
319321
t.Fatal("expected query to use Storage API")
320322
}
321323

324+
// Cancel read after reading more than 1000 rows
325+
rowsRead := 0
322326
for {
323327
var dst []Value
324328
err := it.Next(&dst)
325329
if err == iterator.Done {
326330
break
327331
}
328332
if err != nil {
329-
if errors.Is(err, context.DeadlineExceeded) {
333+
if errors.Is(err, context.DeadlineExceeded) ||
334+
errors.Is(err, context.Canceled) {
330335
break
331336
}
332337
t.Fatalf("failed to fetch via storage API: %v", err)
333338
}
339+
rowsRead++
340+
if rowsRead > 1000 {
341+
cancel()
342+
}
334343
}
335344
// resources are cleaned asynchronously
336-
time.Sleep(500 * time.Millisecond)
345+
time.Sleep(time.Second)
337346
if !it.arrowIterator.isDone() {
338347
t.Fatal("expected stream to be done")
339348
}

bigquery/storage_iterator.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,12 @@ func newStorageRowIteratorFromTable(ctx context.Context, table *Table, ordered b
6464
return it, nil
6565
}
6666

67-
func newStorageRowIteratorFromJob(ctx context.Context, job *Job) (*RowIterator, error) {
67+
func newStorageRowIteratorFromJob(ctx context.Context, j *Job) (*RowIterator, error) {
68+
// Needed to fetch destination table
69+
job, err := j.c.JobFromID(ctx, j.jobID)
70+
if err != nil {
71+
return nil, err
72+
}
6873
cfg, err := job.Config()
6974
if err != nil {
7075
return nil, err

bigquery/table.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,7 @@ func (t *Table) Read(ctx context.Context) *RowIterator {
813813
}
814814

815815
func (t *Table) read(ctx context.Context, pf pageFetcher) *RowIterator {
816-
if t.c.rc != nil {
816+
if t.c.isStorageReadAvailable() {
817817
it, err := newStorageRowIteratorFromTable(ctx, t, false)
818818
if err == nil {
819819
return it

0 commit comments

Comments
 (0)