Skip to content

Commit 53a56be

Browse files
stephaniewang526Praful Makani
and
Praful Makani
authored
docs(samples): create and query Amazon s3 data using external table (#835)
* docs(samples): add omni query on external table aws * docs(samples): update code * docs(samples): add user id and external id * docs(samples): fix connection name * docs(samples): update create external table AWS sample * update create dataset AWS sample * nit clean up * update query external table sample * fix checkstyle errors * update based on comments Co-authored-by: Praful Makani <[email protected]>
1 parent 36b5f06 commit 53a56be

File tree

5 files changed

+203
-51
lines changed

5 files changed

+203
-51
lines changed

samples/snippets/src/main/java/com/example/bigquery/CreateExternalTableAws.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,46 +16,62 @@
1616

1717
package com.example.bigquery;
1818

19+
// [START bigquery_omni_create_external_table]
1920
import com.google.cloud.bigquery.BigQuery;
2021
import com.google.cloud.bigquery.BigQueryException;
2122
import com.google.cloud.bigquery.BigQueryOptions;
2223
import com.google.cloud.bigquery.CsvOptions;
2324
import com.google.cloud.bigquery.ExternalTableDefinition;
25+
import com.google.cloud.bigquery.Field;
26+
import com.google.cloud.bigquery.Schema;
27+
import com.google.cloud.bigquery.StandardSQLTypeName;
2428
import com.google.cloud.bigquery.TableId;
2529
import com.google.cloud.bigquery.TableInfo;
2630

31+
// Sample to create an external aws table
2732
public class CreateExternalTableAws {
2833

2934
public static void main(String[] args) {
3035
// TODO(developer): Replace these variables before running the sample.
36+
String projectId = "MY_PROJECT_ID";
3137
String datasetName = "MY_DATASET_NAME";
3238
String tableName = "MY_TABLE_NAME";
33-
// Create a aws connection
34-
// projects/{project_id}/locations/{location_id}/connections/{connection_id}
35-
String connectionId = "MY_CONNECTION_NAME";
39+
String connectionId = "MY_CONNECTION_ID";
3640
String sourceUri = "s3://your-bucket-name/";
3741
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
42+
Schema schema =
43+
Schema.of(
44+
Field.of("name", StandardSQLTypeName.STRING),
45+
Field.of("post_abbr", StandardSQLTypeName.STRING));
3846
ExternalTableDefinition externalTableDefinition =
3947
ExternalTableDefinition.newBuilder(sourceUri, options)
4048
.setConnectionId(connectionId)
49+
.setSchema(schema)
4150
.build();
42-
createExternalTableAws(datasetName, tableName, externalTableDefinition);
51+
createExternalTableAws(projectId, datasetName, tableName, externalTableDefinition);
4352
}
4453

4554
public static void createExternalTableAws(
46-
String datasetName, String tableName, ExternalTableDefinition externalTableDefinition) {
55+
String projectId,
56+
String datasetName,
57+
String tableName,
58+
ExternalTableDefinition externalTableDefinition) {
4759
try {
4860
// Initialize client that will be used to send requests. This client only needs to be created
4961
// once, and can be reused for multiple requests.
5062
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
5163

52-
TableId tableId = TableId.of(datasetName, tableName);
64+
TableId tableId = TableId.of(projectId, datasetName, tableName);
5365
TableInfo tableInfo = TableInfo.newBuilder(tableId, externalTableDefinition).build();
5466

5567
bigquery.create(tableInfo);
5668
System.out.println("Aws external table created successfully");
69+
70+
// Clean up
71+
bigquery.delete(TableId.of(projectId, datasetName, tableName));
5772
} catch (BigQueryException e) {
5873
System.out.println("Aws external was not created." + e.toString());
5974
}
6075
}
6176
}
77+
// [END bigquery_omni_create_external_table]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
// [START bigquery_omni_query_external_aws_s3]
20+
import com.google.cloud.bigquery.BigQuery;
21+
import com.google.cloud.bigquery.BigQueryException;
22+
import com.google.cloud.bigquery.BigQueryOptions;
23+
import com.google.cloud.bigquery.CsvOptions;
24+
import com.google.cloud.bigquery.DatasetId;
25+
import com.google.cloud.bigquery.ExternalTableDefinition;
26+
import com.google.cloud.bigquery.Field;
27+
import com.google.cloud.bigquery.QueryJobConfiguration;
28+
import com.google.cloud.bigquery.Schema;
29+
import com.google.cloud.bigquery.StandardSQLTypeName;
30+
import com.google.cloud.bigquery.TableId;
31+
import com.google.cloud.bigquery.TableInfo;
32+
import com.google.cloud.bigquery.TableResult;
33+
34+
// Sample to query an external AWS S3 data source using a permanent table
35+
public class QueryExternalTableAws {
36+
37+
public static void main(String[] args) throws InterruptedException {
38+
// TODO(developer): Replace these variables before running the sample.
39+
String projectId = "MY_PROJECT_ID";
40+
String datasetName = "MY_DATASET_NAME";
41+
String externalTableName = "MY_EXTERNAL_TABLE_NAME";
42+
// Query to find states starting with 'W'
43+
String query =
44+
String.format(
45+
"SELECT * FROM s%.%s.%s WHERE name LIKE 'W%%'",
46+
projectId, datasetName, externalTableName);
47+
queryExternalTableAws(query);
48+
}
49+
50+
public static void queryExternalTableAws(String query) throws InterruptedException {
51+
try {
52+
// Initialize client that will be used to send requests. This client only needs to be created
53+
// once, and can be reused for multiple requests.
54+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
55+
56+
TableResult results = bigquery.query(QueryJobConfiguration.of(query));
57+
58+
results
59+
.iterateAll()
60+
.forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));
61+
62+
System.out.println("Query on aws external permanent table performed successfully.");
63+
} catch (BigQueryException e) {
64+
System.out.println("Query not performed \n" + e.toString());
65+
}
66+
}
67+
}
68+
// [END bigquery_omni_query_external_aws_s3]

samples/snippets/src/test/java/com/example/bigquery/CreateDatasetAwsIT.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public class CreateDatasetAwsIT {
3939
private PrintStream out;
4040
private PrintStream originalPrintStream;
4141

42-
private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
42+
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
4343

4444
private static String requireEnvVar(String varName) {
4545
String value = System.getenv(varName);
@@ -51,7 +51,7 @@ private static String requireEnvVar(String varName) {
5151

5252
@BeforeClass
5353
public static void checkRequirements() {
54-
requireEnvVar("GOOGLE_CLOUD_PROJECT");
54+
requireEnvVar("OMNI_PROJECT_ID");
5555
}
5656

5757
@Before
@@ -66,7 +66,7 @@ public void setUp() {
6666
@After
6767
public void tearDown() {
6868
// Clean up
69-
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
69+
DeleteDataset.deleteDataset(OMNI_PROJECT_ID, datasetName);
7070
// restores print statements in the original method
7171
System.out.flush();
7272
System.setOut(originalPrintStream);
@@ -75,7 +75,7 @@ public void tearDown() {
7575

7676
@Test
7777
public void testCreateDatasetAws() {
78-
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
78+
CreateDatasetAws.createDatasetAws(OMNI_PROJECT_ID, datasetName, LOCATION);
7979
assertThat(bout.toString()).contains("Aws dataset created successfully :");
8080
}
8181
}

samples/snippets/src/test/java/com/example/bigquery/CreateExternalTableAwsIT.java

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,14 @@ public class CreateExternalTableAwsIT {
4747
private static final String ID = UUID.randomUUID().toString().substring(0, 8);
4848
private static final String LOCATION = "aws-us-east-1";
4949
private final Logger log = Logger.getLogger(this.getClass().getName());
50-
private String datasetName;
5150
private String tableName;
52-
private String connectionName;
5351
private ByteArrayOutputStream bout;
5452
private PrintStream out;
5553
private PrintStream originalPrintStream;
5654

57-
private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
58-
private static final String AWS_ACCOUNT_ID = requireEnvVar("AWS_ACCOUNT_ID");
59-
private static final String AWS_ROLE_ID = requireEnvVar("AWS_ROLE_ID");
55+
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
56+
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
57+
private static final String AWS_READ_CONNECTION_ID = requireEnvVar("AWS_READ_CONNECTION_ID");
6058

6159
private static String requireEnvVar(String varName) {
6260
String value = System.getenv(varName);
@@ -69,49 +67,21 @@ private static String requireEnvVar(String varName) {
6967
@BeforeClass
7068
public static void checkRequirements() {
7169
requireEnvVar("OMNI_PROJECT_ID");
72-
requireEnvVar("AWS_ACCOUNT_ID");
73-
requireEnvVar("AWS_ROLE_ID");
70+
requireEnvVar("OMNI_DATASET_NAME");
71+
requireEnvVar("AWS_READ_CONNECTION_ID");
7472
}
7573

7674
@Before
77-
public void setUp() throws IOException {
78-
datasetName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
75+
public void setUp() {
7976
tableName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
80-
connectionName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
8177
bout = new ByteArrayOutputStream();
8278
out = new PrintStream(bout);
8379
originalPrintStream = System.out;
8480
System.setOut(out);
85-
// create a temporary aws connection
86-
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
87-
LocationName parent = LocationName.of(PROJECT_ID, LOCATION);
88-
String iamRoleId = String.format("arn:aws:iam::%s:role/%s", AWS_ACCOUNT_ID, AWS_ROLE_ID);
89-
AwsCrossAccountRole role = AwsCrossAccountRole.newBuilder().setIamRoleId(iamRoleId).build();
90-
AwsProperties awsProperties = AwsProperties.newBuilder().setCrossAccountRole(role).build();
91-
Connection connection = Connection.newBuilder().setAws(awsProperties).build();
92-
CreateConnectionRequest request =
93-
CreateConnectionRequest.newBuilder()
94-
.setParent(parent.toString())
95-
.setConnection(connection)
96-
.setConnectionId(connectionName)
97-
.build();
98-
connectionName = client.createConnection(request).getName();
99-
}
100-
// create a temporary dataset
101-
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
10281
}
10382

10483
@After
105-
public void tearDown() throws IOException {
106-
// delete a temporary aws connection
107-
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
108-
DeleteConnectionRequest request =
109-
DeleteConnectionRequest.newBuilder().setName(connectionName).build();
110-
client.deleteConnection(request);
111-
}
112-
// Clean up
113-
DeleteTable.deleteTable(datasetName, tableName);
114-
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
84+
public void tearDown() {
11585
// restores print statements in the original method
11686
System.out.flush();
11787
System.setOut(originalPrintStream);
@@ -120,18 +90,19 @@ public void tearDown() throws IOException {
12090

12191
@Test
12292
public void testCreateExternalTableAws() {
123-
String sourceUri = "s3://cloud-samples-tests/us-states.csv";
93+
String sourceUri = "s3://omni-samples-test-bucket/us-states.csv";
12494
Schema schema =
12595
Schema.of(
12696
Field.of("name", StandardSQLTypeName.STRING),
12797
Field.of("post_abbr", StandardSQLTypeName.STRING));
12898
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
129-
ExternalTableDefinition externalTable =
99+
ExternalTableDefinition externalTableDefinition =
130100
ExternalTableDefinition.newBuilder(sourceUri, options)
131-
.setConnectionId(connectionName)
101+
.setConnectionId(AWS_READ_CONNECTION_ID)
132102
.setSchema(schema)
133103
.build();
134-
CreateExternalTableAws.createExternalTableAws(datasetName, tableName, externalTable);
104+
CreateExternalTableAws.createExternalTableAws(
105+
OMNI_PROJECT_ID, OMNI_DATASET_NAME, tableName, externalTableDefinition);
135106
assertThat(bout.toString()).contains("Aws external table created successfully");
136107
}
137108
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import com.google.cloud.bigquery.CsvOptions;
23+
import com.google.cloud.bigquery.ExternalTableDefinition;
24+
import com.google.cloud.bigquery.Field;
25+
import com.google.cloud.bigquery.Schema;
26+
import com.google.cloud.bigquery.StandardSQLTypeName;
27+
import com.google.cloud.bigquery.connection.v1.AwsCrossAccountRole;
28+
import com.google.cloud.bigquery.connection.v1.AwsProperties;
29+
import com.google.cloud.bigquery.connection.v1.Connection;
30+
import com.google.cloud.bigquery.connection.v1.CreateConnectionRequest;
31+
import com.google.cloud.bigquery.connection.v1.DeleteConnectionRequest;
32+
import com.google.cloud.bigquery.connection.v1.LocationName;
33+
import com.google.cloud.bigqueryconnection.v1.ConnectionServiceClient;
34+
import java.io.ByteArrayOutputStream;
35+
import java.io.IOException;
36+
import java.io.PrintStream;
37+
import java.util.UUID;
38+
import java.util.logging.Level;
39+
import java.util.logging.Logger;
40+
import org.junit.After;
41+
import org.junit.Before;
42+
import org.junit.BeforeClass;
43+
import org.junit.Test;
44+
45+
public class QueryExternalTableAwsIT {
46+
47+
private final Logger log = Logger.getLogger(this.getClass().getName());
48+
private ByteArrayOutputStream bout;
49+
private PrintStream out;
50+
private PrintStream originalPrintStream;
51+
52+
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
53+
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
54+
private static final String OMNI_EXTERNAL_TABLE_NAME = requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
55+
56+
private static String requireEnvVar(String varName) {
57+
String value = System.getenv(varName);
58+
assertNotNull(
59+
"Environment variable " + varName + " is required to perform these tests.",
60+
System.getenv(varName));
61+
return value;
62+
}
63+
64+
@BeforeClass
65+
public static void checkRequirements() {
66+
requireEnvVar("OMNI_PROJECT_ID");
67+
requireEnvVar("OMNI_DATASET_NAME");
68+
requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
69+
}
70+
71+
@Before
72+
public void setUp() {
73+
bout = new ByteArrayOutputStream();
74+
out = new PrintStream(bout);
75+
originalPrintStream = System.out;
76+
System.setOut(out);
77+
}
78+
79+
@After
80+
public void tearDown() {
81+
// restores print statements in the original method
82+
System.out.flush();
83+
System.setOut(originalPrintStream);
84+
log.log(Level.INFO, bout.toString());
85+
}
86+
87+
@Test
88+
public void testQueryExternalTableAws() throws InterruptedException {
89+
String query =
90+
String.format(
91+
"SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'",
92+
OMNI_PROJECT_ID, OMNI_DATASET_NAME, OMNI_EXTERNAL_TABLE_NAME);
93+
QueryExternalTableAws.queryExternalTableAws(query);
94+
assertThat(bout.toString())
95+
.contains("Query on aws external permanent table performed successfully.");
96+
}
97+
}

0 commit comments

Comments
 (0)