Skip to content

Commit 856893f

Browse files
feat: Add preserveAsciiControlCharacters to CsvOptions (#2143)
* feat: Add preserveAsciiControlCharacters to CsvOptions * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Add IT * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 77f2bae commit 856893f

File tree

4 files changed

+57
-2
lines changed

4 files changed

+57
-2
lines changed

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public final class CsvOptions extends FormatOptions {
3434
private final String fieldDelimiter;
3535
private final String quote;
3636
private final Long skipLeadingRows;
37+
private final Boolean preserveAsciiControlCharacters;
3738

3839
public static final class Builder {
3940

@@ -43,6 +44,7 @@ public static final class Builder {
4344
private String fieldDelimiter;
4445
private String quote;
4546
private Long skipLeadingRows;
47+
private Boolean preserveAsciiControlCharacters;
4648

4749
private Builder() {}
4850

@@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
5355
this.fieldDelimiter = csvOptions.fieldDelimiter;
5456
this.quote = csvOptions.quote;
5557
this.skipLeadingRows = csvOptions.skipLeadingRows;
58+
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
5659
}
5760

5861
/**
@@ -130,6 +133,15 @@ public Builder setSkipLeadingRows(long skipLeadingRows) {
130133
return this;
131134
}
132135

136+
/**
137+
* Sets whether BigQuery should allow ascii control characters in a CSV file. By default ascii
138+
* control characters are not allowed.
139+
*/
140+
public Builder setPreserveAsciiControlCharacters(boolean preserveAsciiControlCharacters) {
141+
this.preserveAsciiControlCharacters = preserveAsciiControlCharacters;
142+
return this;
143+
}
144+
133145
/** Creates a {@code CsvOptions} object. */
134146
public CsvOptions build() {
135147
return new CsvOptions(this);
@@ -144,6 +156,7 @@ private CsvOptions(Builder builder) {
144156
this.fieldDelimiter = builder.fieldDelimiter;
145157
this.quote = builder.quote;
146158
this.skipLeadingRows = builder.skipLeadingRows;
159+
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
147160
}
148161

149162
/**
@@ -192,6 +205,14 @@ public Long getSkipLeadingRows() {
192205
return skipLeadingRows;
193206
}
194207

208+
/**
209+
* Returns whether BigQuery should allow ascii control characters in a CSV file. By default ascii
210+
* control characters are not allowed.
211+
*/
212+
public Boolean getPreserveAsciiControlCharacters() {
213+
return preserveAsciiControlCharacters;
214+
}
215+
195216
/** Returns a builder for the {@code CsvOptions} object. */
196217
public Builder toBuilder() {
197218
return new Builder(this);
@@ -207,6 +228,7 @@ public String toString() {
207228
.add("fieldDelimiter", fieldDelimiter)
208229
.add("quote", quote)
209230
.add("skipLeadingRows", skipLeadingRows)
231+
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
210232
.toString();
211233
}
212234

@@ -219,7 +241,8 @@ public int hashCode() {
219241
encoding,
220242
fieldDelimiter,
221243
quote,
222-
skipLeadingRows);
244+
skipLeadingRows,
245+
preserveAsciiControlCharacters);
223246
}
224247

225248
@Override

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/WriteChannelConfiguration.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,8 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
475475
.setAllowJaggedRows(csvOptions.allowJaggedRows())
476476
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
477477
.setEncoding(csvOptions.getEncoding())
478-
.setQuote(csvOptions.getQuote());
478+
.setQuote(csvOptions.getQuote())
479+
.setPreserveAsciiControlCharacters(csvOptions.getPreserveAsciiControlCharacters());
479480
if (csvOptions.getSkipLeadingRows() != null) {
480481
// todo(mziccard) remove checked cast or comment when #1044 is closed
481482
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows()));

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ public class CsvOptionsTest {
3030
private static final String FIELD_DELIMITER = ",";
3131
private static final String QUOTE = "\"";
3232
private static final long SKIP_LEADING_ROWS = 42L;
33+
34+
private static final boolean PRESERVE_ASCII_CONTROL_CHARACTERS = true;
3335
private static final CsvOptions CSV_OPTIONS =
3436
CsvOptions.newBuilder()
3537
.setAllowJaggedRows(ALLOW_JAGGED_ROWS)
@@ -38,6 +40,7 @@ public class CsvOptionsTest {
3840
.setFieldDelimiter(FIELD_DELIMITER)
3941
.setQuote(QUOTE)
4042
.setSkipLeadingRows(SKIP_LEADING_ROWS)
43+
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
4144
.build();
4245

4346
@Test
@@ -64,6 +67,8 @@ public void testBuilder() {
6467
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
6568
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
6669
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
70+
assertEquals(
71+
PRESERVE_ASCII_CONTROL_CHARACTERS, CSV_OPTIONS.getPreserveAsciiControlCharacters());
6772
}
6873

6974
@Test

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4560,4 +4560,30 @@ public void testLocation() throws Exception {
45604560
bigquery.delete(dataset.getDatasetId(), DatasetDeleteOption.deleteContents());
45614561
}
45624562
}
4563+
4564+
@Test
4565+
public void testPreserveAsciiControlCharacters()
4566+
throws InterruptedException, IOException, TimeoutException {
4567+
String destinationTableName = "test_preserve_ascii_control_characters";
4568+
TableId tableId = TableId.of(DATASET, destinationTableName);
4569+
WriteChannelConfiguration configuration =
4570+
WriteChannelConfiguration.newBuilder(tableId)
4571+
.setFormatOptions(
4572+
FormatOptions.csv().toBuilder().setPreserveAsciiControlCharacters(true).build())
4573+
.setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
4574+
.setSchema(SIMPLE_SCHEMA)
4575+
.build();
4576+
TableDataWriteChannel channel = bigquery.writer(configuration);
4577+
try {
4578+
channel.write(ByteBuffer.wrap("\u0000".getBytes(StandardCharsets.UTF_8)));
4579+
} finally {
4580+
channel.close();
4581+
}
4582+
Job job = channel.getJob().waitFor();
4583+
assertNull(job.getStatus().getError());
4584+
Page<FieldValueList> rows = bigquery.listTableData(tableId);
4585+
FieldValueList row = rows.getValues().iterator().next();
4586+
assertEquals("\u0000", row.get(0).getStringValue());
4587+
assertTrue(bigquery.delete(tableId));
4588+
}
45634589
}

0 commit comments

Comments
 (0)