From 3067e6c69bb5a1045a856c4535133d0aac66b959 Mon Sep 17 00:00:00 2001 From: Martin Mladenovski Date: Thu, 23 May 2019 13:07:45 -0700 Subject: [PATCH] Add initial integration test for BigQuery Storage API. Integration tests in this change cover: - simple/non-filtered read from a table - filtered read from a table - read that only selects certain columns In addition to adding integration tests, this change also includes the Kokoro setup for presubmit, nightly and continuous builds. --- .kokoro/continuous/bigquery-storage-it.cfg | 27 ++ .kokoro/nightly/bigquery-storage-it.cfg | 27 ++ .kokoro/presubmit/bigquery-storage-it.cfg | 27 ++ .../google-cloud-bigquerystorage/pom.xml | 6 + .../v1beta1/it/ITBigQueryStorageTest.java | 257 ++++++++++++++++++ .../storage/v1beta1/it/SimpleRowReader.java | 70 +++++ 6 files changed, 414 insertions(+) create mode 100644 .kokoro/continuous/bigquery-storage-it.cfg create mode 100644 .kokoro/nightly/bigquery-storage-it.cfg create mode 100644 .kokoro/presubmit/bigquery-storage-it.cfg create mode 100644 google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java create mode 100644 google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/SimpleRowReader.java diff --git a/.kokoro/continuous/bigquery-storage-it.cfg b/.kokoro/continuous/bigquery-storage-it.cfg new file mode 100644 index 000000000000..3ec55dd2af06 --- /dev/null +++ b/.kokoro/continuous/bigquery-storage-it.cfg @@ -0,0 +1,27 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Configure the docker image for kokoro-trampoline. 
+env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/java8" +} + +env_vars: { + key: "INTEGRATION_TEST_ARGS" + value: "google-cloud-clients/google-cloud-bigquerystorage" +} + +env_vars: { + key: "JOB_TYPE" + value: "integration" +} + +env_vars: { + key: "GCLOUD_PROJECT" + value: "gcloud-devel" +} + +env_vars: { + key: "GOOGLE_APPLICATION_CREDENTIALS" + value: "keystore/73713_java_it_service_account" +} diff --git a/.kokoro/nightly/bigquery-storage-it.cfg b/.kokoro/nightly/bigquery-storage-it.cfg new file mode 100644 index 000000000000..3ec55dd2af06 --- /dev/null +++ b/.kokoro/nightly/bigquery-storage-it.cfg @@ -0,0 +1,27 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/java8" +} + +env_vars: { + key: "INTEGRATION_TEST_ARGS" + value: "google-cloud-clients/google-cloud-bigquerystorage" +} + +env_vars: { + key: "JOB_TYPE" + value: "integration" +} + +env_vars: { + key: "GCLOUD_PROJECT" + value: "gcloud-devel" +} + +env_vars: { + key: "GOOGLE_APPLICATION_CREDENTIALS" + value: "keystore/73713_java_it_service_account" +} diff --git a/.kokoro/presubmit/bigquery-storage-it.cfg b/.kokoro/presubmit/bigquery-storage-it.cfg new file mode 100644 index 000000000000..3ec55dd2af06 --- /dev/null +++ b/.kokoro/presubmit/bigquery-storage-it.cfg @@ -0,0 +1,27 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Configure the docker image for kokoro-trampoline. 
+env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/java8" +} + +env_vars: { + key: "INTEGRATION_TEST_ARGS" + value: "google-cloud-clients/google-cloud-bigquerystorage" +} + +env_vars: { + key: "JOB_TYPE" + value: "integration" +} + +env_vars: { + key: "GCLOUD_PROJECT" + value: "gcloud-devel" +} + +env_vars: { + key: "GOOGLE_APPLICATION_CREDENTIALS" + value: "keystore/73713_java_it_service_account" +} diff --git a/google-cloud-clients/google-cloud-bigquerystorage/pom.xml b/google-cloud-clients/google-cloud-bigquerystorage/pom.xml index 6d236d5d4ba2..9cca6a11c673 100644 --- a/google-cloud-clients/google-cloud-bigquerystorage/pom.xml +++ b/google-cloud-clients/google-cloud-bigquerystorage/pom.xml @@ -41,6 +41,12 @@ gax-grpc test + + org.apache.avro + avro + 1.9.0 + test + com.google.api diff --git a/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java b/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java new file mode 100644 index 000000000000..b986b72e6395 --- /dev/null +++ b/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java @@ -0,0 +1,257 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.bigquery.storage.v1beta1.it; + +import static com.google.common.truth.Truth.assertWithMessage; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.api.gax.rpc.ServerStream; +import com.google.cloud.ServiceOptions; +import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient; +import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions; +import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest; +import com.google.cloud.bigquery.storage.v1beta1.Storage.DataFormat; +import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest; +import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse; +import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadSession; +import com.google.cloud.bigquery.storage.v1beta1.Storage.StreamPosition; +import com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto.TableReference; +import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.util.logging.Logger; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.util.Utf8; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** Integration tests for BigQuery Storage API. 
*/ +public class ITBigQueryStorageTest { + + private static final Logger LOG = Logger.getLogger(ITBigQueryStorageTest.class.getName()); + + private static BigQueryStorageClient client; + private static String parentProjectId; + + @BeforeClass + public static void beforeClass() throws IOException { + client = BigQueryStorageClient.create(); + parentProjectId = String.format("projects/%s", ServiceOptions.getDefaultProjectId()); + + LOG.info( + String.format( + "%s tests running with parent project: %s", + ITBigQueryStorageTest.class.getSimpleName(), parentProjectId)); + } + + @AfterClass + public static void afterClass() { + if (client != null) { + client.close(); + } + } + + @Test + public void testSimpleRead() { + TableReference tableReference = + TableReference.newBuilder() + .setProjectId("bigquery-public-data") + .setDatasetId("samples") + .setTableId("shakespeare") + .build(); + + ReadSession session = client.createReadSession(tableReference, parentProjectId, 1); + assertEquals( + String.format( + "Did not receive expected number of streams for table reference '%s' CreateReadSession response:%n%s", + TextFormat.shortDebugString(tableReference), session.toString()), + 1, + session.getStreamsCount()); + + StreamPosition readPosition = + StreamPosition.newBuilder().setStream(session.getStreams(0)).build(); + + ReadRowsRequest readRowsRequest = + ReadRowsRequest.newBuilder().setReadPosition(readPosition).build(); + + long avroRowCount = 0; + ServerStream stream = client.readRowsCallable().call(readRowsRequest); + for (ReadRowsResponse response : stream) { + assertTrue( + String.format( + "Response is missing 'avro_rows'. Read %d rows so far. 
ReadRows response:%n%s", + avroRowCount, response.toString()), + response.hasAvroRows()); + avroRowCount += response.getAvroRows().getRowCount(); + } + + assertEquals(164_656, avroRowCount); + } + + @Test + public void testFilter() throws IOException { + TableReference tableReference = + TableReference.newBuilder() + .setProjectId("bigquery-public-data") + .setDatasetId("samples") + .setTableId("shakespeare") + .build(); + + TableReadOptions options = + TableReadOptions.newBuilder().setRowRestriction("word_count > 100").build(); + + CreateReadSessionRequest request = + CreateReadSessionRequest.newBuilder() + .setParent(parentProjectId) + .setRequestedStreams(1) + .setTableReference(tableReference) + .setReadOptions(options) + .setFormat(DataFormat.AVRO) + .build(); + + ReadSession session = client.createReadSession(request); + assertEquals( + String.format( + "Did not receive expected number of streams for table reference '%s' CreateReadSession response:%n%s", + TextFormat.shortDebugString(tableReference), session.toString()), + 1, + session.getStreamsCount()); + + StreamPosition readPosition = + StreamPosition.newBuilder().setStream(session.getStreams(0)).build(); + + ReadRowsRequest readRowsRequest = + ReadRowsRequest.newBuilder().setReadPosition(readPosition).build(); + + SimpleRowReader reader = + new SimpleRowReader(new Schema.Parser().parse(session.getAvroSchema().getSchema())); + + long avroRowCount = 0; + + ServerStream stream = client.readRowsCallable().call(readRowsRequest); + for (ReadRowsResponse response : stream) { + assertTrue( + String.format( + "Response is missing 'avro_rows'. Read %d rows so far. 
ReadRows response:%n%s", + avroRowCount, response.toString()), + response.hasAvroRows()); + avroRowCount += response.getAvroRows().getRowCount(); + + reader.processRows( + response.getAvroRows(), + new SimpleRowReader.AvroRowConsumer() { + @Override + public void accept(GenericRecord record) { + Long wordCount = (Long) record.get("word_count"); + assertWithMessage("Row not matching expectations: %s", record.toString()) + .that(wordCount) + .isGreaterThan(100L); + } + }); + } + + assertEquals(1_333, avroRowCount); + } + + @Test + public void testColumnSelection() throws IOException { + TableReference tableReference = + TableReference.newBuilder() + .setProjectId("bigquery-public-data") + .setDatasetId("samples") + .setTableId("shakespeare") + .build(); + + TableReadOptions options = + TableReadOptions.newBuilder() + .addSelectedFields("word") + .addSelectedFields("word_count") + .setRowRestriction("word_count > 100") + .build(); + + CreateReadSessionRequest request = + CreateReadSessionRequest.newBuilder() + .setParent(parentProjectId) + .setRequestedStreams(1) + .setTableReference(tableReference) + .setReadOptions(options) + .setFormat(DataFormat.AVRO) + .build(); + + ReadSession session = client.createReadSession(request); + assertEquals( + String.format( + "Did not receive expected number of streams for table reference '%s' CreateReadSession response:%n%s", + TextFormat.shortDebugString(tableReference), session.toString()), + 1, + session.getStreamsCount()); + + StreamPosition readPosition = + StreamPosition.newBuilder().setStream(session.getStreams(0)).build(); + + ReadRowsRequest readRowsRequest = + ReadRowsRequest.newBuilder().setReadPosition(readPosition).build(); + + Schema avroSchema = new Schema.Parser().parse(session.getAvroSchema().getSchema()); + + String actualSchemaMessage = + String.format( + "Unexpected schema. 
Actual schema:%n%s", avroSchema.toString(/* pretty = */ true)); + assertEquals(actualSchemaMessage, Schema.Type.RECORD, avroSchema.getType()); + assertEquals(actualSchemaMessage, "__root__", avroSchema.getName()); + + assertEquals(actualSchemaMessage, 2, avroSchema.getFields().size()); + assertEquals( + actualSchemaMessage, Schema.Type.STRING, avroSchema.getField("word").schema().getType()); + assertEquals( + actualSchemaMessage, + Schema.Type.LONG, + avroSchema.getField("word_count").schema().getType()); + + SimpleRowReader reader = new SimpleRowReader(avroSchema); + + long avroRowCount = 0; + ServerStream stream = client.readRowsCallable().call(readRowsRequest); + for (ReadRowsResponse response : stream) { + assertTrue( + String.format( + "Response is missing 'avro_rows'. Read %d rows so far. ReadRows response:%n%s", + avroRowCount, response.toString()), + response.hasAvroRows()); + avroRowCount += response.getAvroRows().getRowCount(); + reader.processRows( + response.getAvroRows(), + new SimpleRowReader.AvroRowConsumer() { + @Override + public void accept(GenericRecord record) { + String rowAssertMessage = + String.format("Row not matching expectations: %s", record.toString()); + + Long wordCount = (Long) record.get("word_count"); + assertWithMessage(rowAssertMessage).that(wordCount).isGreaterThan(100L); + + Utf8 word = (Utf8) record.get("word"); + assertWithMessage(rowAssertMessage).that(word.length()).isGreaterThan(0); + } + }); + } + + assertEquals(1_333, avroRowCount); + } +} diff --git a/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/SimpleRowReader.java b/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/SimpleRowReader.java new file mode 100644 index 000000000000..f55189f6fe2d --- /dev/null +++ b/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/SimpleRowReader.java @@ -0,0 
+1,70 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.bigquery.storage.v1beta1.it; + +import com.google.cloud.bigquery.storage.v1beta1.AvroProto.AvroRows; +import com.google.common.base.Preconditions; +import java.io.IOException; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; + +/* + * SimpleRowReader handles deserialization of the Avro-encoded row blocks transmitted + * from the storage API using a generic datum decoder. + */ +public class SimpleRowReader { + + public interface AvroRowConsumer { + void accept(GenericRecord record); + } + + private final DatumReader datumReader; + + // Decoder object will be reused to avoid re-allocation and too much garbage collection. + private BinaryDecoder decoder = null; + + // GenericRecord object will be reused. + private GenericRecord row = null; + + public SimpleRowReader(Schema schema) { + Preconditions.checkNotNull(schema); + datumReader = new GenericDatumReader<>(schema); + } + + /** + * Processes Avro rows by calling a consumer for each decoded row. + * + * @param avroRows object returned from the ReadRowsResponse. + * @param rowConsumer consumer that accepts GenericRecord. 
+ */ + public void processRows(AvroRows avroRows, AvroRowConsumer rowConsumer) throws IOException { + Preconditions.checkNotNull(avroRows); + Preconditions.checkNotNull(rowConsumer); + decoder = + DecoderFactory.get() + .binaryDecoder(avroRows.getSerializedBinaryRows().toByteArray(), decoder); + + while (!decoder.isEnd()) { + row = datumReader.read(row, decoder); + rowConsumer.accept(row); + } + } +}