From 86634d0a4c9dbfa11afac1665401047d19d6864c Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 2 Jan 2020 13:24:47 -0700 Subject: [PATCH 1/2] automl: add natural language text classification ga samples --- ...uage_text_classification_create_dataset.py | 49 +++++++++++++++++++ ...text_classification_create_dataset_test.py | 41 ++++++++++++++++ ...nguage_text_classification_create_model.py | 43 ++++++++++++++++ ...e_text_classification_create_model_test.py | 37 ++++++++++++++ .../language_text_classification_predict.py | 49 +++++++++++++++++++ ...nguage_text_classification_predict_test.py | 43 ++++++++++++++++ 6 files changed, 262 insertions(+) create mode 100644 automl/cloud-client/language_text_classification_create_dataset.py create mode 100644 automl/cloud-client/language_text_classification_create_dataset_test.py create mode 100644 automl/cloud-client/language_text_classification_create_model.py create mode 100644 automl/cloud-client/language_text_classification_create_model_test.py create mode 100644 automl/cloud-client/language_text_classification_predict.py create mode 100644 automl/cloud-client/language_text_classification_predict_test.py diff --git a/automl/cloud-client/language_text_classification_create_dataset.py b/automl/cloud-client/language_text_classification_create_dataset.py new file mode 100644 index 00000000000..f4a2add495e --- /dev/null +++ b/automl/cloud-client/language_text_classification_create_dataset.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_dataset(project_id, display_name): + """Create a multiclass text classification dataset and print its id.""" + # [START automl_language_text_classification_create_dataset] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = "YOUR_PROJECT_ID" + # display_name = "YOUR_DATASET_NAME" + + client = automl.AutoMlClient() + + # Location resource path (project + region) where the dataset is created. + project_location = client.location_path(project_id, "us-central1") + # Specify the classification type + # Types: + # MULTILABEL: Multiple labels are allowed for one example. + # MULTICLASS: At most one label is allowed per example. + metadata = automl.types.TextClassificationDatasetMetadata( + classification_type=automl.enums.ClassificationType.MULTICLASS + ) + dataset = automl.types.Dataset( + display_name=display_name, + text_classification_dataset_metadata=metadata, + ) + + # Start dataset creation in the region; result() below waits for it. + response = client.create_dataset(project_location, dataset) + + created_dataset = response.result() + + # Print the resource name and its last path segment, the dataset id. + print("Dataset name: {}".format(created_dataset.name)) + print("Dataset id: {}".format(created_dataset.name.split("/")[-1])) + # [END automl_language_text_classification_create_dataset] diff --git a/automl/cloud-client/language_text_classification_create_dataset_test.py b/automl/cloud-client/language_text_classification_create_dataset_test.py new file mode 100644 index 00000000000..923e75b8ae8 --- /dev/null +++ b/automl/cloud-client/language_text_classification_create_dataset_test.py @@ -0,0 +1,41 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import os + +from google.cloud import automl + +import language_text_classification_create_dataset + + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] + + +def test_text_classification_create_dataset(capsys): + dataset_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + language_text_classification_create_dataset.create_dataset( + PROJECT_ID, dataset_name + ) + out, _ = capsys.readouterr() + assert "Dataset id: " in out + + # Clean up: find the id by its printed label, then delete the dataset + dataset_id = out.split("Dataset id: ")[1].split("\n")[0] + client = automl.AutoMlClient() + dataset_full_id = client.dataset_path( + PROJECT_ID, "us-central1", dataset_id + ) + response = client.delete_dataset(dataset_full_id) + response.result() diff --git a/automl/cloud-client/language_text_classification_create_model.py b/automl/cloud-client/language_text_classification_create_model.py new file mode 100644 index 00000000000..b72ec8b4538 --- /dev/null +++ b/automl/cloud-client/language_text_classification_create_model.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_model(project_id, dataset_id, display_name): + """Start training a text classification model; print its operation name.""" + # [START automl_language_text_classification_create_model] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = "YOUR_PROJECT_ID" + # dataset_id = "YOUR_DATASET_ID" + # display_name = "YOUR_MODEL_NAME" + + client = automl.AutoMlClient() + + # Location resource path (project + region) where the model is created. + project_location = client.location_path(project_id, "us-central1") + # Metadata is left empty to use the default base model provided by Google + metadata = automl.types.TextClassificationModelMetadata() + model = automl.types.Model( + display_name=display_name, + dataset_id=dataset_id, + text_classification_model_metadata=metadata, + ) + + # Request model creation in the region without waiting for training. + response = client.create_model(project_location, model) + + print(u"Training operation name: {}".format(response.operation.name)) + print("Training started...") + # [END automl_language_text_classification_create_model] diff --git a/automl/cloud-client/language_text_classification_create_model_test.py b/automl/cloud-client/language_text_classification_create_model_test.py new file mode 100644 index 00000000000..d836d2577d1 --- /dev/null +++ b/automl/cloud-client/language_text_classification_create_model_test.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from google.cloud import automl +import pytest + +import language_text_classification_create_model + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +DATASET_ID = "TST3960250460385409610" + + +@pytest.mark.slow +def test_text_classification_create_model(capsys): + language_text_classification_create_model.create_model( + PROJECT_ID, DATASET_ID, "object_test_create_model" + ) + out, _ = capsys.readouterr() + assert "Training started" in out + + # Cancel the training operation named in the sample's printed output + operation_id = out.split("Training operation name: ")[1].split("\n")[0] + client = automl.AutoMlClient() + client.transport._operations_client.cancel_operation(operation_id) diff --git a/automl/cloud-client/language_text_classification_predict.py b/automl/cloud-client/language_text_classification_predict.py new file mode 100644 index 00000000000..856a113bae6 --- /dev/null +++ b/automl/cloud-client/language_text_classification_predict.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def predict(project_id, model_id, content): + """Classify text with a model and print each predicted class and score.""" + # [START automl_language_text_classification_predict] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = "YOUR_PROJECT_ID" + # model_id = "YOUR_MODEL_ID" + # content = "text to predict" + + prediction_client = automl.PredictionServiceClient() + + # Build the model's full resource path in the us-central1 region. + model_full_id = prediction_client.model_path( + project_id, "us-central1", model_id + ) + + text_snippet = automl.types.TextSnippet( + content=content, mime_type="text/plain" + ) # Types: 'text/plain', 'text/html' + payload = automl.types.ExamplePayload(text_snippet=text_snippet) + + response = prediction_client.predict(model_full_id, payload) + + for annotation_payload in response.payload: + print( + u"Predicted class name: {}".format(annotation_payload.display_name) + ) + print( + u"Predicted class score: {}".format( + annotation_payload.classification.score + ) + ) + # [END automl_language_text_classification_predict] diff --git a/automl/cloud-client/language_text_classification_predict_test.py b/automl/cloud-client/language_text_classification_predict_test.py new file mode 100644 index 00000000000..5a698f8635c --- /dev/null +++ b/automl/cloud-client/language_text_classification_predict_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from google.cloud import automl +import pytest + +import language_text_classification_predict + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +MODEL_ID = "TCN3472481026502981088" + + +@pytest.fixture(scope="function") +def verify_model_state(): + client = automl.AutoMlClient() + model_full_id = client.model_path(PROJECT_ID, "us-central1", MODEL_ID) + + model = client.get_model(model_full_id) + if model.deployment_state == automl.enums.Model.DeploymentState.UNDEPLOYED: + # Model is undeployed: deploy it and wait for deployment to finish + response = client.deploy_model(model_full_id) + response.result() + + +def test_predict(capsys, verify_model_state): + verify_model_state + text = "Fruit and nut flavour" + language_text_classification_predict.predict(PROJECT_ID, MODEL_ID, text) + out, _ = capsys.readouterr() + assert "Predicted class name: " in out From e0a078da7627f1adf7b5d163c9f2bcb2f0279a89 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 7 Jan 2020 13:22:53 -0700 Subject: [PATCH 2/2] use centralized automl testing project, update doc comments, update test method names --- .../language_sentiment_analysis_create_model_test.py | 2 +- .../language_sentiment_analysis_predict_test.py | 2 +- .../language_text_classification_create_dataset_test.py | 2 +- .../language_text_classification_create_model_test.py | 6 +++--- automl/cloud-client/language_text_classification_predict.py | 4 +++- .../language_text_classification_predict_test.py | 6 +++--- automl/cloud-client/list_model_evaluations_test.py | 2 +- automl/cloud-client/list_models_test.py | 2 +- automl/cloud-client/translate_predict_test.py | 2 +- .../vision_classification_create_dataset_test.py | 2 +- 10 files changed, 16 insertions(+), 14 deletions(-) diff --git a/automl/cloud-client/language_sentiment_analysis_create_model_test.py b/automl/cloud-client/language_sentiment_analysis_create_model_test.py index cbb79533efc..bf9d19788b5 100644 --- a/automl/cloud-client/language_sentiment_analysis_create_model_test.py +++ 
b/automl/cloud-client/language_sentiment_analysis_create_model_test.py @@ -26,7 +26,7 @@ @pytest.mark.slow def test_sentiment_analysis_create_model(capsys): language_sentiment_analysis_create_model.create_model( - PROJECT_ID, DATASET_ID, "object_test_create_model" + PROJECT_ID, DATASET_ID, "sentiment_test_create_model" ) out, _ = capsys.readouterr() assert "Training started" in out diff --git a/automl/cloud-client/language_sentiment_analysis_predict_test.py b/automl/cloud-client/language_sentiment_analysis_predict_test.py index d4fffc7f086..63a372647d4 100644 --- a/automl/cloud-client/language_sentiment_analysis_predict_test.py +++ b/automl/cloud-client/language_sentiment_analysis_predict_test.py @@ -35,7 +35,7 @@ def verify_model_state(): response.result() -def test_predict(capsys, verify_model_state): +def test_sentiment_analysis_predict(capsys, verify_model_state): verify_model_state text = "Hopefully this Claritin kicks in soon" language_sentiment_analysis_predict.predict(PROJECT_ID, MODEL_ID, text) diff --git a/automl/cloud-client/language_text_classification_create_dataset_test.py b/automl/cloud-client/language_text_classification_create_dataset_test.py index 923e75b8ae8..771945eeafb 100644 --- a/automl/cloud-client/language_text_classification_create_dataset_test.py +++ b/automl/cloud-client/language_text_classification_create_dataset_test.py @@ -20,7 +20,7 @@ import language_text_classification_create_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] def test_text_classification_create_dataset(capsys): diff --git a/automl/cloud-client/language_text_classification_create_model_test.py b/automl/cloud-client/language_text_classification_create_model_test.py index d836d2577d1..9b5c6f01f1e 100644 --- a/automl/cloud-client/language_text_classification_create_model_test.py +++ b/automl/cloud-client/language_text_classification_create_model_test.py @@ -19,14 +19,14 @@ import language_text_classification_create_model 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"] -DATASET_ID = "TST3960250460385409610" +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] +DATASET_ID = os.environ["TEXT_CLASSIFICATION_DATASET_ID"] @pytest.mark.slow def test_text_classification_create_model(capsys): language_text_classification_create_model.create_model( - PROJECT_ID, DATASET_ID, "object_test_create_model" + PROJECT_ID, DATASET_ID, "classification_test_create_model" ) out, _ = capsys.readouterr() assert "Training started" in out diff --git a/automl/cloud-client/language_text_classification_predict.py b/automl/cloud-client/language_text_classification_predict.py index 856a113bae6..6edac71eb10 100644 --- a/automl/cloud-client/language_text_classification_predict.py +++ b/automl/cloud-client/language_text_classification_predict.py @@ -30,9 +30,11 @@ def predict(project_id, model_id, content): project_id, "us-central1", model_id ) + # Supported mime_types: 'text/plain', 'text/html' + # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet text_snippet = automl.types.TextSnippet( content=content, mime_type="text/plain" - ) # Types: 'text/plain', 'text/html' + ) payload = automl.types.ExamplePayload(text_snippet=text_snippet) response = prediction_client.predict(model_full_id, payload) diff --git a/automl/cloud-client/language_text_classification_predict_test.py b/automl/cloud-client/language_text_classification_predict_test.py index 5a698f8635c..36202f5b424 100644 --- a/automl/cloud-client/language_text_classification_predict_test.py +++ b/automl/cloud-client/language_text_classification_predict_test.py @@ -19,8 +19,8 @@ import language_text_classification_predict -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -MODEL_ID = "TCN3472481026502981088" +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] +MODEL_ID = os.environ["TEXT_CLASSIFICATION_MODEL_ID"] @pytest.fixture(scope="function") @@ -35,7 +35,7 @@ def verify_model_state(): response.result() -def test_predict(capsys, verify_model_state): 
+def test_text_classification_predict(capsys, verify_model_state): verify_model_state text = "Fruit and nut flavour" language_text_classification_predict.predict(PROJECT_ID, MODEL_ID, text) diff --git a/automl/cloud-client/list_model_evaluations_test.py b/automl/cloud-client/list_model_evaluations_test.py index 839540bd30a..9d8cbc5989d 100644 --- a/automl/cloud-client/list_model_evaluations_test.py +++ b/automl/cloud-client/list_model_evaluations_test.py @@ -21,7 +21,7 @@ MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"] -def test_list_get_eval_model(capsys): +def test_list_model_evaluations(capsys): list_model_evaluations.list_model_evaluations(PROJECT_ID, MODEL_ID) out, _ = capsys.readouterr() assert "Model evaluation name: " in out diff --git a/automl/cloud-client/list_models_test.py b/automl/cloud-client/list_models_test.py index 10d7c8da85e..93f79d580e7 100644 --- a/automl/cloud-client/list_models_test.py +++ b/automl/cloud-client/list_models_test.py @@ -19,7 +19,7 @@ PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] -def test_list_get_eval_model(capsys): +def test_list_models(capsys): list_models.list_models(PROJECT_ID) out, _ = capsys.readouterr() assert "Model id: " in out diff --git a/automl/cloud-client/translate_predict_test.py b/automl/cloud-client/translate_predict_test.py index aabfd05b61a..7dbdb4bac3a 100644 --- a/automl/cloud-client/translate_predict_test.py +++ b/automl/cloud-client/translate_predict_test.py @@ -35,7 +35,7 @@ def verify_model_state(): response.result() -def test_predict(capsys, verify_model_state): +def test_translate_predict(capsys, verify_model_state): verify_model_state translate_predict.predict(PROJECT_ID, MODEL_ID, "resources/input.txt") out, _ = capsys.readouterr() diff --git a/automl/cloud-client/vision_classification_create_dataset_test.py b/automl/cloud-client/vision_classification_create_dataset_test.py index 763eef825cb..aaa8a575538 100644 --- a/automl/cloud-client/vision_classification_create_dataset_test.py +++ 
b/automl/cloud-client/vision_classification_create_dataset_test.py @@ -25,7 +25,7 @@ @pytest.mark.slow -def test_create_dataset(capsys): +def test_vision_classification_create_dataset(capsys): # create dataset dataset_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") vision_classification_create_dataset.create_dataset(