def test_clustering_model():
    """Run the KMeans clustering sample on the public penguins table.

    The imports live inside the function so that they sit between the
    START/END region markers together with the rest of the sample. The
    trailing assertions only check that predict() and score() returned
    something.
    """
    # [START bigquery_dataframes_clustering_model]
    from bigframes.ml.cluster import KMeans
    import bigframes.pandas as bpd

    # Load data from BigQuery
    query_or_table = "bigquery-public-data.ml_datasets.penguins"
    bq_df = bpd.read_gbq(query_or_table)

    # Create the KMeans model
    cluster_model = KMeans(n_clusters=10)
    # KMeans is unsupervised: fit() takes only the feature data, no label.
    cluster_model.fit(bq_df["culmen_length_mm"])

    # Predict using the model
    result = cluster_model.predict(bq_df)
    # Score the model
    score = cluster_model.score(bq_df)
    # [END bigquery_dataframes_clustering_model]
    assert result is not None
    assert score is not None
def test_llm_model():
    """Run the PaLM2 text-generation sample and check that it produced output.

    The three constants below are defined before the START marker and are
    combined inside the sample into the connection name passed to the model.
    """
    PROJECT_ID = "bigframes-dev"
    REGION = "us"
    CONN_NAME = "bigframes-ml"
    # [START bigquery_dataframes_gen_ai_model]
    from bigframes.ml.llm import PaLM2TextGenerator
    import bigframes.pandas as bpd

    # Create the LLM model from the global session and the connection name
    connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
    model = PaLM2TextGenerator(
        session=bpd.get_global_session(),
        connection_name=connection,
    )

    # Read the CSV file that provides the "API" column
    df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")

    # Build one prompt per row by prepending the prefix to the "API" value
    df_prompt_prefix = "Generate Pandas sample code for DataFrame."
    df_prompt = df_prompt_prefix + df_api["API"]

    # Send the prompts to the model as a DataFrame and collect the predictions
    prompts = df_prompt.to_frame()
    df_pred = model.predict(prompts, max_output_tokens=1024)
    # [END bigquery_dataframes_gen_ai_model]
    generated = df_pred["ml_generate_text_llm_result"]
    assert generated is not None
    assert generated.iloc[0] is not None
def test_regression_model():
    """Run the linear-regression sample on the public penguins table.

    The sample fits on the Adelie rows without nulls, scores on the same
    rows, and predicts for the Adelie rows whose body mass is null. The
    trailing assertions only check that each intermediate object exists.
    """
    # [START bigquery_dataframes_regression_model]
    from bigframes.ml.linear_model import LinearRegression
    import bigframes.pandas as bpd

    # Load data from BigQuery
    bq_df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")

    # Keep only the Adelie Penguin rows, then drop the species column
    is_adelie = bq_df["species"] == "Adelie Penguin (Pygoscelis adeliae)"
    adelie_data = bq_df[is_adelie].drop(columns=["species"])

    # Rows without any nulls form the training data
    training_data = adelie_data.dropna()

    # Feature (input) columns and the label (output) column
    feature_names = [
        "island",
        "culmen_length_mm",
        "culmen_depth_mm",
        "flipper_length_mm",
        "sex",
    ]
    feature_columns = training_data[feature_names]
    label_columns = training_data[["body_mass_g"]]

    # Rows with a null body mass are the ones to predict for
    test_data = adelie_data[adelie_data["body_mass_g"].isnull()]

    # Create and train the linear model
    model = LinearRegression()
    model.fit(feature_columns, label_columns)

    # Score the model on the training features and labels
    score = model.score(feature_columns, label_columns)

    # Predict the missing values
    result = model.predict(test_data)
    # [END bigquery_dataframes_regression_model]
    assert test_data is not None
    assert feature_columns is not None
    assert label_columns is not None
    assert model is not None
    assert score is not None
    assert result is not None