From c1573f0af383ca753d2d9073a0585d56b162db1a Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 25 Oct 2023 10:28:07 -0500 Subject: [PATCH 1/3] docs: add sample for getting started with BQML --- samples/snippets/bqml_getting_started_test.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 samples/snippets/bqml_getting_started_test.py diff --git a/samples/snippets/bqml_getting_started_test.py b/samples/snippets/bqml_getting_started_test.py new file mode 100644 index 0000000000..ccc24ea466 --- /dev/null +++ b/samples/snippets/bqml_getting_started_test.py @@ -0,0 +1,66 @@ + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def test_bqml_getting_started(): + + import bigframes.pandas as bpd + import bigframes + +# + df = bpd.read_gbq(''' + SELECT GENERATE_UUID() AS rowindex, * + FROM + `bigquery-public-data.google_analytics_sample.ga_sessions_*` + WHERE + _TABLE_SUFFIX BETWEEN '20160801' AND '20170630' + ''', + index_col='rowindex') + + # make comments + + totals = df['totals'] + + #using totals, selecting id for transaction example + totals['0000fb2c-2861-40be-9c6c-309afd7e7883'] + + transactions = totals.struct.field("transactions") + + label = transactions.notnull().map({True: 1, False: 0}) + + operatingSystem = df['device'].struct.field("operatingSystem") + + operatingSystem = operatingSystem.fillna("") + + isMobile = df['device'].struct.field("isMobile") + + country = df['geoNetwork'].struct.field("country").fillna("") + + pageviews = totals.struct.field("pageviews").fillna(0) + + features = bpd.DataFrame({ + 'os': operatingSystem, + 'is_mobile': isMobile, + 'pageviews': pageviews + }) + + # printing out the dataframe + df + + from bigframes.ml.linear_model import LogisticRegression + + model = LogisticRegression() + + model.fit(features, label) + model.to_gbq('bqml_tutorial.sample_model', replace = True) From 4e7d81c5e8ff05eeb1a99a550c87cbf47fb9fa9b Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 25 Oct 2023 11:24:23 -0500 Subject: [PATCH 2/3] Creating clarifying comments --- samples/snippets/bqml_getting_started_test.py | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/samples/snippets/bqml_getting_started_test.py b/samples/snippets/bqml_getting_started_test.py index ccc24ea466..5aa2665a53 100644 --- a/samples/snippets/bqml_getting_started_test.py +++ b/samples/snippets/bqml_getting_started_test.py @@ -13,12 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -def test_bqml_getting_started(): +def test_bqml_getting_started(): + #[START bigquery_getting_Started_bqml_tutorial] + #DataFrame created from a BigQuery table: import bigframes.pandas as bpd import bigframes -# + # Original sql query from tutorial, translated to Python using BigQuery BigFrames dataframes df = bpd.read_gbq(''' SELECT GENERATE_UUID() AS rowindex, * FROM @@ -28,39 +30,42 @@ def test_bqml_getting_started(): ''', index_col='rowindex') - # make comments - + #Printing dataframe, setting totals value totals = df['totals'] - #using totals, selecting id for transaction example + #Using totals, selecting id for transaction example totals['0000fb2c-2861-40be-9c6c-309afd7e7883'] - transactions = totals.struct.field("transactions") - + #Columns to indicate whether there was purchase label = transactions.notnull().map({True: 1, False: 0}) + #Operating systems of users operatingSystem = df['device'].struct.field("operatingSystem") - operatingSystem = operatingSystem.fillna("") + #Indicates whether the users devices are mobile isMobile = df['device'].struct.field("isMobile") + #Country from which the sessions originate, IP address based country = df['geoNetwork'].struct.field("country").fillna("") + #Total number of pageviews within the session pageviews = totals.struct.field("pageviews").fillna(0) + #Setting features for dataframe, features = bpd.DataFrame({ 'os': operatingSystem, 'is_mobile': isMobile, 'pageviews': pageviews }) - # printing out the dataframe + #Printing out the dataframe df - + + #Creating a logistics regression model - from bigframes.ml.linear_model import LogisticRegression - model = LogisticRegression() - + #Model training parameters, model.fit(features, label) + #Write a DataFRame to a BigQuery table- model.to_gbq('bqml_tutorial.sample_model', replace = True) From a4026a0be4933fcfd9a103461abf4e994b4627fd Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 25 Oct 2023 16:40:24 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- samples/snippets/bqml_getting_started_test.py | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/samples/snippets/bqml_getting_started_test.py b/samples/snippets/bqml_getting_started_test.py index 5aa2665a53..c7337bfe90 100644 --- a/samples/snippets/bqml_getting_started_test.py +++ b/samples/snippets/bqml_getting_started_test.py @@ -1,4 +1,3 @@ - # Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,57 +14,58 @@ def test_bqml_getting_started(): - #[START bigquery_getting_Started_bqml_tutorial] - #DataFrame created from a BigQuery table: - import bigframes.pandas as bpd - import bigframes + # [START bigquery_getting_Started_bqml_tutorial] + # DataFrame created from a BigQuery table: + import bigframes + import bigframes.pandas as bpd - # Original sql query from tutorial, translated to Python using BigQuery BigFrames dataframes - df = bpd.read_gbq(''' + # Original sql query from tutorial, translated to Python using BigQuery BigFrames dataframes + df = bpd.read_gbq( + """ SELECT GENERATE_UUID() AS rowindex, * FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*` WHERE _TABLE_SUFFIX BETWEEN '20160801' AND '20170630' - ''', - index_col='rowindex') + """, + index_col="rowindex", + ) - #Printing dataframe, setting totals value - totals = df['totals'] + # Printing dataframe, setting totals value + totals = df["totals"] - #Using totals, selecting id for transaction example - totals['0000fb2c-2861-40be-9c6c-309afd7e7883'] + # Using totals, selecting id for transaction example + totals["0000fb2c-2861-40be-9c6c-309afd7e7883"] transactions = totals.struct.field("transactions") - #Columns to indicate whether there was purchase + # Columns to indicate whether there was purchase label = transactions.notnull().map({True: 1, False: 0}) - #Operating systems of users - operatingSystem = df['device'].struct.field("operatingSystem") + # Operating systems of users + operatingSystem = df["device"].struct.field("operatingSystem") operatingSystem = operatingSystem.fillna("") - #Indicates whether the users devices are mobile - isMobile = df['device'].struct.field("isMobile") + # Indicates whether the users devices are mobile + isMobile = df["device"].struct.field("isMobile") - #Country from which the sessions originate, IP address based - country = df['geoNetwork'].struct.field("country").fillna("") + # Country from which the sessions originate, IP address based + country = df["geoNetwork"].struct.field("country").fillna("") - #Total number of pageviews within the session + # Total number of pageviews within the session pageviews = totals.struct.field("pageviews").fillna(0) - #Setting features for dataframe, - features = bpd.DataFrame({ - 'os': operatingSystem, - 'is_mobile': isMobile, - 'pageviews': pageviews - }) + # Setting features for dataframe, + features = bpd.DataFrame( + {"os": operatingSystem, "is_mobile": isMobile, "pageviews": pageviews} + ) + + # Printing out the dataframe + df - #Printing out the dataframe - df - - #Creating a logistics regression model - + # Creating a logistics regression model - from bigframes.ml.linear_model import LogisticRegression + model = LogisticRegression() - #Model training parameters, + # Model training parameters, model.fit(features, label) - #Write a DataFRame to a BigQuery table- - model.to_gbq('bqml_tutorial.sample_model', replace = True) + # Write a DataFRame to a BigQuery table- + model.to_gbq("bqml_tutorial.sample_model", replace=True)