From 152102faf42f13aef65d35267f714a637212ca8f Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Thu, 19 Oct 2023 04:38:39 +0000 Subject: [PATCH 1/4] docs: add runnable and testable I/O code samples --- third_party/bigframes_vendored/pandas/io/parquet.py | 8 ++++++++ .../bigframes_vendored/pandas/io/parsers/readers.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/io/parquet.py b/third_party/bigframes_vendored/pandas/io/parquet.py index 9aed9af5a8..a29c1a7891 100644 --- a/third_party/bigframes_vendored/pandas/io/parquet.py +++ b/third_party/bigframes_vendored/pandas/io/parquet.py @@ -17,6 +17,14 @@ def read_parquet( Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://bigframes-dev-testing/bigframes_test.parquet" + >>> df = bpd.read_parquet(path=gcs_path) + Args: path (str): Local or Cloud Storage path to Parquet file. diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index d19a92ecdf..824754c6bf 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -55,6 +55,13 @@ def read_csv( file. Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> filepath_or_buffer = ("gs://bigquery-public-data-ml-datasets/holidays_and_events_for_forecasting.csv") + >>> df = bpd.read_csv(filepath_or_buffer=filepath_or_buffer) + Args: filepath_or_buffer (str): A local or Google Cloud Storage (`gs://`) path with `engine="bigquery"` @@ -64,7 +71,7 @@ def read_csv( can be any ISO-8859-1 single-byte character. To use a character in the range 128-255, you must encode the character as UTF-8. Both engines support `sep="\t"` to specify tab character as separator. Default engine supports - having any number of spaces as separator by specifying `sep="\s+"`. Separators + having any number of spaces as separator by specifying `sep= "\ts+"`. Separators longer than 1 character are interpreted as regular expressions by the default engine. BigQuery engine only supports single character separators. header (Optional[int], default 0): From 5a04a7d453546875a7c4be64cf594530f16f35e1 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Thu, 19 Oct 2023 04:40:25 +0000 Subject: [PATCH 2/4] docs: add runnable and testable reading methods code snippets --- bigframes/session/__init__.py | 87 +++++++++++++++++++ .../bigframes_vendored/pandas/io/gbq.py | 19 ++++ .../pandas/io/parsers/readers.py | 21 ++++- .../bigframes_vendored/pandas/io/pickle.py | 23 +++++ 4 files changed, 147 insertions(+), 3 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 2f001d7d49..212bf0de85 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -342,6 +342,37 @@ def read_gbq_query( ``row_number() over ()`` if there is no natural unique index or you want to preserve ordering. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Preserve ordering in a query input. + + >>> bpd.read_gbq_query(''' + ... SELECT + ... -- Instead of an ORDER BY clause on the query, use + ... -- ROW_NUMBER() to create an ordered DataFrame. + ... ROW_NUMBER() OVER (ORDER BY AVG(pitchSpeed) DESC) + ... AS rowindex, + ... + ... pitcherFirstName, + ... pitcherLastName, + ... AVG(pitchSpeed) AS averagePitchSpeed + ... FROM `bigquery-public-data.baseball.games_wide` + ... WHERE year = 2016 + ... GROUP BY pitcherFirstName, pitcherLastName + ... ''', index_col="rowindex").head(n=5) + pitcherFirstName pitcherLastName averagePitchSpeed + rowindex + 1 Albertin Chapman 96.514113 + 2 Zachary Britton 94.591039 + 3 Trevor Rosenthal 94.213953 + 4 Jose Torres 94.103448 + 5 Tayron Guerrero 93.863636 + + [5 rows x 3 columns] + See also: :meth:`Session.read_gbq`. """ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so @@ -405,6 +436,28 @@ def read_gbq_table( ) -> dataframe.DataFrame: """Turn a BigQuery table into a DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins").head(5) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 + 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 + 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + 2 18.9 184.0 3900.0 MALE + 3 17.9 192.0 3500.0 FEMALE + 4 16.8 192.0 3000.0 FEMALE + + [5 rows x 7 columns] + See also: :meth:`Session.read_gbq`. """ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so @@ -792,6 +845,15 @@ def _read_ibis( def read_gbq_model(self, model_name: str): """Loads a BigQuery ML model from BigQuery. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> model_name = "bigframes-dev.bqml_tutorial.penguins_model" + >>> bpd.read_gbq_model(model_name) + LinearRegression(optimize_strategy='NORMAL_EQUATION') + Args: model_name (str): the model's name in BigQuery in the format @@ -815,6 +877,21 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame The pandas DataFrame will be persisted as a temporary BigQuery table, which can be automatically recycled after the Session is closed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> pandas_df = pd.DataFrame(data=d) + >>> bpd.read_pandas(pandas_df) + col1 col2 + 0 1 3 + 1 2 4 + + [2 rows x 2 columns] + Args: pandas_dataframe (pandas.DataFrame): a pandas DataFrame object to be loaded. @@ -1365,6 +1442,16 @@ def read_gbq_function( The return type of the function must be explicitly specified in the function's original definition even if not otherwise required. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> function_name = "bqutil.fn.cw_lower_case_ascii_only" + >>> func = bpd.read_gbq_function(function_name=function_name) + >>> func.bigframes_remote_function + 'bqutil.fn.cw_lower_case_ascii_only' + Args: function_name (str): the function's name in BigQuery in the format diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 8919f4ed16..e6a18b55b2 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -42,6 +42,25 @@ def read_gbq( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + If the input is a table ID: + + >>> bpd.read_gbq("bigquery-public-data.ml_datasets.penguins").head(5) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 + 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 + 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + 2 18.9 184.0 3900.0 MALE + 3 17.9 192.0 3500.0 FEMALE + 4 16.8 192.0 3000.0 FEMALE + + [5 rows x 7 columns] + Preserve ordering in a query input. >>> bpd.read_gbq(''' diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index 824754c6bf..392ca3afee 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -59,8 +59,9 @@ def read_csv( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> filepath_or_buffer = ("gs://bigquery-public-data-ml-datasets/holidays_and_events_for_forecasting.csv") - >>> df = bpd.read_csv(filepath_or_buffer=filepath_or_buffer) + + >>> gcs_path = ("gs://bigquery-public-data-ml-datasets/holidays_and_events_for_forecasting.csv") + >>> df = bpd.read_csv(filepath_or_buffer=gcs_path) Args: filepath_or_buffer (str): @@ -71,7 +72,7 @@ def read_csv( can be any ISO-8859-1 single-byte character. To use a character in the range 128-255, you must encode the character as UTF-8. Both engines support `sep="\t"` to specify tab character as separator. Default engine supports - having any number of spaces as separator by specifying `sep= "\ts+"`. Separators + having any number of spaces as separator by specifying `sep="\\s+"`. Separators longer than 1 character are interpreted as regular expressions by the default engine. BigQuery engine only supports single character separators. header (Optional[int], default 0): @@ -153,6 +154,20 @@ def read_json( file. Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://bigframes-dev-testing/sample1.json" + >>> bpd.read_json(path_or_buf=gcs_path, lines=True, orient="records").head(n=5) + id name + 0 1 Alice + 1 2 Bob + 2 3 Carol + + [3 rows x 2 columns] + Args: path_or_buf (a valid JSON str, path object or file-like object): A local or Google Cloud Storage (`gs://`) path with `engine="bigquery"` diff --git a/third_party/bigframes_vendored/pandas/io/pickle.py b/third_party/bigframes_vendored/pandas/io/pickle.py index 71b31956a0..32e887f9f7 100644 --- a/third_party/bigframes_vendored/pandas/io/pickle.py +++ b/third_party/bigframes_vendored/pandas/io/pickle.py @@ -25,6 +25,29 @@ def read_pickle( If the content of the pickle file is a Series and its name attribute is None, the name will be set to '0' by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://bigframes-dev-testing/test_pickle.pkl" + >>> bpd.read_pickle(filepath_or_buffer=gcs_path).head(5) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 + 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 + 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + 2 18.9 184.0 3900.0 MALE + 3 17.9 192.0 3500.0 FEMALE + 4 16.8 192.0 3000.0 FEMALE + + [5 rows x 7 columns] + Args: filepath_or_buffer (str, path object, or file-like object): String, path object (implementing os.PathLike[str]), or file-like object From f68ce19ef0fba150ce7f6957a3d030f4035e9d83 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Fri, 20 Oct 2023 03:57:25 +0000 Subject: [PATCH 3/4] fix: assign a df and show the first 2 rows --- bigframes/session/__init__.py | 27 +++++++------------ .../bigframes_vendored/pandas/io/gbq.py | 21 +++++---------- .../bigframes_vendored/pandas/io/parquet.py | 8 +++++- .../pandas/io/parsers/readers.py | 12 ++++++--- .../bigframes_vendored/pandas/io/pickle.py | 11 +++----- 5 files changed, 36 insertions(+), 43 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 212bf0de85..ee6af30f77 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -349,7 +349,7 @@ def read_gbq_query( Preserve ordering in a query input. - >>> bpd.read_gbq_query(''' + >>> df = bpd.read_gbq_query(''' ... SELECT ... -- Instead of an ORDER BY clause on the query, use ... -- ROW_NUMBER() to create an ordered DataFrame. @@ -362,16 +362,14 @@ def read_gbq_query( ... FROM `bigquery-public-data.baseball.games_wide` ... WHERE year = 2016 ... GROUP BY pitcherFirstName, pitcherLastName - ... ''', index_col="rowindex").head(n=5) + ... ''', index_col="rowindex") + >>> df.head(2) pitcherFirstName pitcherLastName averagePitchSpeed rowindex 1 Albertin Chapman 96.514113 2 Zachary Britton 94.591039 - 3 Trevor Rosenthal 94.213953 - 4 Jose Torres 94.103448 - 5 Tayron Guerrero 93.863636 - [5 rows x 3 columns] + [2 rows x 3 columns] See also: :meth:`Session.read_gbq`. """ @@ -441,22 +439,17 @@ def read_gbq_table( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins").head(5) + >>> df = bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins") + >>> df.head(2) species island culmen_length_mm \\ 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 - 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 - 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 - 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 culmen_depth_mm flipper_length_mm body_mass_g sex 0 18.4 184.0 3475.0 FEMALE 1 19.1 184.0 4650.0 MALE - 2 18.9 184.0 3900.0 MALE - 3 17.9 192.0 3500.0 FEMALE - 4 16.8 192.0 3000.0 FEMALE - [5 rows x 7 columns] + [2 rows x 7 columns] See also: :meth:`Session.read_gbq`. """ @@ -851,8 +844,7 @@ def read_gbq_model(self, model_name: str): >>> bpd.options.display.progress_bar = None >>> model_name = "bigframes-dev.bqml_tutorial.penguins_model" - >>> bpd.read_gbq_model(model_name) - LinearRegression(optimize_strategy='NORMAL_EQUATION') + >>> model = bpd.read_gbq_model(model_name) Args: model_name (str): @@ -885,7 +877,8 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame >>> d = {'col1': [1, 2], 'col2': [3, 4]} >>> pandas_df = pd.DataFrame(data=d) - >>> bpd.read_pandas(pandas_df) + >>> df = bpd.read_pandas(pandas_df) + >>> df col1 col2 0 1 3 1 2 4 diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index e6a18b55b2..575c501618 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -44,26 +44,21 @@ def read_gbq( If the input is a table ID: - >>> bpd.read_gbq("bigquery-public-data.ml_datasets.penguins").head(5) + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + >>> df.head(2) species island culmen_length_mm \\ 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 - 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 - 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 - 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 culmen_depth_mm flipper_length_mm body_mass_g sex 0 18.4 184.0 3475.0 FEMALE 1 19.1 184.0 4650.0 MALE - 2 18.9 184.0 3900.0 MALE - 3 17.9 192.0 3500.0 FEMALE - 4 16.8 192.0 3000.0 FEMALE - [5 rows x 7 columns] + [2 rows x 7 columns] Preserve ordering in a query input. - >>> bpd.read_gbq(''' + >>> df = bpd.read_gbq(''' ... SELECT ... -- Instead of an ORDER BY clause on the query, use ... -- ROW_NUMBER() to create an ordered DataFrame. @@ -76,16 +71,14 @@ def read_gbq( ... FROM `bigquery-public-data.baseball.games_wide` ... WHERE year = 2016 ... GROUP BY pitcherFirstName, pitcherLastName - ... ''', index_col="rowindex").head(n=5) + ... ''', index_col="rowindex") + >>> df.head(2) pitcherFirstName pitcherLastName averagePitchSpeed rowindex 1 Albertin Chapman 96.514113 2 Zachary Britton 94.591039 - 3 Trevor Rosenthal 94.213953 - 4 Jose Torres 94.103448 - 5 Tayron Guerrero 93.863636 - [5 rows x 3 columns] + [2 rows x 3 columns] Args: query_or_table (str): diff --git a/third_party/bigframes_vendored/pandas/io/parquet.py b/third_party/bigframes_vendored/pandas/io/parquet.py index a29c1a7891..f97bd386a4 100644 --- a/third_party/bigframes_vendored/pandas/io/parquet.py +++ b/third_party/bigframes_vendored/pandas/io/parquet.py @@ -22,8 +22,14 @@ def read_parquet( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> gcs_path = "gs://bigframes-dev-testing/bigframes_test.parquet" + >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" >>> df = bpd.read_parquet(path=gcs_path) + >>> df.head(2) + name post_abbr + 0 Alabama AL + 1 Alaska AK + + [2 rows x 2 columns] Args: path (str): diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index 392ca3afee..de14d5d2d4 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -62,6 +62,12 @@ def read_csv( >>> gcs_path = ("gs://bigquery-public-data-ml-datasets/holidays_and_events_for_forecasting.csv") >>> df = bpd.read_csv(filepath_or_buffer=gcs_path) + >>> df.head(2) + region holiday_name primary_date preholiday_days postholiday_days + 0 AU AUNZ_AnzacDay 2000-04-25 1 1 + 1 AU AUNZ_AnzacDay 2001-04-25 1 1 + + [2 rows x 5 columns] Args: filepath_or_buffer (str): @@ -160,13 +166,13 @@ def read_json( >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://bigframes-dev-testing/sample1.json" - >>> bpd.read_json(path_or_buf=gcs_path, lines=True, orient="records").head(n=5) + >>> df = bpd.read_json(path_or_buf=gcs_path, lines=True, orient="records") + >>> df.head(2) id name 0 1 Alice 1 2 Bob - 2 3 Carol - [3 rows x 2 columns] + [2 rows x 2 columns] Args: path_or_buf (a valid JSON str, path object or file-like object): diff --git a/third_party/bigframes_vendored/pandas/io/pickle.py b/third_party/bigframes_vendored/pandas/io/pickle.py index 32e887f9f7..053ba4871c 100644 --- a/third_party/bigframes_vendored/pandas/io/pickle.py +++ b/third_party/bigframes_vendored/pandas/io/pickle.py @@ -31,22 +31,17 @@ def read_pickle( >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://bigframes-dev-testing/test_pickle.pkl" - >>> bpd.read_pickle(filepath_or_buffer=gcs_path).head(5) + >>> df = bpd.read_pickle(filepath_or_buffer=gcs_path) + >>> df.head(2) species island culmen_length_mm \\ 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 - 2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 - 3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 - 4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 culmen_depth_mm flipper_length_mm body_mass_g sex 0 18.4 184.0 3475.0 FEMALE 1 19.1 184.0 4650.0 MALE - 2 18.9 184.0 3900.0 MALE - 3 17.9 192.0 3500.0 FEMALE - 4 16.8 192.0 3000.0 FEMALE - [5 rows x 7 columns] + [2 rows x 7 columns] Args: filepath_or_buffer (str, path object, or file-like object): From c11fc1ba215b3c13e5d5d4d054e8d7ea6de1e078 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Fri, 20 Oct 2023 17:16:37 +0000 Subject: [PATCH 4/4] address comments --- bigframes/session/__init__.py | 20 +++++++++++++++++++ .../pandas/io/parsers/readers.py | 10 +++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index ee6af30f77..5ec3da1a5a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -347,6 +347,22 @@ def read_gbq_query( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + Simple query input: + + >>> df = bpd.read_gbq_query(''' + ... SELECT + ... pitcherFirstName, + ... pitcherLastName, + ... pitchSpeed, + ... FROM `bigquery-public-data.baseball.games_wide` + ... ''') + >>> df.head(2) + pitcherFirstName pitcherLastName pitchSpeed + 0 0 + 1 0 + + [2 rows x 3 columns] + Preserve ordering in a query input. >>> df = bpd.read_gbq_query(''' @@ -439,6 +455,8 @@ def read_gbq_table( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + Read a whole table, with arbitrary ordering or ordering corresponding to the primary key(s). + >>> df = bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins") >>> df.head(2) species island culmen_length_mm \\ @@ -843,6 +861,8 @@ def read_gbq_model(self, model_name: str): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + Read an existing BigQuery ML model. + >>> model_name = "bigframes-dev.bqml_tutorial.penguins_model" >>> model = bpd.read_gbq_model(model_name) diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index de14d5d2d4..e8ed6182a6 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -60,14 +60,14 @@ def read_csv( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> gcs_path = ("gs://bigquery-public-data-ml-datasets/holidays_and_events_for_forecasting.csv") + >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" >>> df = bpd.read_csv(filepath_or_buffer=gcs_path) >>> df.head(2) - region holiday_name primary_date preholiday_days postholiday_days - 0 AU AUNZ_AnzacDay 2000-04-25 1 1 - 1 AU AUNZ_AnzacDay 2001-04-25 1 1 + name post_abbr + 0 Alabama AL + 1 Alaska AK - [2 rows x 5 columns] + [2 rows x 2 columns] Args: filepath_or_buffer (str):