diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 2f001d7d49..5ec3da1a5a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -342,6 +342,51 @@ def read_gbq_query( ``row_number() over ()`` if there is no natural unique index or you want to preserve ordering. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Simple query input: + + >>> df = bpd.read_gbq_query(''' + ... SELECT + ... pitcherFirstName, + ... pitcherLastName, + ... pitchSpeed, + ... FROM `bigquery-public-data.baseball.games_wide` + ... ''') + >>> df.head(2) + pitcherFirstName pitcherLastName pitchSpeed + 0 0 + 1 0 + + [2 rows x 3 columns] + + Preserve ordering in a query input: + + >>> df = bpd.read_gbq_query(''' + ... SELECT + ... -- Instead of an ORDER BY clause on the query, use + ... -- ROW_NUMBER() to create an ordered DataFrame. + ... ROW_NUMBER() OVER (ORDER BY AVG(pitchSpeed) DESC) + ... AS rowindex, + ... + ... pitcherFirstName, + ... pitcherLastName, + ... AVG(pitchSpeed) AS averagePitchSpeed + ... FROM `bigquery-public-data.baseball.games_wide` + ... WHERE year = 2016 + ... GROUP BY pitcherFirstName, pitcherLastName + ... ''', index_col="rowindex") + >>> df.head(2) + pitcherFirstName pitcherLastName averagePitchSpeed + rowindex + 1 Albertin Chapman 96.514113 + 2 Zachary Britton 94.591039 + + [2 rows x 3 columns] + See also: :meth:`Session.read_gbq`. """ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so @@ -405,6 +450,25 @@ def read_gbq_table( ) -> dataframe.DataFrame: """Turn a BigQuery table into a DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Read a whole table, either with arbitrary ordering or with ordering that corresponds to the primary key(s):
+ + >>> df = bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins") + >>> df.head(2) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + + [2 rows x 7 columns] + See also: :meth:`Session.read_gbq`. """ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so @@ -792,6 +856,16 @@ def _read_ibis( def read_gbq_model(self, model_name: str): """Loads a BigQuery ML model from BigQuery. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Read an existing BigQuery ML model. + + >>> model_name = "bigframes-dev.bqml_tutorial.penguins_model" + >>> model = bpd.read_gbq_model(model_name) + Args: model_name (str): the model's name in BigQuery in the format @@ -815,6 +889,22 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame The pandas DataFrame will be persisted as a temporary BigQuery table, which can be automatically recycled after the Session is closed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> pandas_df = pd.DataFrame(data=d) + >>> df = bpd.read_pandas(pandas_df) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + [2 rows x 2 columns] + Args: pandas_dataframe (pandas.DataFrame): a pandas DataFrame object to be loaded. @@ -1365,6 +1455,16 @@ def read_gbq_function( The return type of the function must be explicitly specified in the function's original definition even if not otherwise required. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> function_name = "bqutil.fn.cw_lower_case_ascii_only" + >>> func = bpd.read_gbq_function(function_name=function_name) + >>> func.bigframes_remote_function + 'bqutil.fn.cw_lower_case_ascii_only' + Args: function_name (str): the function's name in BigQuery in the format diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 8919f4ed16..575c501618 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -42,9 +42,23 @@ def read_gbq( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + If the input is a table ID: + + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + >>> df.head(2) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + + [2 rows x 7 columns] + Preserve ordering in a query input. - >>> bpd.read_gbq(''' + >>> df = bpd.read_gbq(''' ... SELECT ... -- Instead of an ORDER BY clause on the query, use ... -- ROW_NUMBER() to create an ordered DataFrame. @@ -57,16 +71,14 @@ def read_gbq( ... FROM `bigquery-public-data.baseball.games_wide` ... WHERE year = 2016 ... GROUP BY pitcherFirstName, pitcherLastName - ... ''', index_col="rowindex").head(n=5) + ... 
''', index_col="rowindex") + >>> df.head(2) pitcherFirstName pitcherLastName averagePitchSpeed rowindex 1 Albertin Chapman 96.514113 2 Zachary Britton 94.591039 - 3 Trevor Rosenthal 94.213953 - 4 Jose Torres 94.103448 - 5 Tayron Guerrero 93.863636 - [5 rows x 3 columns] + [2 rows x 3 columns] Args: query_or_table (str): diff --git a/third_party/bigframes_vendored/pandas/io/parquet.py b/third_party/bigframes_vendored/pandas/io/parquet.py index 9aed9af5a8..f97bd386a4 100644 --- a/third_party/bigframes_vendored/pandas/io/parquet.py +++ b/third_party/bigframes_vendored/pandas/io/parquet.py @@ -17,6 +17,20 @@ def read_parquet( Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + >>> df = bpd.read_parquet(path=gcs_path) + >>> df.head(2) + name post_abbr + 0 Alabama AL + 1 Alaska AK + + [2 rows x 2 columns] + Args: path (str): Local or Cloud Storage path to Parquet file. diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index d19a92ecdf..e8ed6182a6 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -55,6 +55,20 @@ def read_csv( file. Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + >>> df = bpd.read_csv(filepath_or_buffer=gcs_path) + >>> df.head(2) + name post_abbr + 0 Alabama AL + 1 Alaska AK + + [2 rows x 2 columns] + Args: filepath_or_buffer (str): A local or Google Cloud Storage (`gs://`) path with `engine="bigquery"` @@ -64,7 +78,7 @@ def read_csv( can be any ISO-8859-1 single-byte character. To use a character in the range 128-255, you must encode the character as UTF-8. Both engines support `sep="\t"` to specify tab character as separator. Default engine supports - having any number of spaces as separator by specifying `sep="\s+"`. Separators + having any number of spaces as separator by specifying `sep="\\s+"`. Separators longer than 1 character are interpreted as regular expressions by the default engine. BigQuery engine only supports single character separators. header (Optional[int], default 0): @@ -146,6 +160,20 @@ def read_json( file. Instead, set a serialized index column as the index and sort by that in the resulting DataFrame. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://bigframes-dev-testing/sample1.json" + >>> df = bpd.read_json(path_or_buf=gcs_path, lines=True, orient="records") + >>> df.head(2) + id name + 0 1 Alice + 1 2 Bob + + [2 rows x 2 columns] + Args: path_or_buf (a valid JSON str, path object or file-like object): A local or Google Cloud Storage (`gs://`) path with `engine="bigquery"` diff --git a/third_party/bigframes_vendored/pandas/io/pickle.py b/third_party/bigframes_vendored/pandas/io/pickle.py index 71b31956a0..053ba4871c 100644 --- a/third_party/bigframes_vendored/pandas/io/pickle.py +++ b/third_party/bigframes_vendored/pandas/io/pickle.py @@ -25,6 +25,24 @@ def read_pickle( If the content of the pickle file is a Series and its name attribute is None, the name will be set to '0' by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> gcs_path = "gs://bigframes-dev-testing/test_pickle.pkl" + >>> df = bpd.read_pickle(filepath_or_buffer=gcs_path) + >>> df.head(2) + species island culmen_length_mm \\ + 0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 + 1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 + + culmen_depth_mm flipper_length_mm body_mass_g sex + 0 18.4 184.0 3475.0 FEMALE + 1 19.1 184.0 4650.0 MALE + + [2 rows x 7 columns] + Args: filepath_or_buffer (str, path object, or file-like object): String, path object (implementing os.PathLike[str]), or file-like object