From 52cee8c9c04bc5be6cbbfab70290aa6591737109 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 30 Aug 2021 17:21:41 -0600 Subject: [PATCH 01/26] feat: STRUCT and ARRAY support --- sqlalchemy_bigquery/__init__.py | 4 +- sqlalchemy_bigquery/_struct.py | 124 +++++++++++++++++++ sqlalchemy_bigquery/_types.py | 141 +++++++++++++++++++++ sqlalchemy_bigquery/base.py | 120 ++---------------- tests/system/conftest.py | 13 ++ tests/system/test_sqlalchemy_bigquery.py | 12 -- tests/unit/conftest.py | 5 + tests/unit/test__struct.py | 150 +++++++++++++++++++++++ tests/unit/test_catalog_functions.py | 36 +++--- tests/unit/test_dialect_types.py | 4 +- tests/unit/test_select.py | 16 ++- 11 files changed, 482 insertions(+), 143 deletions(-) create mode 100644 sqlalchemy_bigquery/_struct.py create mode 100644 sqlalchemy_bigquery/_types.py create mode 100644 tests/unit/test__struct.py diff --git a/sqlalchemy_bigquery/__init__.py b/sqlalchemy_bigquery/__init__.py index f0defda1..f0d8a6c6 100644 --- a/sqlalchemy_bigquery/__init__.py +++ b/sqlalchemy_bigquery/__init__.py @@ -23,7 +23,7 @@ from .version import __version__ # noqa from .base import BigQueryDialect, dialect # noqa -from .base import ( +from ._types import ( ARRAY, BIGNUMERIC, BOOL, @@ -38,6 +38,7 @@ NUMERIC, RECORD, STRING, + STRUCT, TIME, TIMESTAMP, ) @@ -58,6 +59,7 @@ "NUMERIC", "RECORD", "STRING", + "STRUCT", "TIME", "TIMESTAMP", ] diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py new file mode 100644 index 00000000..dc5cdd30 --- /dev/null +++ b/sqlalchemy_bigquery/_struct.py @@ -0,0 +1,124 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Mapping, Tuple + +import packaging.version +import sqlalchemy.sql.default_comparator +import sqlalchemy.sql.sqltypes +import sqlalchemy.types + +from . import base + +sqlalchemy_1_4_or_more = packaging.version.parse( + sqlalchemy.__version__ +) >= packaging.version.parse("1.4") + +if sqlalchemy_1_4_or_more: + import sqlalchemy.sql.coercions + import sqlalchemy.sql.roles + +# We have to delay getting the type compiler, because of circular imports. :( +type_compiler = None + + +class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType): + def __init__( + self, + *fields: Tuple[str, sqlalchemy.types.TypeEngine], + **kwfields: Mapping[str, sqlalchemy.types.TypeEngine], + ): + self.__fields = tuple( + ( + name, + type_ if isinstance(type_, sqlalchemy.types.TypeEngine) else type_(), + ) + for (name, type_) in (fields + tuple(kwfields.items())) + ) + + self.__byname = {name.lower(): type_ for (name, type_) in self.__fields} + + def __repr__(self): + fields = ", ".join(f"{name}={repr(type_)}" for name, type_ in self.__fields) + return f"STRUCT({fields})" + + def get_col_spec(self, **kw): + global type_compiler + + try: + process = type_compiler.process + except AttributeError: + type_compiler = base.dialect.type_compiler(base.dialect()) + process = type_compiler.process + + fields = ", ".join(f"{name} {process(type_)}" for name, type_ in self.__fields) + return f"STRUCT<{fields}>" + + def bind_processor(self, dialect): + return dict + + class Comparator(sqlalchemy.sql.sqltypes.Indexable.Comparator): + def _setup_getitem(self, name): + if not isinstance(name, str): + raise TypeError( + f"STRUCT fields can only be accessed with strings field names," + f" not {name}." + ) + subtype = self.expr.type._STRUCT__byname.get(name.lower()) + if subtype is None: + raise KeyError(name) + operator = struct_getitem_op + index = _field_index(self, name, operator) + return operator, index, subtype + + def __getattr__(self, name): + if name.lower() in self.expr.type._STRUCT__byname: + return self[name] + + comparator_factory = Comparator + + +if sqlalchemy_1_4_or_more: + + def _field_index(self, name, operator): + return sqlalchemy.sql.coercions.expect( + sqlalchemy.sql.roles.BinaryElementRole, + name, + expr=self.expr, + operator=operator, + bindparam_type=sqlalchemy.types.String(), + ) + + +else: + + def _field_index(self, name, operator): + return sqlalchemy.sql.default_comparator._check_literal( + self.expr, operator, name, bindparam_type=sqlalchemy.types.String(), + ) + + +def struct_getitem_op(a, b): + raise NotImplementedError() + + +sqlalchemy.sql.default_comparator.operator_lookup[ + struct_getitem_op.__name__ +] = sqlalchemy.sql.default_comparator.operator_lookup["json_getitem_op"] + + +class SQLCompiler: + def visit_struct_getitem_op_binary(self, binary, operator_, **kw): + left = self.process(binary.left, **kw) + return f"{left}.{binary.right.value}" diff --git a/sqlalchemy_bigquery/_types.py b/sqlalchemy_bigquery/_types.py new file mode 100644 index 00000000..5fac827c --- /dev/null +++ b/sqlalchemy_bigquery/_types.py @@ -0,0 +1,141 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import sqlalchemy.types +import sqlalchemy.util + +from google.cloud.bigquery.schema import SchemaField + +try: + from .geography import GEOGRAPHY +except ImportError: + pass + +from ._struct import STRUCT + +_type_map = { + "ARRAY": sqlalchemy.types.ARRAY, + "BIGNUMERIC": sqlalchemy.types.Numeric, + "BOOLEAN": sqlalchemy.types.Boolean, + "BOOL": sqlalchemy.types.Boolean, + "BYTES": sqlalchemy.types.BINARY, + "DATETIME": sqlalchemy.types.DATETIME, + "DATE": sqlalchemy.types.DATE, + "FLOAT64": sqlalchemy.types.Float, + "FLOAT": sqlalchemy.types.Float, + "INT64": sqlalchemy.types.Integer, + "INTEGER": sqlalchemy.types.Integer, + "NUMERIC": sqlalchemy.types.Numeric, + "RECORD": STRUCT, + "STRING": sqlalchemy.types.String, + "STRUCT": STRUCT, + "TIMESTAMP": sqlalchemy.types.TIMESTAMP, + "TIME": sqlalchemy.types.TIME, +} + +# By convention, dialect-provided types are spelled with all upper case. +ARRAY = _type_map["ARRAY"] +BIGNUMERIC = _type_map["NUMERIC"] +BOOLEAN = _type_map["BOOLEAN"] +BOOL = _type_map["BOOL"] +BYTES = _type_map["BYTES"] +DATETIME = _type_map["DATETIME"] +DATE = _type_map["DATE"] +FLOAT64 = _type_map["FLOAT64"] +FLOAT = _type_map["FLOAT"] +INT64 = _type_map["INT64"] +INTEGER = _type_map["INTEGER"] +NUMERIC = _type_map["NUMERIC"] +RECORD = _type_map["RECORD"] +STRING = _type_map["STRING"] +TIMESTAMP = _type_map["TIMESTAMP"] +TIME = _type_map["TIME"] + +try: + _type_map["GEOGRAPHY"] = GEOGRAPHY +except NameError: + pass + + +def _get_transitive_schema_fields(fields, cur_fields): + """ + Recurse into record type and return all the nested field names. + As contributed by @sumedhsakdeo on issue #17 + """ + results = [] + for field in fields: + results += [field] + if field.field_type == "RECORD": + cur_fields.append(field) + sub_fields = [ + SchemaField.from_api_repr( + dict(f.to_api_repr(), name=f"{field.name}.{f.name}") + ) + for f in field.fields + ] + results += _get_transitive_schema_fields(sub_fields, cur_fields) + cur_fields.pop() + return results + + +def _get_sqla_column_type(field): + try: + coltype = _type_map[field.field_type] + except KeyError: + sqlalchemy.util.warn( + "Did not recognize type '%s' of column '%s'" + % (field.field_type, field.name) + ) + coltype = sqlalchemy.types.NullType + else: + if field.field_type.endswith("NUMERIC"): + coltype = coltype(precision=field.precision, scale=field.scale) + elif field.field_type == "STRING" or field.field_type == "BYTES": + coltype = coltype(field.max_length) + elif field.field_type == "RECORD" or field.field_type == "STRUCT": + coltype = STRUCT( + *( + (subfield.name, _get_sqla_column_type(subfield)) + for subfield in field.fields + ) + ) + else: + coltype = coltype() + + if field.mode == "REPEATED": + coltype = ARRAY(coltype) + + return coltype + + +def get_columns(bq_schema): + fields = _get_transitive_schema_fields(bq_schema, []) + return [ + { + "name": field.name, + "type": _get_sqla_column_type(field), + "nullable": field.mode == "NULLABLE" or field.mode == "REPEATED", + "comment": field.description, + "default": None, + "precision": field.precision, + "scale": field.scale, + "max_length": field.max_length, + } + for field in fields + ] diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index e03d074e..3e49d1e1 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -30,7 +30,6 @@ from google import auth import google.api_core.exceptions from google.cloud.bigquery import dbapi -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import TableReference from google.api_core.exceptions import NotFound @@ -40,7 +39,7 @@ import sqlalchemy.sql.sqltypes import sqlalchemy.sql.type_api from sqlalchemy.exc import NoSuchTableError -from sqlalchemy import types, util +from sqlalchemy import util from sqlalchemy.sql.compiler import ( SQLCompiler, GenericTypeCompiler, @@ -55,12 +54,7 @@ import re from .parse_url import parse_url -from sqlalchemy_bigquery import _helpers - -try: - from .geography import GEOGRAPHY -except ImportError: - pass +from . import _helpers, _struct, _types FIELD_ILLEGAL_CHARACTERS = re.compile(r"[^\w]+") @@ -117,49 +111,6 @@ def format_label(self, label, name=None): return result -_type_map = { - "ARRAY": types.ARRAY, - "BIGNUMERIC": types.Numeric, - "BOOLEAN": types.Boolean, - "BOOL": types.Boolean, - "BYTES": types.BINARY, - "DATETIME": types.DATETIME, - "DATE": types.DATE, - "FLOAT64": types.Float, - "FLOAT": types.Float, - "INT64": types.Integer, - "INTEGER": types.Integer, - "NUMERIC": types.Numeric, - "RECORD": types.JSON, - "STRING": types.String, - "TIMESTAMP": types.TIMESTAMP, - "TIME": types.TIME, -} - -# By convention, dialect-provided types are spelled with all upper case. -ARRAY = _type_map["ARRAY"] -BIGNUMERIC = _type_map["NUMERIC"] -BOOLEAN = _type_map["BOOLEAN"] -BOOL = _type_map["BOOL"] -BYTES = _type_map["BYTES"] -DATETIME = _type_map["DATETIME"] -DATE = _type_map["DATE"] -FLOAT64 = _type_map["FLOAT64"] -FLOAT = _type_map["FLOAT"] -INT64 = _type_map["INT64"] -INTEGER = _type_map["INTEGER"] -NUMERIC = _type_map["NUMERIC"] -RECORD = _type_map["RECORD"] -STRING = _type_map["STRING"] -TIMESTAMP = _type_map["TIMESTAMP"] -TIME = _type_map["TIME"] - -try: - _type_map["GEOGRAPHY"] = GEOGRAPHY -except NameError: - pass - - class BigQueryExecutionContext(DefaultExecutionContext): def create_cursor(self): # Set arraysize @@ -227,7 +178,7 @@ def pre_exec(self): ) -class BigQueryCompiler(SQLCompiler): +class BigQueryCompiler(_struct.SQLCompiler, SQLCompiler): compound_keywords = SQLCompiler.compound_keywords.copy() compound_keywords[selectable.CompoundSelect.UNION] = "UNION DISTINCT" @@ -566,6 +517,11 @@ def visit_bindparam( return param + def visit_getitem_binary(self, binary, operator_, **kw): + left = self.process(binary.left, **kw) + right = self.process(binary.right, **kw) + return f"{left}[OFFSET({right})]" + class BigQueryTypeCompiler(GenericTypeCompiler): def visit_INTEGER(self, type_, **kw): @@ -836,14 +792,6 @@ def create_connect_args(self, url): ) return ([client], {}) - def _json_deserializer(self, row): - """JSON deserializer for RECORD types. - - The DB-API layer already deserializes JSON to a dictionary, so this - just returns the input. - """ - return row - def _get_table_or_view_names(self, connection, table_type, schema=None): current_schema = schema or self.dataset_id get_table_name = ( @@ -968,59 +916,9 @@ def has_table(self, connection, table_name, schema=None): except NoSuchTableError: return False - def _get_columns_helper(self, columns, cur_columns): - """ - Recurse into record type and return all the nested field names. - As contributed by @sumedhsakdeo on issue #17 - """ - results = [] - for col in columns: - results += [col] - if col.field_type == "RECORD": - cur_columns.append(col) - fields = [ - SchemaField.from_api_repr( - dict(f.to_api_repr(), name=f"{col.name}.{f.name}") - ) - for f in col.fields - ] - results += self._get_columns_helper(fields, cur_columns) - cur_columns.pop() - return results - def get_columns(self, connection, table_name, schema=None, **kw): table = self._get_table(connection, table_name, schema) - columns = self._get_columns_helper(table.schema, []) - result = [] - for col in columns: - try: - coltype = _type_map[col.field_type] - except KeyError: - util.warn( - "Did not recognize type '%s' of column '%s'" - % (col.field_type, col.name) - ) - coltype = types.NullType - - if col.field_type.endswith("NUMERIC"): - coltype = coltype(precision=col.precision, scale=col.scale) - elif col.field_type == "STRING" or col.field_type == "BYTES": - coltype = coltype(col.max_length) - - result.append( - { - "name": col.name, - "type": types.ARRAY(coltype) if col.mode == "REPEATED" else coltype, - "nullable": col.mode == "NULLABLE" or col.mode == "REPEATED", - "comment": col.description, - "default": None, - "precision": col.precision, - "scale": col.scale, - "max_length": col.max_length, - } - ) - - return result + return _types.get_columns(table.schema) def get_table_comment(self, connection, table_name, schema=None, **kw): table = self._get_table(connection, table_name, schema) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index d9db14ab..7bf76a2d 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -26,6 +26,8 @@ from google.cloud import bigquery import test_utils.prefixer +from sqlalchemy_bigquery import BigQueryDialect + prefixer = test_utils.prefixer.Prefixer("python-bigquery-sqlalchemy", "tests/system") DATA_DIR = pathlib.Path(__file__).parent / "data" @@ -140,6 +142,17 @@ def cleanup_datasets(bigquery_client: bigquery.Client): ) +@pytest.fixture(scope="session") +def engine(): + engine = sqlalchemy.create_engine("bigquery://", echo=True) + return engine + + +@pytest.fixture(scope="session") +def dialect(): + return BigQueryDialect() + + @pytest.fixture def metadata(): return sqlalchemy.MetaData() diff --git a/tests/system/test_sqlalchemy_bigquery.py b/tests/system/test_sqlalchemy_bigquery.py index d8622020..0cbf737d 100644 --- a/tests/system/test_sqlalchemy_bigquery.py +++ b/tests/system/test_sqlalchemy_bigquery.py @@ -20,7 +20,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from sqlalchemy_bigquery import BigQueryDialect from sqlalchemy.engine import create_engine from sqlalchemy.schema import Table, MetaData, Column from sqlalchemy.ext.declarative import declarative_base @@ -133,17 +132,6 @@ ] -@pytest.fixture(scope="session") -def engine(): - engine = create_engine("bigquery://", echo=True) - return engine - - -@pytest.fixture(scope="session") -def dialect(): - return BigQueryDialect() - - @pytest.fixture(scope="session") def engine_using_test_dataset(bigquery_dataset): engine = create_engine(f"bigquery:///{bigquery_dataset}", echo=True) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 886e9aee..d311a134 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -42,6 +42,11 @@ ) +@pytest.fixture() +def engine(): + return sqlalchemy.create_engine("bigquery://myproject/mydataset") + + @pytest.fixture() def faux_conn(): test_data = dict(execute=[]) diff --git a/tests/unit/test__struct.py b/tests/unit/test__struct.py new file mode 100644 index 00000000..c4b2f61f --- /dev/null +++ b/tests/unit/test__struct.py @@ -0,0 +1,150 @@ +# Copyright (c) 2017 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import datetime + +import pytest + +import sqlalchemy + + +def _test_struct(): + from sqlalchemy_bigquery import STRUCT + + return STRUCT( + name=sqlalchemy.String, + children=sqlalchemy.ARRAY( + STRUCT(name=sqlalchemy.String, bdate=sqlalchemy.DATE) + ), + ) + + +def test_struct_colspec(): + assert _test_struct().get_col_spec() == ( + "STRUCT>>" + ) + + +def test_struct_repr(): + assert repr(_test_struct()) == ( + "STRUCT(name=String(), children=ARRAY(STRUCT(name=String(), bdate=DATE())))" + ) + + +def test_bind_processor(): + assert _test_struct().bind_processor(None) is dict + + +def _col(): + return sqlalchemy.Table( + "t", sqlalchemy.MetaData(), sqlalchemy.Column("person", _test_struct()), + ).c.person + + +@pytest.mark.parametrize( + "expr,sql", + [ + (_col()["name"], "`t`.`person`.name"), + (_col()["Name"], "`t`.`person`.Name"), + (_col().NAME, "`t`.`person`.NAME"), + (_col().children, "`t`.`person`.children"), + ( + _col().children[0].label("anon_1"), # SQLAlchemy doesn't add the label + # in this case for some reason + "(`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]", + ), + ( + _col().children[0]["bdate"], + "((`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]).bdate", + ), + ( + _col().children[0].bdate, + "((`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]).bdate", + ), + ], +) +def test_struct_traversal_project(engine, expr, sql): + sql = f"SELECT {sql} AS `anon_1` \nFROM `t`" + assert str(sqlalchemy.select([expr]).compile(engine)) == sql + + +@pytest.mark.parametrize( + "expr,sql", + [ + (_col()["name"] == "x", "(`t`.`person`.name) = %(param_1:STRING)s"), + (_col()["Name"] == "x", "(`t`.`person`.Name) = %(param_1:STRING)s"), + (_col().NAME == "x", "(`t`.`person`.NAME) = %(param_1:STRING)s"), + ( + _col().children[0] == dict(name="foo", bdate=datetime.date(2020, 1, 1)), + "(`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]" + " = %(param_2:STRUCT)s", + ), + ( + _col().children[0] == dict(name="foo", bdate=datetime.date(2020, 1, 1)), + "(`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]" + " = %(param_2:STRUCT)s", + ), + ( + _col().children[0]["bdate"] == datetime.date(2021, 8, 30), + "(((`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]).bdate)" + " = %(param_2:DATE)s", + ), + ( + _col().children[0].bdate == datetime.date(2021, 8, 30), + "(((`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]).bdate)" + " = %(param_2:DATE)s", + ), + ], +) +def test_struct_traversal_filter(engine, expr, sql, param=1): + want = f"SELECT `t`.`person` \nFROM `t`, `t` \nWHERE {sql}" + got = str(sqlalchemy.select([_col()]).where(expr).compile(engine)) + assert got == want + + +def test_struct_insert_type_info(engine, metadata): + t = sqlalchemy.Table("t", metadata, sqlalchemy.Column("person", _test_struct())) + got = str( + t.insert() + .values( + person=dict( + name="bob", + children=[dict(name="billy", bdate=datetime.date(2020, 1, 1))], + ) + ) + .compile(engine) + ) + + assert got == ( + "INSERT INTO `t` (`person`) VALUES (%(person:" + "STRUCT>>" + ")s)" + ) + + +def test_struct_non_string_field_access(engine): + with pytest.raises( + TypeError, + match="STRUCT fields can only be accessed with strings field names, not 42", + ): + _col()[42] + + +def test_struct_bad_name(engine): + with pytest.raises(KeyError, match="42"): + _col()["42"] diff --git a/tests/unit/test_catalog_functions.py b/tests/unit/test_catalog_functions.py index 6613ae57..fd7d0d63 100644 --- a/tests/unit/test_catalog_functions.py +++ b/tests/unit/test_catalog_functions.py @@ -165,8 +165,8 @@ def test_get_table_comment(faux_conn): ("STRING(42)", sqlalchemy.types.String(42), dict(max_length=42)), ("BYTES", sqlalchemy.types.BINARY(), ()), ("BYTES(42)", sqlalchemy.types.BINARY(42), dict(max_length=42)), - ("INT64", sqlalchemy.types.Integer, ()), - ("FLOAT64", sqlalchemy.types.Float, ()), + ("INT64", sqlalchemy.types.Integer(), ()), + ("FLOAT64", sqlalchemy.types.Float(), ()), ("NUMERIC", sqlalchemy.types.NUMERIC(), ()), ("NUMERIC(4)", sqlalchemy.types.NUMERIC(4), dict(precision=4)), ("NUMERIC(4, 2)", sqlalchemy.types.NUMERIC(4, 2), dict(precision=4, scale=2)), @@ -177,11 +177,11 @@ def test_get_table_comment(faux_conn): sqlalchemy.types.NUMERIC(42, 2), dict(precision=42, scale=2), ), - ("BOOL", sqlalchemy.types.Boolean, ()), - ("TIMESTAMP", sqlalchemy.types.TIMESTAMP, ()), - ("DATE", sqlalchemy.types.DATE, ()), - ("TIME", sqlalchemy.types.TIME, ()), - ("DATETIME", sqlalchemy.types.DATETIME, ()), + ("BOOL", sqlalchemy.types.Boolean(), ()), + ("TIMESTAMP", sqlalchemy.types.TIMESTAMP(), ()), + ("DATE", sqlalchemy.types.DATE(), ()), + ("TIME", sqlalchemy.types.TIME(), ()), + ("DATETIME", sqlalchemy.types.DATETIME(), ()), ("THURSDAY", sqlalchemy.types.NullType, ()), ], ) @@ -207,6 +207,8 @@ def test_get_table_columns(faux_conn, btype, atype, extra): def test_get_table_columns_special_cases(faux_conn): + from sqlalchemy_bigquery import STRUCT + cursor = faux_conn.connection.cursor() cursor.execute("create table foo (s STRING, n INT64 not null, r RECORD)") client = faux_conn.connection._client @@ -218,10 +220,10 @@ def test_get_table_columns_special_cases(faux_conn): ) actual = faux_conn.dialect.get_columns(faux_conn, "foo") - stype = actual[0].pop("type") - assert isinstance(stype, sqlalchemy.types.ARRAY) - assert isinstance(stype.item_type, sqlalchemy.types.String) - assert actual == [ + for a in actual: + a["type"] = repr(a["type"]) + + expected = [ { "comment": "a fine column", "default": None, @@ -230,13 +232,14 @@ def test_get_table_columns_special_cases(faux_conn): "max_length": None, "precision": None, "scale": None, + "type": repr(sqlalchemy.types.ARRAY(sqlalchemy.types.String())), }, { "comment": None, "default": None, "name": "n", "nullable": False, - "type": sqlalchemy.types.Integer, + "type": repr(sqlalchemy.types.Integer()), "max_length": None, "precision": None, "scale": None, @@ -246,7 +249,9 @@ def test_get_table_columns_special_cases(faux_conn): "default": None, "name": "r", "nullable": True, - "type": sqlalchemy.types.JSON, + "type": repr( + STRUCT(i=sqlalchemy.types.Integer(), f=sqlalchemy.types.Float()) + ), "max_length": None, "precision": None, "scale": None, @@ -256,7 +261,7 @@ def test_get_table_columns_special_cases(faux_conn): "default": None, "name": "r.i", "nullable": True, - "type": sqlalchemy.types.Integer, + "type": repr(sqlalchemy.types.Integer()), "max_length": None, "precision": None, "scale": None, @@ -266,12 +271,13 @@ def test_get_table_columns_special_cases(faux_conn): "default": None, "name": "r.f", "nullable": True, - "type": sqlalchemy.types.Float, + "type": repr(sqlalchemy.types.Float()), "max_length": None, "precision": None, "scale": None, }, ] + assert actual == expected def test_has_table(faux_conn): diff --git a/tests/unit/test_dialect_types.py b/tests/unit/test_dialect_types.py index a1af7c47..47ffd94a 100644 --- a/tests/unit/test_dialect_types.py +++ b/tests/unit/test_dialect_types.py @@ -24,7 +24,7 @@ def test_types_import(): """Demonstrate behavior of importing types independent of any other import.""" dialect_module = importlib.import_module("sqlalchemy_bigquery") - base_module = importlib.import_module("sqlalchemy_bigquery.base") - custom_types = getattr(base_module, "_type_map") + _types_module = importlib.import_module("sqlalchemy_bigquery._types") + custom_types = getattr(_types_module, "_type_map") for type_name, type_value in custom_types.items(): assert getattr(dialect_module, type_name) == type_value diff --git a/tests/unit/test_select.py b/tests/unit/test_select.py index 474fc9d9..8049b8a6 100644 --- a/tests/unit/test_select.py +++ b/tests/unit/test_select.py @@ -173,8 +173,12 @@ def test_typed_parameters(faux_conn, type_, val, btype, vrep): ) -def test_select_json(faux_conn, metadata): - table = sqlalchemy.Table("t", metadata, sqlalchemy.Column("x", sqlalchemy.JSON)) +def test_select_struct(faux_conn, metadata): + from sqlalchemy_bigquery import STRUCT + + table = sqlalchemy.Table( + "t", metadata, sqlalchemy.Column("x", STRUCT(y=sqlalchemy.Integer)), + ) faux_conn.ex("create table t (x RECORD)") faux_conn.ex("""insert into t values ('{"y": 1}')""") @@ -430,3 +434,11 @@ def test_unnest_w_no_table_references(faux_conn, alias): assert " ".join(compiled.strip().split()) == ( "SELECT `anon_1` FROM unnest(%(unnest_1)s) AS `anon_1`" ) + + +def test_array_indexing(engine, metadata): + t = sqlalchemy.Table( + "t", metadata, sqlalchemy.Column("a", sqlalchemy.ARRAY(sqlalchemy.String)), + ) + got = str(sqlalchemy.select([t.c.a[0]]).compile(engine)) + assert got == "SELECT `t`.`a`[OFFSET(%(a_1:INT64)s)] AS `anon_1` \nFROM `t`" From 6bacc0db6aa364c520c9a186954e57255724d7a3 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 07:22:44 -0600 Subject: [PATCH 02/26] Fixed test that expected JSON rather than STRUCT --- tests/system/test_sqlalchemy_bigquery.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/system/test_sqlalchemy_bigquery.py b/tests/system/test_sqlalchemy_bigquery.py index 0cbf737d..5a8f4158 100644 --- a/tests/system/test_sqlalchemy_bigquery.py +++ b/tests/system/test_sqlalchemy_bigquery.py @@ -18,7 +18,9 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -*- coding: utf-8 -*- -from __future__ import unicode_literals + +import datetime +import decimal from sqlalchemy.engine import create_engine from sqlalchemy.schema import Table, MetaData, Column @@ -31,8 +33,8 @@ from pytz import timezone import pytest import sqlalchemy -import datetime -import decimal + +import sqlalchemy_bigquery ONE_ROW_CONTENTS_EXPANDED = [ 588, @@ -97,17 +99,21 @@ {"name": "bytes", "type": types.BINARY(), "nullable": True, "default": None}, { "name": "record", - "type": types.JSON(), + "type": sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer), "nullable": True, "default": None, "comment": "In Standard SQL this data type is a STRUCT.", }, {"name": "record.name", "type": types.String(), "nullable": True, "default": None}, {"name": "record.age", "type": types.Integer(), "nullable": True, "default": None}, - {"name": "nested_record", "type": types.JSON(), "nullable": True, "default": None}, + {"name": "nested_record", + "type": sqlalchemy_bigquery.STRUCT( + record=sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer)), + "nullable": True, + "default": None}, { "name": "nested_record.record", - "type": types.JSON(), + "type": sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer), "nullable": True, "default": None, }, From 74aab64f0bf15984e5d01b99caba9bd513ce3d09 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 08:06:57 -0600 Subject: [PATCH 03/26] Added system test I neglected to check in before :( --- tests/system/test__struct.py | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/system/test__struct.py diff --git a/tests/system/test__struct.py b/tests/system/test__struct.py new file mode 100644 index 00000000..08396b97 --- /dev/null +++ b/tests/system/test__struct.py @@ -0,0 +1,70 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import datetime + +import sqlalchemy + +import sqlalchemy_bigquery + + +def test_struct(engine, bigquery_dataset, metadata): + conn = engine.connect() + table = sqlalchemy.Table( + f"{bigquery_dataset}.test_struct", + metadata, + sqlalchemy.Column( + "person", + sqlalchemy_bigquery.STRUCT( + name=sqlalchemy.String, + children=sqlalchemy.ARRAY( + sqlalchemy_bigquery.STRUCT(name=sqlalchemy.String, + bdate=sqlalchemy.DATE) + ), + )), + ) + metadata.create_all(engine) + + conn.execute( + table.insert() + .values( + person=dict( + name="bob", + children=[dict(name="billy", bdate=datetime.date(2020, 1, 1))], + ) + ) + ) + + assert list(conn.execute(sqlalchemy.select([table]))) == [ + ({'name': 'bob', + 'children': [{'name': 'billy', 'bdate': datetime.date(2020, 1, 1)}]},)] + assert list(conn.execute(sqlalchemy.select([table.c.person.NAME]))) == [('bob',)] + assert list(conn.execute(sqlalchemy.select([table.c.person.children[0]]))) == [ + ({'name': 'billy', 'bdate': datetime.date(2020, 1, 1)},)] + assert list(conn.execute(sqlalchemy.select([table.c.person.children[0].bdate])) + ) == [(datetime.date(2020, 1, 1),)] + assert list(conn.execute(sqlalchemy.select([table]) + .where(table.c.person.children[0].NAME == 'billy')) + ) == [({'name': 'bob', + 'children': [{'name': 'billy', + 'bdate': datetime.date(2020, 1, 1)}]},)] + assert list(conn.execute(sqlalchemy.select([table]) + .where(table.c.person.children[0].NAME == 'sally') + ) + ) == [] From c5653e240878ad60394989d915cfe03f0c32c3b1 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 08:07:30 -0600 Subject: [PATCH 04/26] blacken --- tests/system/test__struct.py | 65 ++++++++++++++++-------- tests/system/test_sqlalchemy_bigquery.py | 13 +++-- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/tests/system/test__struct.py b/tests/system/test__struct.py index 08396b97..b5aa8799 100644 --- a/tests/system/test__struct.py +++ b/tests/system/test__struct.py @@ -34,37 +34,58 @@ def test_struct(engine, bigquery_dataset, metadata): sqlalchemy_bigquery.STRUCT( name=sqlalchemy.String, children=sqlalchemy.ARRAY( - sqlalchemy_bigquery.STRUCT(name=sqlalchemy.String, - bdate=sqlalchemy.DATE) - ), - )), + sqlalchemy_bigquery.STRUCT( + name=sqlalchemy.String, bdate=sqlalchemy.DATE + ) + ), + ), + ), ) metadata.create_all(engine) conn.execute( - table.insert() - .values( + table.insert().values( person=dict( name="bob", children=[dict(name="billy", bdate=datetime.date(2020, 1, 1))], - ) ) ) + ) assert list(conn.execute(sqlalchemy.select([table]))) == [ - ({'name': 'bob', - 'children': [{'name': 'billy', 'bdate': datetime.date(2020, 1, 1)}]},)] - assert list(conn.execute(sqlalchemy.select([table.c.person.NAME]))) == [('bob',)] + ( + { + "name": "bob", + "children": [{"name": "billy", "bdate": datetime.date(2020, 1, 1)}], + }, + ) + ] + assert list(conn.execute(sqlalchemy.select([table.c.person.NAME]))) == [("bob",)] assert list(conn.execute(sqlalchemy.select([table.c.person.children[0]]))) == [ - ({'name': 'billy', 'bdate': datetime.date(2020, 1, 1)},)] - assert list(conn.execute(sqlalchemy.select([table.c.person.children[0].bdate])) - ) == [(datetime.date(2020, 1, 1),)] - assert list(conn.execute(sqlalchemy.select([table]) - .where(table.c.person.children[0].NAME == 'billy')) - ) == [({'name': 'bob', - 'children': [{'name': 'billy', - 'bdate': datetime.date(2020, 1, 1)}]},)] - assert list(conn.execute(sqlalchemy.select([table]) - .where(table.c.person.children[0].NAME == 'sally') - ) - ) == [] + ({"name": "billy", "bdate": datetime.date(2020, 1, 1)},) + ] + assert list( + conn.execute(sqlalchemy.select([table.c.person.children[0].bdate])) + ) == [(datetime.date(2020, 1, 1),)] + assert list( + conn.execute( + sqlalchemy.select([table]).where(table.c.person.children[0].NAME == "billy") + ) + ) == [ + ( + { + "name": "bob", + "children": [{"name": "billy", "bdate": datetime.date(2020, 1, 1)}], + }, + ) + ] + assert ( + list( + conn.execute( + sqlalchemy.select([table]).where( + table.c.person.children[0].NAME == "sally" + ) + ) + ) + == [] + ) diff --git a/tests/system/test_sqlalchemy_bigquery.py b/tests/system/test_sqlalchemy_bigquery.py index 5a8f4158..564c5e68 100644 --- a/tests/system/test_sqlalchemy_bigquery.py +++ b/tests/system/test_sqlalchemy_bigquery.py @@ -106,11 +106,14 @@ }, {"name": "record.name", "type": types.String(), "nullable": True, "default": None}, {"name": "record.age", "type": types.Integer(), "nullable": True, "default": None}, - {"name": "nested_record", - "type": sqlalchemy_bigquery.STRUCT( - record=sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer)), - "nullable": True, - "default": None}, + { + "name": "nested_record", + "type": sqlalchemy_bigquery.STRUCT( + record=sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer) + ), + "nullable": True, + "default": None, + }, { "name": "nested_record.record", "type": sqlalchemy_bigquery.STRUCT(name=types.String, age=types.Integer), From 9df180462b404964c3fbf5fcae3b93af8dd1006b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 16:17:25 -0600 Subject: [PATCH 05/26] Don't strip from parameter types Otherwise, the BQ doesn't handle arrays of structs. --- sqlalchemy_bigquery/base.py | 3 --- tests/unit/test_select.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 3e49d1e1..f5b1d515 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -494,9 +494,6 @@ def visit_bindparam( type_.scale = -t.exponent bq_type = self.dialect.type_compiler.process(type_) - if bq_type[-1] == ">" and bq_type.startswith("ARRAY<"): - # Values get arrayified at a lower level. - bq_type = bq_type[6:-1] bq_type = self.__remove_type_parameter(bq_type) assert_(param != "%s", f"Unexpected param: {param}") diff --git a/tests/unit/test_select.py b/tests/unit/test_select.py index 8049b8a6..641677a4 100644 --- a/tests/unit/test_select.py +++ b/tests/unit/test_select.py @@ -129,9 +129,6 @@ def test_typed_parameters(faux_conn, type_, val, btype, vrep): faux_conn.execute(table.insert().values(**{col_name: val})) - if btype.startswith("ARRAY<"): - btype = btype[6:-1] - ptype = btype[: btype.index("(")] if "(" in btype else btype assert faux_conn.test_data["execute"][-1] == ( From 0df170128ba2b4d0fe91340b2fdfe58f09fa1e96 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 16:18:09 -0600 Subject: [PATCH 06/26] Added system tests to verift PR 67 and issue 233 --- tests/system/test__struct.py | 93 ++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/tests/system/test__struct.py b/tests/system/test__struct.py index b5aa8799..b9d6bad5 100644 --- a/tests/system/test__struct.py +++ b/tests/system/test__struct.py @@ -89,3 +89,96 @@ def test_struct(engine, bigquery_dataset, metadata): ) == [] ) + + +def test_complex_literals_pr_67(engine, bigquery_dataset, metadata): + # https://github.com/googleapis/python-bigquery-sqlalchemy/pull/67 + + # Simple select example: + + table_name = f"{bigquery_dataset}.test_comples_literals_pr_67" + engine.execute( + f""" + create table {table_name} as ( + select 'a' as id, + struct(1 as x__count, 2 as y__count, 3 as z__count) as dimensions + ) + """) + + table = sqlalchemy.Table(table_name, metadata, autoload_with=engine) + + got = str(sqlalchemy.select([(table.c.dimensions.x__count + 5).label("c")]) + .compile(engine)) + want = ( + f"SELECT (`{table_name}`.`dimensions`.x__count) + %(param_1:INT64)s AS `c` \n" + f"FROM `{table_name}`" + ) + + assert got == want + + # Hopefully, "Example doing a pivot" is addressed by + # test_unnest_and_struct_access_233 below :) + + +def test_unnest_and_struct_access_233(engine, bigquery_dataset, metadata): + # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/233 + + from sqlalchemy import Table, select, Column, ARRAY, String, func + from sqlalchemy.orm import sessionmaker + from sqlalchemy_bigquery import STRUCT + + conn = engine.connect() + + mock_table = Table(f"{bigquery_dataset}.Mock", metadata, Column("mock_id", String)) + another_mock_table = Table( + f"{bigquery_dataset}.AnotherMock", + metadata, + Column("objects", ARRAY(STRUCT(object_id=String))), + ) + metadata.create_all(engine) + + conn.execute( + mock_table.insert(), + dict(mock_id="x"), + dict(mock_id="y"), + dict(mock_id="z"), + ) + conn.execute( + another_mock_table.insert(), + dict(objects=[dict(object_id='x'), dict(object_id='y'), dict(object_id='q')]), + ) + + subquery = select( + func.unnest(another_mock_table.c.objects).alias('another_mock_objects').column + ).subquery() + + join = mock_table.join( + subquery, + subquery.c.another_mock_objects['object_id'] == mock_table.c.mock_id, + ) + + query = select(mock_table).select_from(join) + + got = str(query.compile(engine)) + want = ( + f"SELECT `{bigquery_dataset}.Mock`.`mock_id` \n" + f"FROM `{bigquery_dataset}.Mock` " + f"JOIN (" + f"SELECT `another_mock_objects` \n" + f"FROM " + f"`{bigquery_dataset}.AnotherMock` `{bigquery_dataset}.AnotherMock_1`, " + f"unnest(`{bigquery_dataset}.AnotherMock_1`.`objects`)" + f" AS `another_mock_objects`" + f") AS `anon_1` " + f"ON " + f"(`anon_1`.`another_mock_objects`.object_id) = " + f"`{bigquery_dataset}.Mock`.`mock_id`" + ) + assert got == want + + Session = sessionmaker(bind=engine) + session = Session() + results = sorted(session.execute(query)) + + assert results == [('x',), ('y',)] + From f10a571b2e2fec732c87145705da2be91ed3450a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 31 Aug 2021 16:22:44 -0600 Subject: [PATCH 07/26] blacken --- tests/system/test__struct.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/system/test__struct.py b/tests/system/test__struct.py index b9d6bad5..dfe96c0e 100644 --- a/tests/system/test__struct.py +++ b/tests/system/test__struct.py @@ -103,16 +103,20 @@ def test_complex_literals_pr_67(engine, bigquery_dataset, metadata): select 'a' as id, struct(1 as x__count, 2 as y__count, 3 as z__count) as dimensions ) - """) + """ + ) table = sqlalchemy.Table(table_name, metadata, autoload_with=engine) - got = str(sqlalchemy.select([(table.c.dimensions.x__count + 5).label("c")]) - .compile(engine)) + got = str( + sqlalchemy.select([(table.c.dimensions.x__count + 5).label("c")]).compile( + engine + ) + ) want = ( f"SELECT (`{table_name}`.`dimensions`.x__count) + %(param_1:INT64)s AS `c` \n" f"FROM `{table_name}`" - ) + ) assert got == want @@ -134,28 +138,24 @@ def test_unnest_and_struct_access_233(engine, bigquery_dataset, metadata): f"{bigquery_dataset}.AnotherMock", metadata, Column("objects", ARRAY(STRUCT(object_id=String))), - ) + ) metadata.create_all(engine) conn.execute( - mock_table.insert(), - dict(mock_id="x"), - dict(mock_id="y"), - dict(mock_id="z"), + mock_table.insert(), dict(mock_id="x"), dict(mock_id="y"), dict(mock_id="z"), ) conn.execute( another_mock_table.insert(), - dict(objects=[dict(object_id='x'), dict(object_id='y'), dict(object_id='q')]), + dict(objects=[dict(object_id="x"), dict(object_id="y"), dict(object_id="q")]), ) subquery = select( - func.unnest(another_mock_table.c.objects).alias('another_mock_objects').column - ).subquery() + func.unnest(another_mock_table.c.objects).alias("another_mock_objects").column + ).subquery() join = mock_table.join( - subquery, - subquery.c.another_mock_objects['object_id'] == mock_table.c.mock_id, - ) + subquery, subquery.c.another_mock_objects["object_id"] == mock_table.c.mock_id, + ) query = select(mock_table).select_from(join) @@ -173,12 +173,11 @@ def test_unnest_and_struct_access_233(engine, bigquery_dataset, metadata): f"ON " f"(`anon_1`.`another_mock_objects`.object_id) = " f"`{bigquery_dataset}.Mock`.`mock_id`" - ) + ) assert got == want Session = sessionmaker(bind=engine) session = Session() results = sorted(session.execute(query)) - assert results == [('x',), ('y',)] - + assert results == [("x",), ("y",)] From ec31040ef06502027c5824f4f1dcfdaa6d602787 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 07:19:50 -0600 Subject: [PATCH 08/26] Renamed test file to conform to samples test-file naming conventions --- samples/snippets/geography_test.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 samples/snippets/geography_test.py diff --git a/samples/snippets/geography_test.py b/samples/snippets/geography_test.py new file mode 100644 index 00000000..7a570b81 --- /dev/null +++ b/samples/snippets/geography_test.py @@ -0,0 +1,27 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +def test_geography(engine): + from . import geography + + lakes_touching_lake2, lakes_containing = geography.example(engine) + + assert sorted(lake.name for lake in lakes_touching_lake2) == ["Majeur", "Orta"] + assert [lake.name for lake in lakes_containing] == ["Orta"] From accf76226c2fac216e3b6b810a0d61cd1daae713 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 07:24:06 -0600 Subject: [PATCH 09/26] Require google-cloud-bigquery 2.25.2 to get struct field-name underscore fix --- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f70c3a0d..7efe0f9b 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,7 @@ def readme(): # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-auth>=1.25.0,<3.0.0dev", # Work around pip wack. - "google-cloud-bigquery>=2.24.1", + "google-cloud-bigquery>=2.25.2,<3.0.0dev", "sqlalchemy>=1.2.0,<1.5.0dev", "future", ], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index e5ed0b2a..60421130 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -6,5 +6,5 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", sqlalchemy==1.2.0 google-auth==1.25.0 -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.2 google-api-core==1.30.0 From ef5f8910a56813f3b793fca13b3f1e7187423c2b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 09:48:59 -0600 Subject: [PATCH 10/26] Added STRUCT documentation --- docs/index.rst | 1 + samples/snippets/STRUCT.py | 88 +++++++++++++++++++ .../{test_geography.py => STRUCT_test.py} | 10 +-- 3 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 samples/snippets/STRUCT.py rename samples/snippets/{test_geography.py => STRUCT_test.py} (80%) diff --git a/docs/index.rst b/docs/index.rst index 4fe42891..c70b2d2f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,6 +3,7 @@ :maxdepth: 2 README + struct geography alembic reference diff --git a/samples/snippets/STRUCT.py b/samples/snippets/STRUCT.py new file mode 100644 index 00000000..5e2e30ee --- /dev/null +++ b/samples/snippets/STRUCT.py @@ -0,0 +1,88 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +def example(engine): + # [START bigquery_sqlalchemy_create_table_with_struct] + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy import Column, String, Integer, Float + from sqlalchemy_bigquery import STRUCT + + Base = declarative_base() + + class Car(Base): + __tablename__ = "Cars" + + model = Column(String, primary_key=True) + engine = Column( + STRUCT( + cylinder=STRUCT(("count", Integer), + ("compression", Float)), + horsepower=Integer) + ) + + # [END bigquery_sqlalchemy_create_table_with_struct] + Car.__table__.create(engine) + + # [START bigquery_sqlalchemy_insert_struct] + from sqlalchemy.orm import sessionmaker + + Session = sessionmaker(bind=engine) + session = Session() + + sebring = Car(model="Sebring", + engine=dict( + cylinder=dict( + count=6, + compression=18.0), + horsepower=235)) + townc = Car(model="Town and Counttry", + engine=dict( + cylinder=dict( + count=6, + compression=16.0), + horsepower=251)) + xj8 = Car(model="XJ8", + engine=dict( + cylinder=dict( + count=8, + compression=10.75), + horsepower=575)) + + session.add_all((sebring, townc, xj8)) + session.commit() + + # [END bigquery_sqlalchemy_insert_struct] + + # [START bigquery_sqlalchemy_query_struct] + sixes = session.query(Car).filter(Car.engine.cylinder.count == 6) + # [END bigquery_sqlalchemy_query_struct] + sixes1 = list(sixes) + + # [START bigquery_sqlalchemy_query_STRUCT] + sixes = session.query(Car).filter(Car.engine.CYLINDER.COUNT == 6) + # [END bigquery_sqlalchemy_query_STRUCT] + sixes2 = list(sixes) + + # [START bigquery_sqlalchemy_query_getitem] + sixes = session.query(Car).filter(Car.engine["cylinder"]["count"] == 6) + # [END bigquery_sqlalchemy_query_getitem] + sixes3 = list(sixes) + + return sixes1, sixes2, sixes3 diff --git a/samples/snippets/test_geography.py b/samples/snippets/STRUCT_test.py similarity index 80% rename from samples/snippets/test_geography.py rename to samples/snippets/STRUCT_test.py index 7a570b81..5a5c6515 100644 --- a/samples/snippets/test_geography.py +++ b/samples/snippets/STRUCT_test.py @@ -18,10 +18,10 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -def test_geography(engine): - from . import geography +def test_struct(engine): + from . import STRUCT - lakes_touching_lake2, lakes_containing = geography.example(engine) + sixeses = STRUCT.example(engine) - assert sorted(lake.name for lake in lakes_touching_lake2) == ["Majeur", "Orta"] - assert [lake.name for lake in lakes_containing] == ["Orta"] + for sixes in sixeses: + assert sorted(car.model for car in sixes) == ["Sebring", "Town and Counttry"] From cce9dbb4b3aa7fada827816acc8297f9c1201a14 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 09:50:49 -0600 Subject: [PATCH 11/26] fix bigquery version --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ef7a6f26..94a0373d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -16,7 +16,7 @@ GeoAlchemy2==0.9.4 geopandas==0.9.0 google-api-core==2.0.0 google-auth==2.0.1 -google-cloud-bigquery==2.25.1 +google-cloud-bigquery==2.25.2 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.3 From b697df63908bcb9ede445b9f1fcdb430dc0e94a1 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 10:52:27 -0600 Subject: [PATCH 12/26] get blacken to leave sample code alone. I want it narrow to avoid horizonal scrolling --- samples/snippets/STRUCT.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/snippets/STRUCT.py b/samples/snippets/STRUCT.py index 5e2e30ee..ce59f90b 100644 --- a/samples/snippets/STRUCT.py +++ b/samples/snippets/STRUCT.py @@ -19,6 +19,7 @@ def example(engine): + # fmt: off # [START bigquery_sqlalchemy_create_table_with_struct] from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import Column, String, Integer, Float @@ -83,6 +84,7 @@ class Car(Base): # [START bigquery_sqlalchemy_query_getitem] sixes = session.query(Car).filter(Car.engine["cylinder"]["count"] == 6) # [END bigquery_sqlalchemy_query_getitem] + # fmt: on sixes3 = list(sixes) return sixes1, sixes2, sixes3 From 6a278b913499b4e1b3b23c7ee1c38331e25a0a99 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 10:55:05 -0600 Subject: [PATCH 13/26] Check in missing file :( --- docs/struct.rst | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 docs/struct.rst diff --git a/docs/struct.rst b/docs/struct.rst new file mode 100644 index 00000000..9b2d5724 --- /dev/null +++ b/docs/struct.rst @@ -0,0 +1,69 @@ +Working with BigQuery STRUCT data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The BigQuery `STRUCT data type +`_ +provided data that are collections of named fields. + +`sqlalchemy-bigquery` provided a STRUCT type that can be used to +define tables with STRUCT columns: + +.. literalinclude:: samples/snippets/STRUCT.py + :language: python + :dedent: 4 + :start-after: [START bigquery_sqlalchemy_create_table_with_struct] + :end-before: [END bigquery_sqlalchemy_create_table_with_struct] + +`STRUCT` types can be nested, as in this example. Struct fields can +be defined in two ways: + +- Fields can be provided as keyword arguments, as in the `cylinder` + and `horsepower` fields in this example. + +- Fields can be provided as name-type tuples provided as positional + arguments, as with the `count` and `compression` fields in this example. + +STRUCT columns are automatically created when existing database tables +containing STRUCT columns are introspected. + +Struct data are represented in Python as Python dictionaries: + +.. literalinclude:: samples/snippets/STRUCT.py + :language: python + :dedent: 4 + :start-after: [START bigquery_sqlalchemy_insert_struct] + :end-before: [END bigquery_sqlalchemy_insert_struct] + +When querying struct fields, you can use attribute access syntax: + +.. literalinclude:: samples/snippets/STRUCT.py + :language: python + :dedent: 4 + :start-after: [START bigquery_sqlalchemy_query_struct] + :end-before: [END bigquery_sqlalchemy_query_struct] + +or mapping access: + +.. literalinclude:: samples/snippets/STRUCT.py + :language: python + :dedent: 4 + :start-after: [START bigquery_sqlalchemy_query_getitem] + :end-before: [END bigquery_sqlalchemy_query_getitem] + +and field names are case insensitive: + +.. literalinclude:: samples/snippets/STRUCT.py + :language: python + :dedent: 4 + :start-after: [START bigquery_sqlalchemy_query_STRUCT] + :end-before: [END bigquery_sqlalchemy_query_STRUCT] + +When using attribute-access syntax, field names may conflict with +column attribute names. For example SQLAlchemy columns have `name` +and `type` attributes, among others. When accessing a field whose name +conflicts with a column attribute name, either use mapping access, or +spell the field name with upper-case letters. + + + + From 84426bd9510970b82b10c2f8b5a064fd87b95b02 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 11:24:46 -0600 Subject: [PATCH 14/26] need sqla 1.4 for unnest --- tests/system/test__struct.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/system/test__struct.py b/tests/system/test__struct.py index dfe96c0e..24863056 100644 --- a/tests/system/test__struct.py +++ b/tests/system/test__struct.py @@ -19,6 +19,8 @@ import datetime +import packaging.version +import pytest import sqlalchemy import sqlalchemy_bigquery @@ -124,6 +126,10 @@ def test_complex_literals_pr_67(engine, bigquery_dataset, metadata): # test_unnest_and_struct_access_233 below :) +@pytest.mark.skipif( + packaging.version.parse(sqlalchemy.__version__) < packaging.version.parse("1.4"), + reason="unnest (and other table-valued-function) support required version 1.4", +) def test_unnest_and_struct_access_233(engine, bigquery_dataset, metadata): # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/233 From 587a0f70f904f0c9e56d5d67c8a901c67d0fb3c3 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 1 Sep 2021 15:23:53 -0600 Subject: [PATCH 15/26] fixed typo --- docs/alembic.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/alembic.rst b/docs/alembic.rst index e83953a0..8b5df741 100644 --- a/docs/alembic.rst +++ b/docs/alembic.rst @@ -43,7 +43,7 @@ Supported operations: `_ Note that some of the operations above have limited capability, again -do to `BigQuery limitations +due to `BigQuery limitations `_. The `execute` operation allows access to BigQuery-specific From 5bf07b467c0d28d7c6fcd1f915857819d8abd479 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 14:54:38 -0600 Subject: [PATCH 16/26] Update sqlalchemy_bigquery/_struct.py Co-authored-by: Tim Swast --- sqlalchemy_bigquery/_struct.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index dc5cdd30..d6042a86 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -1,16 +1,21 @@ -# Copyright 2021 Google LLC +# Copyright (c) 2021 The sqlalchemy-bigquery Authors # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: # -# http://www.apache.org/licenses/LICENSE-2.0 +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from typing import Mapping, Tuple From e9371673a31c4a8f8ff9781088a59c4e0b0a3043 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 15:01:00 -0600 Subject: [PATCH 17/26] added STRUCT docstring --- sqlalchemy_bigquery/_struct.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index dc5cdd30..38e66889 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -34,6 +34,12 @@ class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType): + """ + A type for BigQuery STRUCT/RECORD data + + See https://googleapis.dev/python/sqlalchemy-bigquery/latest/struct.html + """ + def __init__( self, *fields: Tuple[str, sqlalchemy.types.TypeEngine], From 8661f5b66441abc3fb4e8f04a0d26ad639fec5f7 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 15:01:32 -0600 Subject: [PATCH 18/26] Add doc link --- sqlalchemy_bigquery/geography.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlalchemy_bigquery/geography.py b/sqlalchemy_bigquery/geography.py index 9a10c236..16384dd4 100644 --- a/sqlalchemy_bigquery/geography.py +++ b/sqlalchemy_bigquery/geography.py @@ -95,7 +95,7 @@ class Lake(Base): name = Column(String) geog = column(GEOGRAPHY) - + See https://googleapis.dev/python/sqlalchemy-bigquery/latest/geography.html """ def __init__(self): From af68a54370a78d27f44d0da755ba980aeda34c90 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 7 Sep 2021 15:25:29 -0600 Subject: [PATCH 19/26] Added some comments --- sqlalchemy_bigquery/_struct.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index 1b4e7beb..c6bd4436 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -45,11 +45,16 @@ class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType See https://googleapis.dev/python/sqlalchemy-bigquery/latest/struct.html """ + # See https://docs.sqlalchemy.org/en/14/core/custom_types.html#creating-new-types + def __init__( self, *fields: Tuple[str, sqlalchemy.types.TypeEngine], **kwfields: Mapping[str, sqlalchemy.types.TypeEngine], ): + # Note that because: + # https://docs.python.org/3/whatsnew/3.6.html#pep-468-preserving-keyword-argument-order + # We know that `kwfields` preserves order. self.__fields = tuple( ( name, From da43fd2a2f63fd7c734b0a2877f4fa3f6edf4e71 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:01:48 -0600 Subject: [PATCH 20/26] Localize logic for getting subtye column specifications --- sqlalchemy_bigquery/_struct.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index c6bd4436..62488c82 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -34,8 +34,13 @@ import sqlalchemy.sql.coercions import sqlalchemy.sql.roles -# We have to delay getting the type compiler, because of circular imports. :( -type_compiler = None + +def _get_subtype_col_spec(type_): + global _get_subtype_col_spec + + type_compiler = base.dialect.type_compiler(base.dialect()) + _get_subtype_col_spec = type_compiler.process + return _get_subtype_col_spec(type_) class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType): @@ -70,15 +75,9 @@ def __repr__(self): return f"STRUCT({fields})" def get_col_spec(self, **kw): - global type_compiler - - try: - process = type_compiler.process - except AttributeError: - type_compiler = base.dialect.type_compiler(base.dialect()) - process = type_compiler.process - - fields = ", ".join(f"{name} {process(type_)}" for name, type_ in self.__fields) + fields = ", ".join( + f"{name} {_get_subtype_col_spec(type_)}" for name, type_ in self.__fields + ) return f"STRUCT<{fields}>" def bind_processor(self, dialect): From f04cac275e155561f19d8f9fc4ac46df4eab8bda Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:08:19 -0600 Subject: [PATCH 21/26] explain semi-private name mangling --- sqlalchemy_bigquery/_struct.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index 62488c82..f6f410fa 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -90,6 +90,7 @@ def _setup_getitem(self, name): f"STRUCT fields can only be accessed with strings field names," f" not {name}." ) + # Note that type._STRUCT__byname is accessing the __byname private variable. subtype = self.expr.type._STRUCT__byname.get(name.lower()) if subtype is None: raise KeyError(name) From 5af05bbca557f8a6b8f7972990b4ab6b13d1f9f3 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:12:37 -0600 Subject: [PATCH 22/26] Make name magling more explicit --- sqlalchemy_bigquery/_struct.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index f6f410fa..b0c394a2 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -60,7 +60,7 @@ def __init__( # Note that because: # https://docs.python.org/3/whatsnew/3.6.html#pep-468-preserving-keyword-argument-order # We know that `kwfields` preserves order. - self.__fields = tuple( + self._STRUCT_fields = tuple( ( name, type_ if isinstance(type_, sqlalchemy.types.TypeEngine) else type_(), @@ -68,15 +68,20 @@ def __init__( for (name, type_) in (fields + tuple(kwfields.items())) ) - self.__byname = {name.lower(): type_ for (name, type_) in self.__fields} + self._STRUCT_byname = { + name.lower(): type_ for (name, type_) in self._STRUCT_fields + } def __repr__(self): - fields = ", ".join(f"{name}={repr(type_)}" for name, type_ in self.__fields) + fields = ", ".join( + f"{name}={repr(type_)}" for name, type_ in self._STRUCT_fields + ) return f"STRUCT({fields})" def get_col_spec(self, **kw): fields = ", ".join( - f"{name} {_get_subtype_col_spec(type_)}" for name, type_ in self.__fields + f"{name} {_get_subtype_col_spec(type_)}" + for name, type_ in self._STRUCT_fields ) return f"STRUCT<{fields}>" @@ -90,8 +95,7 @@ def _setup_getitem(self, name): f"STRUCT fields can only be accessed with strings field names," f" not {name}." ) - # Note that type._STRUCT__byname is accessing the __byname private variable. - subtype = self.expr.type._STRUCT__byname.get(name.lower()) + subtype = self.expr.type._STRUCT_byname.get(name.lower()) if subtype is None: raise KeyError(name) operator = struct_getitem_op @@ -99,7 +103,7 @@ def _setup_getitem(self, name): return operator, index, subtype def __getattr__(self, name): - if name.lower() in self.expr.type._STRUCT__byname: + if name.lower() in self.expr.type._STRUCT_byname: return self[name] comparator_factory = Comparator From 09866c6040f39b275f30ff8c8d44f3469d99e010 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:15:45 -0600 Subject: [PATCH 23/26] explain why we have different implementations of _field_index for SQLAlchemy 1.3 and 1/4 --- sqlalchemy_bigquery/_struct.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index b0c394a2..c4499d0b 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -109,6 +109,10 @@ def __getattr__(self, name): comparator_factory = Comparator +# In the implementations of _field_index below, we're stealing from +# the JSON type implementation, but the code to steal changed in +# 1.4. :/ + if sqlalchemy_1_4_or_more: def _field_index(self, name, operator): From 054c227a285f6d18a380e2c005eb0c05b977ac39 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:30:50 -0600 Subject: [PATCH 24/26] get rid of cur_fields, we're not using it anymore. Also, check for both RECORD and STRUCT fild types, in case the API ever starts returning STRUCT. --- sqlalchemy_bigquery/_types.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sqlalchemy_bigquery/_types.py b/sqlalchemy_bigquery/_types.py index 5fac827c..4e18dc2a 100644 --- a/sqlalchemy_bigquery/_types.py +++ b/sqlalchemy_bigquery/_types.py @@ -72,8 +72,10 @@ except NameError: pass +STRUCT_FIELD_TYPES = "RECORD", "STRUCT" -def _get_transitive_schema_fields(fields, cur_fields): + +def _get_transitive_schema_fields(fields): """ Recurse into record type and return all the nested field names. As contributed by @sumedhsakdeo on issue #17 @@ -81,16 +83,14 @@ def _get_transitive_schema_fields(fields, cur_fields): results = [] for field in fields: results += [field] - if field.field_type == "RECORD": - cur_fields.append(field) + if field.field_type in STRUCT_FIELD_TYPES: sub_fields = [ SchemaField.from_api_repr( dict(f.to_api_repr(), name=f"{field.name}.{f.name}") ) for f in field.fields ] - results += _get_transitive_schema_fields(sub_fields, cur_fields) - cur_fields.pop() + results += _get_transitive_schema_fields(sub_fields) return results @@ -125,7 +125,7 @@ def _get_sqla_column_type(field): def get_columns(bq_schema): - fields = _get_transitive_schema_fields(bq_schema, []) + fields = _get_transitive_schema_fields(bq_schema) return [ { "name": field.name, From 1a79305a4fee3d7caae2c2b78d7223ee136a3165 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:39:40 -0600 Subject: [PATCH 25/26] Add a todo to find out why Sqlalchemy doesn't generate an alias when accessing array items --- tests/unit/test__struct.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/test__struct.py b/tests/unit/test__struct.py index c4b2f61f..ee096fb5 100644 --- a/tests/unit/test__struct.py +++ b/tests/unit/test__struct.py @@ -64,8 +64,10 @@ def _col(): (_col().NAME, "`t`.`person`.NAME"), (_col().children, "`t`.`person`.children"), ( - _col().children[0].label("anon_1"), # SQLAlchemy doesn't add the label - # in this case for some reason + # SQLAlchemy doesn't add the label in this case for some reason. + # TODO: why? + # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/336 + _col().children[0].label("anon_1"), "(`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]", ), ( From 5e2ae32b003edf10e9d4464a0353e2bb9c85dfec Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 8 Sep 2021 13:40:59 -0600 Subject: [PATCH 26/26] user `repr` rather than `str` to shpow an object in an error message Co-authored-by: Tim Swast --- sqlalchemy_bigquery/_struct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlalchemy_bigquery/_struct.py b/sqlalchemy_bigquery/_struct.py index c4499d0b..a3d9aba4 100644 --- a/sqlalchemy_bigquery/_struct.py +++ b/sqlalchemy_bigquery/_struct.py @@ -93,7 +93,7 @@ def _setup_getitem(self, name): if not isinstance(name, str): raise TypeError( f"STRUCT fields can only be accessed with strings field names," - f" not {name}." + f" not {repr(name)}." ) subtype = self.expr.type._STRUCT_byname.get(name.lower()) if subtype is None: