Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 4 additions & 42 deletions bigframes/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

import bigframes.core.compile.compiled as compiled
import bigframes.core.compile.compiler as compiler
import bigframes.core.expression as expressions
import bigframes.core.guid
import bigframes.core.nodes as nodes
from bigframes.core.ordering import OrderingColumnReference
import bigframes.core.ordering as orderings
import bigframes.core.utils
from bigframes.core.window_spec import WindowSpec
import bigframes.dtypes
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
import bigframes.session._io.bigquery

Expand Down Expand Up @@ -152,48 +152,10 @@ def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]]))
)

def project_unary_op(
self, column_name: str, op: ops.UnaryOp, output_name=None
) -> ArrayValue:
"""Creates a new expression based on this expression with unary operation applied to one column."""
def project(self, expression: expressions.Expression, output_id: str):
return ArrayValue(
nodes.ProjectRowOpNode(
child=self.node, input_ids=(column_name,), op=op, output_id=output_name
)
)

def project_binary_op(
self,
left_column_id: str,
right_column_id: str,
op: ops.BinaryOp,
output_column_id: str,
) -> ArrayValue:
"""Creates a new expression based on this expression with binary operation applied to two columns."""
return ArrayValue(
nodes.ProjectRowOpNode(
child=self.node,
input_ids=(left_column_id, right_column_id),
op=op,
output_id=output_column_id,
)
)

def project_ternary_op(
self,
col_id_1: str,
col_id_2: str,
col_id_3: str,
op: ops.TernaryOp,
output_column_id: str,
) -> ArrayValue:
"""Creates a new expression based on this expression with ternary operation applied to three columns."""
return ArrayValue(
nodes.ProjectRowOpNode(
child=self.node,
input_ids=(col_id_1, col_id_2, col_id_3),
op=op,
output_id=output_column_id,
nodes.ProjectionNode(
child=self.node, assignments=((expression, output_id),)
)
)

Expand Down
36 changes: 22 additions & 14 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def apply_unary_op(
"""
# TODO(tbergeron): handle labels safely so callers don't need to
result_id = guid.generate_guid()
expr = self._expr.project_unary_op(column, op, result_id)
expr = self._expr.project(op.as_expr(column), result_id)
block = Block(
expr,
index_columns=self.index_columns,
Expand All @@ -686,8 +686,8 @@ def apply_binary_op(
result_label: Label = None,
) -> typing.Tuple[Block, str]:
result_id = guid.generate_guid()
expr = self._expr.project_binary_op(
left_column_id, right_column_id, op, result_id
expr = self._expr.project(
op.as_expr(left_column_id, right_column_id), result_id
)
block = Block(
expr,
Expand All @@ -706,9 +706,7 @@ def apply_ternary_op(
result_label: Label = None,
) -> typing.Tuple[Block, str]:
result_id = guid.generate_guid()
expr = self._expr.project_ternary_op(
col_id_1, col_id_2, col_id_3, op, result_id
)
expr = self._expr.project(op.as_expr(col_id_1, col_id_2, col_id_3), result_id)
block = Block(
expr,
index_columns=self.index_columns,
Expand Down Expand Up @@ -1240,9 +1238,14 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
expr = expr.project(
expression=ops.AsTypeOp(to_type="string").as_expr(index_col),
output_id=index_col,
)
prefix_op = ops.ApplyLeft(base_op=ops.add_op, left_scalar=prefix)
expr = expr.project_unary_op(index_col, prefix_op)
expr = expr.project(
expression=prefix_op.as_expr(index_col), output_id=index_col
)
return Block(
expr,
index_columns=self.index_columns,
Expand All @@ -1259,9 +1262,14 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
expr = expr.project(
expression=ops.AsTypeOp(to_type="string").as_expr(index_col),
output_id=index_col,
)
prefix_op = ops.ApplyRight(base_op=ops.add_op, right_scalar=suffix)
expr = expr.project_unary_op(index_col, prefix_op)
expr = expr.project(
expression=prefix_op.as_expr(index_col), output_id=index_col
)
return Block(
expr,
index_columns=self.index_columns,
Expand Down Expand Up @@ -1568,10 +1576,10 @@ def merge(
coalesced_ids = []
for left_id, right_id in zip(left_join_ids, right_join_ids):
coalesced_id = guid.generate_guid()
joined_expr = joined_expr.project_binary_op(
get_column_left[left_id],
get_column_right[right_id],
ops.coalesce_op,
joined_expr = joined_expr.project(
ops.coalesce_op.as_expr(
get_column_left[left_id], get_column_right[right_id]
),
coalesced_id,
)
coalesced_ids.append(coalesced_id)
Expand Down
16 changes: 9 additions & 7 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import bigframes.constants as constants
import bigframes.core.compile.scalar_op_compiler as op_compilers
import bigframes.core.expression as expressions
import bigframes.core.guid
from bigframes.core.ordering import (
encode_order_string,
Expand Down Expand Up @@ -151,18 +152,19 @@ def _reproject_to_table(self: T) -> T:
"""
...

def project_row_op(
def project_expression(
self: T,
input_column_ids: typing.Sequence[str],
op: ops.RowOp,
expression: expressions.Expression,
output_column_id: typing.Optional[str] = None,
) -> T:
"""Creates a new expression based on this expression with unary operation applied to one column."""
"""Apply an expression to the ArrayValue and assign the output to a column."""
result_id = (
output_column_id or input_column_ids[0]
output_column_id or expression.unbound_variables[0]
) # overwrite input if not output id provided
inputs = tuple(self._get_ibis_column(col) for col in input_column_ids)
value = op_compiler.compile_row_op(op, inputs).name(result_id)
bindings = {
col: self._get_ibis_column(col) for col in expression.unbound_variables
}
value = op_compiler.compile_expression(expression, bindings).name(result_id)
return self._set_or_replace_by_id(result_id, value)

def assign(self: T, source_id: str, destination_id: str) -> T:
Expand Down
9 changes: 5 additions & 4 deletions bigframes/core/compile/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,11 @@ def compile_reversed(node: nodes.ReversedNode, ordered: bool = True):


@_compile_node.register
def compile_project(node: nodes.ProjectRowOpNode, ordered: bool = True):
return compile_node(node.child, ordered).project_row_op(
node.input_ids, node.op, node.output_id
)
def compile_projection(node: nodes.ProjectionNode, ordered: bool = True):
    """Compile a projection node by applying its assignments to the compiled child.

    The child node is compiled first. Each ``(expression, output_id)`` pair in
    ``node.assignments`` is then applied in order through
    ``project_expression``, so every assignment operates on the result of the
    previous one.
    """
    compiled = compile_node(node.child, ordered)
    # Named ``output_id`` rather than ``id`` so the builtin is not shadowed.
    for expression, output_id in node.assignments:
        compiled = compiled.project_expression(expression, output_id)
    return compiled


@_compile_node.register
Expand Down
40 changes: 40 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import pandas as pd

import bigframes.constants as constants
import bigframes.core.expression as expressions
import bigframes.dtypes
import bigframes.dtypes as dtypes
import bigframes.operations as ops
Expand All @@ -50,6 +51,45 @@ class ScalarOpCompiler:
],
] = {}

@functools.singledispatchmethod
def compile_expression(
    self,
    expression: expressions.Expression,
    bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
    """Compile ``expression`` to an ibis value, dispatching on the expression type.

    This is the fallback used when no overload is registered for the type of
    ``expression``.

    Raises:
        NotImplementedError: Always, naming the unrecognized expression.
    """
    message = f"Unrecognized expression: {expression}"
    raise NotImplementedError(message)

@compile_expression.register
def _(
    self,
    expression: expressions.ScalarConstantExpression,
    bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
    """Compile a scalar constant to an ibis literal; ``bindings`` is not consulted."""
    constant = expression.value
    return ibis.literal(constant)

@compile_expression.register
def _(
    self,
    expression: expressions.UnboundVariableExpression,
    bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
    """Resolve an unbound variable by looking up its id in ``bindings``.

    Raises:
        ValueError: If ``bindings`` has no entry for the variable's id.
    """
    if expression.id in bindings:
        return bindings[expression.id]
    raise ValueError(f"Could not resolve unbound variable {expression.id}")

@compile_expression.register
def _(
    self,
    expression: expressions.OpExpression,
    bindings: typing.Dict[str, ibis_types.Value],
) -> ibis_types.Value:
    """Compile an operation expression.

    Each input sub-expression is compiled recursively with the same
    ``bindings``; the compiled inputs are then handed, in order, to
    ``compile_row_op`` together with the expression's op.
    """
    compiled_arguments = [
        self.compile_expression(argument, bindings)
        for argument in expression.inputs
    ]
    return self.compile_row_op(expression.op, compiled_arguments)

def compile_row_op(
self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value]
) -> ibis_types.Value:
Expand Down
69 changes: 69 additions & 0 deletions bigframes/core/expression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add docs for the file and for each of the classes. Abstractions are hard to understand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some docstrings


import abc
import dataclasses
import itertools
import typing

import bigframes.operations


@dataclasses.dataclass(frozen=True)
class Expression(abc.ABC):
    """Base class for scalar expressions.

    An expression represents a computation taking N scalar inputs and
    producing a single output scalar.
    """

    @property
    def unbound_variables(self) -> typing.Tuple[str, ...]:
        """Ids of the unbound variables in this expression; empty by default."""
        return tuple()


@dataclasses.dataclass(frozen=True)
class ScalarConstantExpression(Expression):
    """Expression wrapping a single scalar constant."""

    # The constant itself; currently only required to be hashable.
    # TODO: Further constrain?
    value: typing.Hashable


@dataclasses.dataclass(frozen=True)
class UnboundVariableExpression(Expression):
    """A variable expression representing an unbound ("free") variable.

    For instance, in the expression ``x + y + 3/x``, the variables ``x`` and
    ``y`` are unbound.
    """

    # Identifier of the variable.
    id: str

    @property
    def unbound_variables(self) -> typing.Tuple[str, ...]:
        """Return a one-element tuple holding this variable's id."""
        variable_id = self.id
        return (variable_id,)


@dataclasses.dataclass(frozen=True)
class OpExpression(Expression):
    """An expression representing a scalar operation applied to 1 or more argument sub-expressions."""

    # Operation applied to the inputs.
    op: bigframes.operations.RowOp
    # Argument sub-expressions; their count must equal ``op.arguments``.
    inputs: typing.Tuple[Expression, ...]

    def __post_init__(self):
        """Check that the number of inputs matches ``op.arguments``.

        Raises:
            ValueError: If ``len(inputs)`` differs from ``op.arguments``.
        """
        # Raise explicitly rather than ``assert`` so the check is not
        # stripped when Python runs with ``-O``.
        expected = self.op.arguments
        actual = len(self.inputs)
        if expected != actual:
            raise ValueError(
                f"{self.op!r} expects {expected} input(s), got {actual}"
            )

    @property
    def unbound_variables(self) -> typing.Tuple[str, ...]:
        """Unbound variable ids of all inputs, concatenated in input order.

        Duplicates are kept: a variable used by several inputs appears once
        per use.
        """
        return tuple(
            itertools.chain.from_iterable(
                sub_expr.unbound_variables for sub_expr in self.inputs
            )
        )
4 changes: 2 additions & 2 deletions bigframes/core/indexes/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,8 @@ def coalesce_columns(
expr = expr.drop_columns([left_id])
elif how == "outer":
coalesced_id = bigframes.core.guid.generate_guid()
expr = expr.project_binary_op(
left_id, right_id, ops.coalesce_op, coalesced_id
expr = expr.project(
ops.coalesce_op.as_expr(left_id, right_id), coalesced_id
)
expr = expr.drop_columns([left_id, right_id])
result_ids.append(coalesced_id)
Expand Down
10 changes: 4 additions & 6 deletions bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
from dataclasses import dataclass, field, fields
import functools
import typing
from typing import Optional, Tuple
from typing import Tuple

import pandas

import bigframes.core.expression as expressions
import bigframes.core.guid
from bigframes.core.ordering import OrderingColumnReference
import bigframes.core.window_spec as window
import bigframes.dtypes
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -196,10 +196,8 @@ def __hash__(self):


@dataclass(frozen=True)
class ProjectRowOpNode(UnaryNode):
input_ids: typing.Tuple[str, ...]
op: ops.RowOp
output_id: Optional[str] = None
class ProjectionNode(UnaryNode):
assignments: typing.Tuple[typing.Tuple[expressions.Expression, str], ...]

def __hash__(self):
return self._node_hash
Expand Down
Loading