Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions bigframes/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def project_unary_op(
) -> ArrayValue:
"""Creates a new expression based on this expression with unary operation applied to one column."""
return ArrayValue(
nodes.ProjectUnaryOpNode(
child=self.node, input_id=column_name, op=op, output_id=output_name
nodes.ProjectRowOpNode(
child=self.node, input_ids=(column_name,), op=op, output_id=output_name
)
)

Expand All @@ -171,10 +171,9 @@ def project_binary_op(
) -> ArrayValue:
"""Creates a new expression based on this expression with binary operation applied to two columns."""
return ArrayValue(
nodes.ProjectBinaryOpNode(
nodes.ProjectRowOpNode(
child=self.node,
left_input_id=left_column_id,
right_input_id=right_column_id,
input_ids=(left_column_id, right_column_id),
op=op,
output_id=output_column_id,
)
Expand All @@ -190,11 +189,9 @@ def project_ternary_op(
) -> ArrayValue:
"""Creates a new expression based on this expression with ternary operation applied to three columns."""
return ArrayValue(
nodes.ProjectTernaryOpNode(
nodes.ProjectRowOpNode(
child=self.node,
input_id1=col_id_1,
input_id2=col_id_2,
input_id3=col_id_3,
input_ids=(col_id_1, col_id_2, col_id_3),
op=op,
output_id=output_column_id,
)
Expand Down
2 changes: 1 addition & 1 deletion bigframes/core/block_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def equals(block1: blocks.Block, block2: blocks.Block) -> bool:
lcolmapped = lmap[lcol]
rcolmapped = rmap[rcol]
joined_block, result_id = joined_block.apply_binary_op(
lcolmapped, rcolmapped, ops.eq_nulls_match_op
lcolmapped, rcolmapped, ops.eq_null_match_op
)
joined_block, result_id = joined_block.apply_unary_op(
result_id, ops.partial_right(ops.fillna_op, False)
Expand Down
12 changes: 6 additions & 6 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,12 +581,12 @@ def _split(
# Create an ordering col and convert to string
block, ordering_col = block.promote_offsets()
block, string_ordering_col = block.apply_unary_op(
ordering_col, ops.AsTypeOp("string[pyarrow]")
ordering_col, ops.AsTypeOp(to_type="string[pyarrow]")
)

# Apply hash method to sum col and order by it.
block, string_sum_col = block.apply_binary_op(
string_ordering_col, random_state_col, ops.concat_op
string_ordering_col, random_state_col, ops.strconcat_op
)
block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op)
block = block.order_by([ordering.OrderingColumnReference(hash_string_sum_col)])
Expand Down Expand Up @@ -1232,8 +1232,8 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
prefix_op = ops.BinopPartialLeft(ops.add_op, prefix)
expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
prefix_op = ops.ApplyLeft(base_op=ops.add_op, left_scalar=prefix)
expr = expr.project_unary_op(index_col, prefix_op)
return Block(
expr,
Expand All @@ -1251,8 +1251,8 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
prefix_op = ops.BinopPartialRight(ops.add_op, suffix)
expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
prefix_op = ops.ApplyRight(base_op=ops.add_op, right_scalar=suffix)
expr = expr.project_unary_op(index_col, prefix_op)
return Block(
expr,
Expand Down
57 changes: 17 additions & 40 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import pandas

import bigframes.constants as constants
import bigframes.core.compile.scalar_op_compiler as op_compilers
import bigframes.core.guid
from bigframes.core.ordering import (
encode_order_string,
Expand All @@ -43,8 +44,11 @@
ORDER_ID_COLUMN = "bigframes_ordering_id"
PREDICATE_COLUMN = "bigframes_predicate"


T = typing.TypeVar("T", bound="BaseIbisIR")

op_compiler = op_compilers.scalar_op_compiler


class BaseIbisIR(abc.ABC):
"""Implementation detail, contains common logic between ordered and unordered IR"""
Expand Down Expand Up @@ -147,49 +151,20 @@ def _reproject_to_table(self: T) -> T:
"""
...

def project_unary_op(
def project_row_op(
self: T,
input_column_id: str,
op: ops.UnaryOp,
input_column_ids: typing.Sequence[str],
op: ops.RowOp,
output_column_id: typing.Optional[str] = None,
) -> T:
"""Creates a new expression based on this expression with unary operation applied to one column."""
result_id = (
output_column_id or input_column_id
output_column_id or input_column_ids[0]
) # overwrite input if not output id provided
value = op._as_ibis(self._get_ibis_column(input_column_id)).name(result_id)
inputs = tuple(self._get_ibis_column(col) for col in input_column_ids)
value = op_compiler.compile_row_op(op, inputs).name(result_id)
return self._set_or_replace_by_id(result_id, value)

def project_binary_op(
self: T,
left_column_id: str,
right_column_id: str,
op: ops.BinaryOp,
output_column_id: str,
) -> T:
"""Creates a new expression based on this expression with binary operation applied to two columns."""
value = op(
self._get_ibis_column(left_column_id),
self._get_ibis_column(right_column_id),
).name(output_column_id)
return self._set_or_replace_by_id(output_column_id, value)

def project_ternary_op(
self: T,
col_id_1: str,
col_id_2: str,
col_id_3: str,
op: ops.TernaryOp,
output_column_id: str,
) -> T:
"""Creates a new expression based on this expression with ternary operation applied to three columns."""
value = op(
self._get_ibis_column(col_id_1),
self._get_ibis_column(col_id_2),
self._get_ibis_column(col_id_3),
).name(output_column_id)
return self._set_or_replace_by_id(output_column_id, value)

def assign(self: T, source_id: str, destination_id: str) -> T:
return self._set_or_replace_by_id(
destination_id, self._get_ibis_column(source_id)
Expand Down Expand Up @@ -454,7 +429,9 @@ def unpivot(
None, force_dtype=col_dtype
)
ibis_values = [
ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col])
op_compiler.compile_row_op(
ops.AsTypeOp(col_dtype), (unpivot_table[col],)
)
if col is not None
else null_value
for col in source_cols
Expand Down Expand Up @@ -521,9 +498,7 @@ def aggregate(
expr = OrderedIR(result, columns=columns, ordering=ordering)
if dropna:
for column_id in by_column_ids:
expr = expr._filter(
ops.notnull_op._as_ibis(expr._get_ibis_column(column_id))
)
expr = expr._filter(expr._get_ibis_column(column_id).notnull())
# Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation
return expr._project_offsets()
else:
Expand Down Expand Up @@ -982,7 +957,9 @@ def unpivot(
None, force_dtype=col_dtype
)
ibis_values = [
ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col])
op_compiler.compile_row_op(
ops.AsTypeOp(col_dtype), (unpivot_table[col],)
)
if col is not None
else null_value
for col in source_cols
Expand Down
20 changes: 3 additions & 17 deletions bigframes/core/compile/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,9 @@ def compile_reversed(node: nodes.ReversedNode, ordered: bool = True):


@_compile_node.register
def compile_project_unary(node: nodes.ProjectUnaryOpNode, ordered: bool = True):
return compile_node(node.child, ordered).project_unary_op(
node.input_id, node.op, node.output_id
)


@_compile_node.register
def compile_project_binary(node: nodes.ProjectBinaryOpNode, ordered: bool = True):
return compile_node(node.child, ordered).project_binary_op(
node.left_input_id, node.right_input_id, node.op, node.output_id
)


@_compile_node.register
def compile_project_ternary(node: nodes.ProjectTernaryOpNode, ordered: bool = True):
return compile_node(node.child, ordered).project_ternary_op(
node.input_id1, node.input_id2, node.input_id3, node.op, node.output_id
def compile_project(node: nodes.ProjectRowOpNode, ordered: bool = True):
return compile_node(node.child, ordered).project_row_op(
node.input_ids, node.op, node.output_id
)


Expand Down
Loading