124 changes: 91 additions & 33 deletions bigframes/core/__init__.py
@@ -22,7 +22,7 @@
import pandas

import bigframes.core.compile as compiling
import bigframes.core.expression as expressions
import bigframes.core.expression as ex
import bigframes.core.guid
import bigframes.core.nodes as nodes
from bigframes.core.ordering import OrderingColumnReference
@@ -114,12 +114,6 @@ def row_count(self) -> ArrayValue:
return ArrayValue(nodes.RowCountNode(child=self.node))

# Operations

def drop_columns(self, columns: Iterable[str]) -> ArrayValue:
return ArrayValue(
nodes.DropColumnsNode(child=self.node, columns=tuple(columns))
)

def filter(self, predicate_id: str, keep_null: bool = False) -> ArrayValue:
"""Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
return ArrayValue(
@@ -140,21 +134,104 @@ def promote_offsets(self, col_id: str) -> ArrayValue:
"""
return ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id))

def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
return ArrayValue(
nodes.SelectNode(child=self.node, column_ids=tuple(column_ids))
)

def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
"""Append together multiple ArrayValue objects."""
return ArrayValue(
nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]]))
)

def project(self, expression: expressions.Expression, output_id: str):
def project_to_id(self, expression: ex.Expression, output_id: str):
if output_id in self.column_ids: # Mutate case
exprs = [
((expression if (col_id == output_id) else ex.free_var(col_id)), col_id)
for col_id in self.column_ids
]
else: # append case
self_projection = (
(ex.free_var(col_id), col_id) for col_id in self.column_ids
)
exprs = [*self_projection, (expression, output_id)]
return ArrayValue(
nodes.ProjectionNode(
child=self.node,
assignments=tuple(exprs),
)
)

def assign(self, source_id: str, destination_id: str) -> ArrayValue:
if destination_id in self.column_ids: # Mutate case
exprs = [
(
(
ex.free_var(source_id)
if (col_id == destination_id)
else ex.free_var(col_id)
),
col_id,
)
for col_id in self.column_ids
]
else: # append case
self_projection = (
(ex.free_var(col_id), col_id) for col_id in self.column_ids
)
exprs = [*self_projection, (ex.free_var(source_id), destination_id)]
return ArrayValue(
nodes.ProjectionNode(
child=self.node,
assignments=tuple(exprs),
)
)

def assign_constant(
self,
destination_id: str,
value: typing.Any,
dtype: typing.Optional[bigframes.dtypes.Dtype],
) -> ArrayValue:
if destination_id in self.column_ids: # Mutate case
exprs = [
(
(
ex.const(value, dtype)
if (col_id == destination_id)
else ex.free_var(col_id)
),
col_id,
)
for col_id in self.column_ids
]
else: # append case
self_projection = (
(ex.free_var(col_id), col_id) for col_id in self.column_ids
)
exprs = [*self_projection, (ex.const(value, dtype), destination_id)]
return ArrayValue(
nodes.ProjectionNode(
child=self.node,
assignments=tuple(exprs),
)
)

def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
selections = ((ex.free_var(col_id), col_id) for col_id in column_ids)
return ArrayValue(
nodes.ProjectionNode(
child=self.node,
assignments=tuple(selections),
)
)

def drop_columns(self, columns: Iterable[str]) -> ArrayValue:
new_projection = (
(ex.free_var(col_id), col_id)
for col_id in self.column_ids
if col_id not in columns
)
return ArrayValue(
nodes.ProjectionNode(
child=self.node, assignments=((expression, output_id),)
child=self.node,
assignments=tuple(new_projection),
)
)

@@ -277,25 +354,6 @@ def unpivot(
)
)

def assign(self, source_id: str, destination_id: str) -> ArrayValue:
return ArrayValue(
nodes.AssignNode(
child=self.node, source_id=source_id, destination_id=destination_id
)
)

def assign_constant(
self,
destination_id: str,
value: typing.Any,
dtype: typing.Optional[bigframes.dtypes.Dtype],
) -> ArrayValue:
return ArrayValue(
nodes.AssignConstantNode(
child=self.node, destination_id=destination_id, value=value, dtype=dtype
)
)

def join(
self,
self_column_ids: typing.Sequence[str],
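Taken together, the ArrayValue changes above fold drop_columns, select_columns, assign, and assign_constant into the same generic nodes.ProjectionNode construction that project_to_id uses: every kept column gets an identity expression (ex.free_var), and the target column either gets a rewritten expression or an appended one. As a hedged illustration only (the PR itself builds the assignment tuples inline rather than delegating), assign and assign_constant could equivalently be written in terms of project_to_id:

import typing

import bigframes.core.expression as ex
import bigframes.dtypes


# Illustrative method-shaped sketches, behaviorally equivalent to the inline
# versions in the diff above; not the code the PR actually adds.
def assign(self, source_id: str, destination_id: str) -> "ArrayValue":
    # Copy the values of source_id into destination_id (mutate or append).
    return self.project_to_id(ex.free_var(source_id), destination_id)


def assign_constant(
    self,
    destination_id: str,
    value: typing.Any,
    dtype: typing.Optional[bigframes.dtypes.Dtype],
) -> "ArrayValue":
    # Broadcast a scalar literal into destination_id (mutate or append).
    return self.project_to_id(ex.const(value, dtype), destination_id)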
24 changes: 12 additions & 12 deletions bigframes/core/blocks.py
@@ -671,7 +671,7 @@ def project_expr(
"""
# TODO(tbergeron): handle labels safely so callers don't need to
result_id = guid.generate_guid()
array_val = self._expr.project(expr, result_id)
array_val = self._expr.project_to_id(expr, result_id)
block = Block(
array_val,
index_columns=self.index_columns,
@@ -1226,11 +1226,11 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
add_prefix = ops.add_op.as_expr(
ex.const(prefix), ops.AsTypeOp(to_type="string").as_expr(index_col)
)
expr = expr.project(
expression=add_prefix,
expr = expr.project_to_id(
expression=ops.add_op.as_expr(
ex.const(prefix),
ops.AsTypeOp(to_type="string").as_expr(index_col),
),
output_id=index_col,
)
return Block(
@@ -1249,11 +1249,11 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
if axis_number == 0:
expr = self._expr
for index_col in self._index_columns:
add_suffix = ops.add_op.as_expr(
ops.AsTypeOp(to_type="string").as_expr(index_col), ex.const(suffix)
)
expr = expr.project(
expression=add_suffix,
expr = expr.project_to_id(
expression=ops.add_op.as_expr(
ops.AsTypeOp(to_type="string").as_expr(index_col),
ex.const(suffix),
),
output_id=index_col,
)
return Block(
@@ -1557,7 +1557,7 @@ def merge(
coalesced_ids = []
for left_id, right_id in zip(left_join_ids, right_join_ids):
coalesced_id = guid.generate_guid()
joined_expr = joined_expr.project(
joined_expr = joined_expr.project_to_id(
ops.coalesce_op.as_expr(
get_column_left[left_id], get_column_right[right_id]
),
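At the blocks.py call sites, the old two-step pattern (build an expression, then project it) collapses into a single project_to_id call that takes a scalar-op expression built from ops and ex. A minimal usage sketch of the add_suffix shape above, written as a standalone helper; the helper name and the ops import alias are assumptions for illustration:

import bigframes.core.expression as ex
import bigframes.operations as ops  # assumed alias, matching how blocks.py refers to ops


def add_string_suffix(expr, col_id: str, suffix: str):
    # Overwrite col_id with CAST(col_id AS STRING) || suffix in one projection,
    # mirroring the add_suffix loop body in the diff above. `expr` is an ArrayValue.
    return expr.project_to_id(
        expression=ops.add_op.as_expr(
            ops.AsTypeOp(to_type="string").as_expr(col_id),
            ex.const(suffix),
        ),
        output_id=col_id,
    )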
107 changes: 44 additions & 63 deletions bigframes/core/compile/compiled.py
@@ -26,9 +26,8 @@
import ibis.expr.types as ibis_types
import pandas

import bigframes.constants as constants
import bigframes.core.compile.scalar_op_compiler as op_compilers
import bigframes.core.expression as expressions
import bigframes.core.expression as ex
import bigframes.core.guid
from bigframes.core.ordering import (
encode_order_string,
@@ -96,16 +95,6 @@ def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]:
else None
)

@abc.abstractmethod
def select_columns(self: T, column_ids: typing.Sequence[str]) -> T:
"""Creates a new expression based on this expression with new columns."""
...

def drop_columns(self: T, columns: Iterable[str]) -> T:
return self.select_columns(
[col for col in self.column_ids if col not in columns]
)

@abc.abstractmethod
def filter(self: T, predicate_id: str, keep_null: bool = False) -> T:
"""Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
@@ -152,40 +141,26 @@ def _reproject_to_table(self: T) -> T:
"""
...

def project_expression(
def projection(
self: T,
expression: expressions.Expression,
output_column_id: typing.Optional[str] = None,
expression_id_pairs: typing.Tuple[typing.Tuple[ex.Expression, str], ...],
) -> T:
"""Apply an expression to the ArrayValue and assign the output to a column."""
result_id = (
output_column_id or expression.unbound_variables[0]
) # overwrite input if not output id provided
bindings = {
col: self._get_ibis_column(col) for col in expression.unbound_variables
}
value = op_compiler.compile_expression(expression, bindings).name(result_id)
return self._set_or_replace_by_id(result_id, value)
bindings = {col: self._get_ibis_column(col) for col in self.column_ids}
values = [
op_compiler.compile_expression(expression, bindings).name(id)
for expression, id in expression_id_pairs
]
result = self._select(tuple(values)) # type: ignore

def assign(self: T, source_id: str, destination_id: str) -> T:
return self._set_or_replace_by_id(
destination_id, self._get_ibis_column(source_id)
)
# Need to reproject to convert ibis Scalar to ibis Column object
if any(exp_id[0].is_const for exp_id in expression_id_pairs):
result = result._reproject_to_table()
return result

def assign_constant(
self: T,
destination_id: str,
value: typing.Any,
dtype: typing.Optional[bigframes.dtypes.Dtype],
) -> T:
# TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis.
ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype)
if ibis_value is None:
raise NotImplementedError(
f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}"
)
expr = self._set_or_replace_by_id(destination_id, ibis_value)
return expr._reproject_to_table()
@abc.abstractmethod
def _select(self: T, values: typing.Tuple[ibis_types.Value]) -> T:
...

@abc.abstractmethod
def _set_or_replace_by_id(self: T, id: str, new_value: ibis_types.Value) -> T:
@@ -330,14 +305,6 @@ def _to_ibis_expr(
table = table.filter(ibis.random() < ibis.literal(fraction))
return table

def select_columns(self, column_ids: typing.Sequence[str]) -> UnorderedIR:
"""Creates a new expression based on this expression with new columns."""
columns = [self._get_ibis_column(col_id) for col_id in column_ids]
builder = self.builder()
builder.columns = list(columns)
new_expr = builder.build()
return new_expr

def filter(self, predicate_id: str, keep_null: bool = False) -> UnorderedIR:
condition = typing.cast(
ibis_types.BooleanValue, self._get_ibis_column(predicate_id)
@@ -577,6 +544,11 @@ def _set_or_replace_by_id(
builder.columns = [*self.columns, new_value.name(id)]
return builder.build()

def _select(self, values: typing.Tuple[ibis_types.Value]) -> UnorderedIR:
builder = self.builder()
builder.columns = values
return builder.build()

def _reproject_to_table(self) -> UnorderedIR:
"""
Internal operators that projects the internal representation into a
@@ -816,20 +788,6 @@ def promote_offsets(self, col_id: str) -> OrderedIR:
]
return expr_builder.build()

def select_columns(self, column_ids: typing.Sequence[str]) -> OrderedIR:
"""Creates a new expression based on this expression with new columns."""
columns = [self._get_ibis_column(col_id) for col_id in column_ids]
expr = self
for ordering_column in set(self.column_ids).intersection(
[col_ref.column_id for col_ref in self._ordering.ordering_value_columns]
):
# Need to hide ordering columns that are being dropped. Alternatively, could project offsets
expr = expr._hide_column(ordering_column)
builder = expr.builder()
builder.columns = list(columns)
new_expr = builder.build()
return new_expr

## Methods that only work with ordering
def project_window_op(
self,
@@ -1221,6 +1179,29 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> Ordered
builder.columns = [*self.columns, new_value.name(id)]
return builder.build()

def _select(self, values: typing.Tuple[ibis_types.Value]) -> OrderedIR:
"""Safely assign by id while maintaining ordering integrity."""
# TODO: Split into explicit set and replace methods
ordering_col_ids = [
col_ref.column_id for col_ref in self._ordering.ordering_value_columns
]
ir = self
mappings = {value.name: value for value in values}
for ordering_id in ordering_col_ids:
# Drop case
if (ordering_id not in mappings) and (ordering_id in ir.column_ids):
# id is being dropped, hide it first
ir = ir._hide_column(ordering_id)
# Mutate case
elif (ordering_id in mappings) and not mappings[ordering_id].equals(
ir._get_any_column(ordering_id)
):
ir = ir._hide_column(ordering_id)

builder = ir.builder()
builder.columns = list(values)
return builder.build()

## Ordering specific helpers
def _get_any_column(self, key: str) -> ibis_types.Value:
"""Gets the Ibis expression for a given column. Will also get hidden columns."""
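The OrderedIR._select override is the subtle part of this refactor: before re-selecting, it must hide any ordering column that the new selection either drops or rebinds to a different expression, so the ordering keys remain addressable afterwards. A minimal sketch of just that decision rule, pulled out as a pure function over column ids; the function and its parameters are illustrative, not part of the PR:

import typing

import ibis.expr.types as ibis_types


def ordering_columns_to_hide(
    ordering_col_ids: typing.Sequence[str],
    visible_col_ids: typing.Sequence[str],
    new_values: typing.Mapping[str, ibis_types.Value],
    current_value: typing.Callable[[str], ibis_types.Value],
) -> typing.List[str]:
    # An ordering column needs a hidden copy when the selection drops it
    # entirely, or keeps its id but binds it to a different ibis expression
    # than the one the ordering currently references.
    to_hide = []
    for col_id in ordering_col_ids:
        dropped = col_id not in new_values and col_id in visible_col_ids
        mutated = col_id in new_values and not new_values[col_id].equals(
            current_value(col_id)
        )
        if dropped or mutated:
            to_hide.append(col_id)
    return to_hide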