Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Missing resample() implementation for grouped data frame #1134

@dbalabka

Description

@dbalabka

Describe the issue:

import pandas as pd
import dask.dataframe as dd

# Create a sample DataFrame with 'id' and 'date' columns
data = {
    'id': [1, 1, 1, 2, 2, 2],
    'date': pd.to_datetime(['2023-01-01', '2023-01-04', '2023-01-05', '2023-01-01', '2023-01-04', '2023-01-05'])
}
# A Dask DataFrame is created from an existing collection, not from a raw
# dict, so wrap the data in a pandas DataFrame and convert it.
df = dd.from_pandas(pd.DataFrame(data), npartitions=1)

# Group by 'id' and resample to daily frequency.
# The `.resample` lookup is what fails: the traceback below shows
# GroupBy.__getattr__ treating "resample" as a column name and raising
# "Column not found: resample".
result = df.groupby(by=['id']).resample("D", on="date").compute()

# Print the resulting DataFrame
print(result)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/src/.venv/lib/python3.10/site-packages/dask_expr/_groupby.py:1593, in GroupBy.__getattr__(self, key)
   1592 try:
-> 1593     return self[key]
   1594 except KeyError as e:

File ~/src/.venv/lib/python3.10/site-packages/dask_expr/_groupby.py:1615, in GroupBy.__getitem__(self, key)
   1614 if is_scalar(key):
-> 1615     return SeriesGroupBy(
   1616         self.obj,
   1617         by=self.by,
   1618         slice=key,
   1619         sort=self.sort,
   1620         dropna=self.dropna,
   1621         observed=self.observed,
   1622     )
   1623 g = GroupBy(
   1624     self.obj,
   1625     by=self.by,
   (...)
   1630     group_keys=self.group_keys,
   1631 )

File ~/src/.venv/lib/python3.10/site-packages/dask_expr/_groupby.py:2214, in SeriesGroupBy.__init__(self, obj, by, sort, observed, dropna, slice)
   2212         obj._meta.groupby(by, **_as_dict("observed", observed))
-> 2214 super().__init__(
   2215     obj, by=by, slice=slice, observed=observed, dropna=dropna, sort=sort
   2216 )

File ~/src/.venv/lib/python3.10/site-packages/dask_expr/_groupby.py:1558, in GroupBy.__init__(self, obj, by, group_keys, sort, observed, dropna, slice)
   1557     slice = list(slice)
-> 1558 self._meta = self._meta[slice]

File ~/src/.venv/lib/python3.10/site-packages/pandas/core/groupby/generic.py:1951, in DataFrameGroupBy.__getitem__(self, key)
   1947     raise ValueError(
   1948         "Cannot subset columns with a tuple with more than one element. "
   1949         "Use a list instead."
   1950     )
-> 1951 return super().__getitem__(key)

File ~/src/.venv/lib/python3.10/site-packages/pandas/core/base.py:244, in SelectionMixin.__getitem__(self, key)
    243 if key not in self.obj:
--> 244     raise KeyError(f"Column not found: {key}")
    245 ndim = self.obj[key].ndim

KeyError: 'Column not found: resample'

Minimal Complete Verifiable Example:

# See the reproduction script under "Describe the issue" above.

Environment:

  • Dask version: 2024.8.0
  • dask-expr==1.1.10
  • Python version: 3.10
  • Operating System: WSL
  • Install method (conda, pip, source): poetry

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions