|
15 | 15 | from __future__ import annotations
|
16 | 16 |
|
17 | 17 | import typing
|
| 18 | +from typing import Sequence, Union |
18 | 19 |
|
19 | 20 | import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
|
20 | 21 | import pandas as pd
|
@@ -115,14 +116,35 @@ def mean(self, numeric_only: bool = False, *args) -> df.DataFrame:
|
def median(
    self, numeric_only: bool = False, *, exact: bool = False
) -> df.DataFrame:
    # Median of the selected columns, aggregated through this group-by.
    #
    # numeric_only: when False, _raise_on_non_numeric("median") is invoked
    #   first; when True that check is skipped.
    # exact: when True the result is delegated to quantile(0.5); otherwise
    #   agg_ops.median_op is applied via _aggregate_all.
    if not numeric_only:
        self._raise_on_non_numeric("median")
    if exact:
        # Forward numeric_only so that quantile() does not re-run the
        # non-numeric check the caller explicitly opted out of. Without this,
        # median(numeric_only=True, exact=True) would fall back to quantile's
        # default numeric_only=False.
        return self.quantile(0.5, numeric_only=numeric_only)
    return self._aggregate_all(agg_ops.median_op, numeric_only=True)
|
125 | 124 |
|
def quantile(
    self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
) -> df.DataFrame:
    # Quantile(s) of the selected columns, computed by block_ops.quantile with
    # this group-by's key columns.
    #
    # q: a single quantile or a list-like of quantiles.
    # numeric_only: when False, _raise_on_non_numeric("quantile") is invoked
    #   before any work is done.
    if not numeric_only:
        self._raise_on_non_numeric("quantile")

    # Only columns whose type is in the permissive numeric set are passed on.
    numeric_col_ids = tuple(
        filter(
            lambda col_id: self._column_type(col_id)
            in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE,
            self._selected_cols,
        )
    )
    q_is_list = utils.is_list_like(q)
    quantile_block = block_ops.quantile(
        self._block,
        numeric_col_ids,
        qs=tuple(q) if q_is_list else (q,),  # type: ignore
        grouping_column_ids=self._by_col_ids,
    )
    frame = df.DataFrame(quantile_block)
    # NOTE(review): presumably the result's columns carry a trailing level
    # holding the quantile value - confirm against block_ops.quantile. For a
    # list-like q that level is stacked; for a scalar q it is dropped from
    # axis 1.
    return frame.stack() if q_is_list else frame.droplevel(-1, 1)
| 147 | + |
def min(self, numeric_only: bool = False, *args) -> df.DataFrame:
    # Applies agg_ops.min_op through _aggregate_all, forwarding numeric_only.
    # Extra positional arguments are accepted but not used.
    minimum_op = agg_ops.min_op
    return self._aggregate_all(minimum_op, numeric_only=numeric_only)
|
128 | 150 |
|
@@ -466,8 +488,31 @@ def sum(self, *args) -> series.Series:
|
def mean(self, *args) -> series.Series:
    # Applies agg_ops.mean_op through _aggregate.
    # Extra positional arguments are accepted but not used.
    mean_operation = agg_ops.mean_op
    return self._aggregate(mean_operation)
|
468 | 490 |
|
def median(
    self,
    *args,
    exact: bool = False,
    **kwargs,
) -> series.Series:
    # Median of the grouped values.
    #
    # exact: when True the result is delegated to quantile(0.5); otherwise
    #   agg_ops.median_op is applied via _aggregate.
    # *args / **kwargs are accepted but not used.
    if not exact:
        return self._aggregate(agg_ops.median_op)
    return self.quantile(0.5)
| 501 | + |
def quantile(
    self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
) -> series.Series:
    # Quantile(s) of the value column, computed by block_ops.quantile with
    # this group-by's key columns.
    #
    # q: a single quantile or a list-like of quantiles.
    # numeric_only: accepted but not read by this method.
    q_is_list = utils.is_list_like(q)
    quantile_block = block_ops.quantile(
        self._block,
        (self._value_column,),
        qs=tuple(q) if q_is_list else (q,),  # type: ignore
        grouping_column_ids=self._by_col_ids,
    )
    stacked = series.Series(quantile_block.stack())
    if q_is_list:
        return stacked
    # Scalar q: drop the last index level of the stacked result.
    return stacked.droplevel(-1)
471 | 516 |
|
def std(self, *args, **kwargs) -> series.Series:
    # Applies agg_ops.std_op through _aggregate.
    # Extra positional and keyword arguments are accepted but not used.
    std_operation = agg_ops.std_op
    return self._aggregate(std_operation)
|
|
0 commit comments