Skip to content

feat: Add dataframe.to_html #259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,58 @@ def to_string(
encoding,
)

def to_html(
self,
buf=None,
columns: Sequence[str] | None = None,
col_space=None,
header: bool = True,
index: bool = True,
na_rep: str = "NaN",
formatters=None,
float_format=None,
sparsify: bool | None = None,
index_names: bool = True,
justify: str | None = None,
max_rows: int | None = None,
max_cols: int | None = None,
show_dimensions: bool = False,
decimal: str = ".",
bold_rows: bool = True,
classes: str | list | tuple | None = None,
escape: bool = True,
notebook: bool = False,
border: int | None = None,
table_id: str | None = None,
render_links: bool = False,
encoding: str | None = None,
) -> str:
return self.to_pandas().to_html(
buf,
columns, # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer to cast explicitly instead of just ignore. Better readability.

col_space,
header,
index,
na_rep,
formatters,
float_format,
sparsify,
index_names,
justify, # type: ignore
max_rows,
max_cols,
show_dimensions,
decimal,
bold_rows,
classes,
escape,
notebook,
border,
table_id,
render_links,
encoding,
)

def to_markdown(
self,
buf=None,
Expand Down
9 changes: 9 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,15 @@ def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
assert bf_result == pd_result


def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test that passes all the input parameters? Then, if pandas changes anything (for example, deprecating some arguments), we will find out and can update our docs.

Copy link
Collaborator Author

@Genesis929 Genesis929 Dec 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are way too many inputs; the test would be quite long after formatting. On the other hand, given how to_html is defined on our end, if any argument is no longer available in pandas, I think an error will be raised in the test anyway, because we pass every argument through to pandas regardless of whether the user supplied it.

unsupported = ["numeric_col"] # formatted differently

bf_result = scalars_df_index.drop(columns=unsupported).to_html()
pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_html()

assert bf_result == pd_result


def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
# Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
bf_result = scalars_df_index.dropna().to_markdown()
Expand Down
124 changes: 124 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,130 @@ def to_string(
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_html(
    self,
    buf=None,
    columns: Sequence[str] | None = None,
    col_space=None,
    header: bool = True,
    index: bool = True,
    na_rep: str = "NaN",
    formatters=None,
    float_format=None,
    sparsify: bool | None = None,
    index_names: bool = True,
    justify: str | None = None,
    max_rows: int | None = None,
    max_cols: int | None = None,
    show_dimensions: bool = False,
    decimal: str = ".",
    bold_rows: bool = True,
    classes: str | list | tuple | None = None,
    escape: bool = True,
    notebook: bool = False,
    border: int | None = None,
    table_id: str | None = None,
    render_links: bool = False,
    encoding: str | None = None,
) -> str | None:
    """Render a DataFrame as an HTML table.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> print(df.to_html())
        <table border="1" class="dataframe">
          <thead>
            <tr style="text-align: right;">
              <th></th>
              <th>col1</th>
              <th>col2</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <th>0</th>
              <td>1</td>
              <td>3</td>
            </tr>
            <tr>
              <th>1</th>
              <td>2</td>
              <td>4</td>
            </tr>
          </tbody>
        </table>

    Args:
        buf (str, Path or StringIO-like, optional, default None):
            Buffer to write to. If None, the output is returned as a string.
        columns (sequence, optional, default None):
            The subset of columns to write. Writes all columns by default.
        col_space (str or int, list or dict of int or str, optional):
            The minimum width of each column in CSS length units. An int is
            assumed to be px units.
        header (bool, optional, default True):
            Whether to print column labels.
        index (bool, optional, default True):
            Whether to print index (row) labels.
        na_rep (str, optional, default 'NaN'):
            String representation of NaN to use.
        formatters (list, tuple or dict of one-param. functions, optional):
            Formatter functions to apply to columns' elements by position or
            name.
            The result of each function must be a unicode string.
            List/tuple must be of length equal to the number of columns.
        float_format (one-parameter function, optional, default None):
            Formatter function to apply to columns' elements if they are
            floats. This function must return a unicode string and will
            be applied only to the non-NaN elements, with NaN being
            handled by na_rep.
        sparsify (bool, optional, default None):
            Set to False for a DataFrame with a hierarchical index to print
            every multiindex key at each row. The signature default is None,
            which pandas documents as behaving like True.
        index_names (bool, optional, default True):
            Prints the names of the indexes.
        justify (str, optional, default None):
            How to justify the column labels. If None uses the option from
            the print configuration (controlled by set_option), 'right' out
            of the box. Valid values are 'left', 'right', 'center', 'justify',
            'justify-all', 'start', 'end', 'inherit', 'match-parent', 'initial',
            'unset'.
        max_rows (int, optional):
            Maximum number of rows to display in the console.
        max_cols (int, optional):
            Maximum number of columns to display in the console.
        show_dimensions (bool, default False):
            Display DataFrame dimensions (number of rows by number of columns).
        decimal (str, default '.'):
            Character recognized as decimal separator, e.g. ',' in Europe.
        bold_rows (bool, default True):
            Make the row labels bold in the output.
        classes (str or list or tuple, default None):
            CSS class(es) to apply to the resulting html table.
        escape (bool, default True):
            Convert the characters <, >, and & to HTML-safe sequences.
        notebook (bool, default False):
            Whether the generated HTML is for IPython Notebook.
        border (int, optional):
            A border=border attribute is included in the opening <table>
            tag. Default pd.options.display.html.border.
        table_id (str, optional):
            A css id is included in the opening <table> tag if specified.
        render_links (bool, default False):
            Convert URLs to HTML links.
        encoding (str, optional, default None):
            Set character encoding. The signature default is None, which
            pandas documents as behaving like "utf-8".

    Returns:
        str or None: If buf is None, returns the result as a string. Otherwise
        returns None.
    """
    # Documentation-only definition: it always raises, so a usable
    # ``to_html`` has to be supplied by a concrete implementation elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_markdown(
self,
buf=None,
Expand Down