Skip to content

Commit d5a04c1

Browse files
Dr-Irv authored and jreback committed
Bug in read_excel with multi-index containing integers #11317
Bug in to_excel with openpyxl 2.2+ and merging #11408
1 parent ff283a6 commit d5a04c1

File tree

8 files changed

+60
-9
lines changed

8 files changed

+60
-9
lines changed

doc/source/whatsnew/v0.17.1.txt

+7-2
Original file line number | Diff line number | Diff line change
@@ -79,14 +79,13 @@ Bug Fixes
7979
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
8080

8181

82+
8283
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
8384
- Bug in comparisons of Series vs list-likes (:issue:`11339`)
8485

8586

8687
- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`)
8788

88-
89-
9089
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
9190

9291
- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`)
@@ -114,6 +113,12 @@ Bug Fixes
114113

115114
- Bug in ``pandas.json`` when file to load is big (:issue:`11344`)
116115
- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`)
116+
117117
- Fixed a bug that prevented the construction of an empty series of dtype
118118
``datetime64[ns, tz]`` (:issue:`11245`).
119+
120+
- Bug in ``read_excel`` with multi-index containing integers (:issue:`11317`)
121+
122+
- Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`)
123+
119124
- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`)

pandas/core/format.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -1729,7 +1729,7 @@ def _format_header_mi(self):
17291729
return
17301730

17311731
columns = self.columns
1732-
level_strs = columns.format(sparsify=True, adjoin=False, names=False)
1732+
level_strs = columns.format(sparsify=self.merge_cells, adjoin=False, names=False)
17331733
level_lengths = _get_level_lengths(level_strs)
17341734
coloffset = 0
17351735
lnum = 0
@@ -1873,8 +1873,9 @@ def _format_hierarchical_rows(self):
18731873

18741874
# MultiIndex columns require an extra row
18751875
# with index names (blank if None) for
1876-
# unambigous round-trip
1877-
if isinstance(self.columns, MultiIndex):
1876+
# unambiguous round-trip, unless not merging,
1877+
# in which case the names all go on one row Issue #11328
1878+
if isinstance(self.columns, MultiIndex) and self.merge_cells:
18781879
self.rowcounter += 1
18791880

18801881
# if index labels are not empty go ahead and dump

pandas/io/excel.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -708,7 +708,12 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
708708
for cell in cells:
709709
colletter = get_column_letter(startcol + cell.col + 1)
710710
xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1))
711-
xcell.value = _conv_value(cell.val)
711+
if (isinstance(cell.val, compat.string_types)
712+
and xcell.data_type_for_value(cell.val)
713+
!= xcell.TYPE_STRING):
714+
xcell.set_value_explicit(cell.val)
715+
else:
716+
xcell.value = _conv_value(cell.val)
712717
style = None
713718
if cell.style:
714719
style = self._convert_to_style(cell.style)
@@ -1240,7 +1245,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
12401245
start_row=startrow + cell.row + 1,
12411246
start_column=startcol + cell.col + 1,
12421247
end_column=startcol + cell.mergeend + 1,
1243-
end_row=startrow + cell.mergeend + 1
1248+
end_row=startrow + cell.mergestart + 1
12441249
)
12451250

12461251
# When cells are merged only the top-left cell is preserved

pandas/io/parsers.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -873,10 +873,13 @@ def extract(r):
873873
columns = lzip(*[extract(r) for r in header])
874874
names = ic + columns
875875

876+
def tostr(x):
877+
return str(x) if not isinstance(x, compat.string_types) else x
878+
876879
# if we find 'Unnamed' all of a single level, then our header was too
877880
# long
878881
for n in range(len(columns[0])):
879-
if all(['Unnamed' in c[n] for c in columns]):
882+
if all(['Unnamed' in tostr(c[n]) for c in columns]):
880883
raise _parser.CParserError(
881884
"Passed header=[%s] are too many rows for this "
882885
"multi_index of columns"
-1.5 KB
Binary file not shown.
1.86 KB
Binary file not shown.
1.77 KB
Binary file not shown.

pandas/io/tests/test_excel.py

+38-1
Original file line number | Diff line number | Diff line change
@@ -558,6 +558,12 @@ def test_read_excel_multiindex(self):
558558
actual = read_excel(mi_file, 'mi_column_name', header=[0,1], index_col=0)
559559
tm.assert_frame_equal(actual, expected)
560560

561+
# Issue #11317
562+
expected.columns = mi.set_levels([1,2],level=1).set_names(['c1', 'c2'])
563+
actual = read_excel(mi_file, 'name_with_int', index_col=0, header=[0,1])
564+
tm.assert_frame_equal(actual, expected)
565+
566+
expected.columns = mi.set_names(['c1', 'c2'])
561567
expected.index = mi.set_names(['ilvl1', 'ilvl2'])
562568
actual = read_excel(mi_file, 'both_name', index_col=[0,1], header=[0,1])
563569
tm.assert_frame_equal(actual, expected)
@@ -1083,7 +1089,38 @@ def test_to_excel_multiindex(self):
10831089
df = read_excel(reader, 'test1', index_col=[0, 1],
10841090
parse_dates=False)
10851091
tm.assert_frame_equal(frame, df)
1086-
self.assertEqual(frame.index.names, df.index.names)
1092+
1093+
# Test for Issue 11328. If column indices are integers, make
1094+
# sure they are handled correctly for either setting of
1095+
# merge_cells
1096+
def test_to_excel_multiindex_cols(self):
1097+
_skip_if_no_xlrd()
1098+
1099+
frame = self.frame
1100+
arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
1101+
new_index = MultiIndex.from_arrays(arrays,
1102+
names=['first', 'second'])
1103+
frame.index = new_index
1104+
1105+
new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2),
1106+
(50, 1), (50, 2)])
1107+
frame.columns = new_cols_index
1108+
header = [0, 1]
1109+
if not self.merge_cells:
1110+
header = 0
1111+
1112+
with ensure_clean(self.ext) as path:
1113+
# round trip
1114+
frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
1115+
reader = ExcelFile(path)
1116+
df = read_excel(reader, 'test1', header=header,
1117+
index_col=[0, 1],
1118+
parse_dates=False)
1119+
if not self.merge_cells:
1120+
fm = frame.columns.format(sparsify=False,
1121+
adjoin=False, names=False)
1122+
frame.columns = [ ".".join(map(str, q)) for q in zip(*fm) ]
1123+
tm.assert_frame_equal(frame, df)
10871124

10881125
def test_to_excel_multiindex_dates(self):
10891126
_skip_if_no_xlrd()

0 commit comments

Comments
 (0)