Review (jorisvandenbossche)

pandas-dev · jreback · Feb 24, 2019 · Jan 28, 2019 · Jan 28, 2019 · Jan 29, 2019
commit 5863678045ce09f8f3ddca9321420a2080bbd8bd
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4151,14 +4151,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 # arrays are fine as long as they are one-dimensional
                 if getattr(col, 'ndim', 1) > 1:
                     raise ValueError(err_msg)
+            elif is_list_like(col, allow_sets=False):
+                # iterators/generators are hashable, so they would be tried as
+                # a key below and reported in the KeyError; reject them here
+                tipo = type(col)
+                raise ValueError(err_msg + ' Received column of '
+                                 'type {}'.format(tipo))
             else:
                 # everything else gets tried as a key; see GH 24969
                 try:
-                    self[col]
-                except KeyError:
+                    found = col in self.columns
+                except TypeError:
                     tipo = type(col)
-                    raise ValueError(err_msg,
-                                     'Received column of type {}'.format(tipo))
+                    raise TypeError(err_msg + ' Received column of '
+                                    'type {}'.format(tipo))
+                else:
+                    if not found:
+                        missing.append(col)
 
         if missing:
             raise KeyError('{}'.format(missing))

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -255,21 +255,40 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
 
     @pytest.mark.parametrize('append', [True, False])
     @pytest.mark.parametrize('drop', [True, False])
-    @pytest.mark.parametrize('box', [set, iter])
-    def test_set_index_raise_on_type(self, frame_of_index_cols, box,
-                                     drop, append):
+    @pytest.mark.parametrize('box', [iter, lambda x: (y for y in x)],
+                             ids=['iter', 'generator'])
+    def test_set_index_raise_on_type_iter(self, frame_of_index_cols, box,
+                                          drop, append):
         df = frame_of_index_cols
 
         msg = 'The parameter "keys" may be a column key, .*'
-        # forbidden type, e.g. set/tuple/iter
+        # forbidden type, e.g. iter/generator
         with pytest.raises(ValueError, match=msg):
             df.set_index(box(df['A']), drop=drop, append=append)
 
-        # forbidden type in list, e.g. set/tuple/iter
+        # forbidden type in list, e.g. iter/generator
         with pytest.raises(ValueError, match=msg):
             df.set_index(['A', df['A'], box(df['A'])],
                          drop=drop, append=append)
 
+    @pytest.mark.parametrize('append', [True, False])
+    @pytest.mark.parametrize('drop', [True, False])
+    @pytest.mark.parametrize('box', [set, lambda x: dict(zip(x, x)).keys()],
+                             ids=['set', 'dict-view'])
+    def test_set_index_raise_on_type_unhashable(self, frame_of_index_cols, box,
+                                                drop, append):
+        df = frame_of_index_cols
+
+        msg = 'The parameter "keys" may be a column key, .*'
+        # forbidden type that is unhashable, e.g. set/dict-view
+        with pytest.raises(TypeError, match=msg):
+            df.set_index(box(df['A']), drop=drop, append=append)
+
+        # forbidden type in list that is unhashable, e.g. set/dict-view
+        with pytest.raises(TypeError, match=msg):
+            df.set_index(['A', df['A'], box(df['A'])],
+                         drop=drop, append=append)
+
     def test_set_index_custom_label_type(self):
         # GH 24969
 
@@ -281,6 +300,10 @@ def __init__(self, name, color):
             def __str__(self):
                 return "<Thing %r>" % (self.name,)
 
+            def __repr__(self):
+                # KeyError is built from the repr of the missing keys
+                return self.__str__()
+
         thing1 = Thing('One', 'red')
         thing2 = Thing('Two', 'blue')
         df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
@@ -295,6 +318,43 @@ def __str__(self):
         result = df.set_index([thing2])
         tm.assert_frame_equal(result, expected)
 
+        # missing key
+        thing3 = Thing('Three', 'pink')
+        msg = "<Thing 'Three'>"
+        with pytest.raises(KeyError, match=msg):
+            # missing label directly
+            df.set_index(thing3)
+
+        with pytest.raises(KeyError, match=msg):
+            # missing label in list
+            df.set_index([thing3])
+
+    def test_set_index_custom_label_type_raises(self):
+        # GH 24969
+
+        # purposefully inherit from something unhashable
+        class Thing(set):
+            def __init__(self, name, color):
+                self.name = name
+                self.color = color
+
+            def __str__(self):
+                return "<Thing %r>" % (self.name,)
+
+        thing1 = Thing('One', 'red')
+        thing2 = Thing('Two', 'blue')
+        df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
+
+        msg = 'The parameter "keys" may be a column key, .*'
+
+        with pytest.raises(TypeError, match=msg):
+            # use custom label directly
+            df.set_index(thing2)
+
+        with pytest.raises(TypeError, match=msg):
+            # custom label wrapped in list
+            df.set_index([thing2])
+
     def test_construction_with_categorical_index(self):
         ci = tm.makeCategoricalIndex(10)
         ci.name = 'B'