@@ -838,7 +838,7 @@ def aggregate_all_and_stack(
             ]
             result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot(
                 row_labels=self.column_labels.to_list(),
-                index_col_id="index",
+                index_col_ids=["index"],
                 unpivot_columns=[(value_col_id, self.value_columns)],
                 dtype=dtype,
             )
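Note on this hunk and the two that follow: they make the same mechanical change. `unpivot` now takes a list-valued `index_col_ids` instead of a scalar `index_col_id`, so existing single-level call sites simply wrap their id in a one-element list. As a rough, self-contained analogy in plain pandas (not the bigframes internals; `melt` stands in for `unpivot` here):

import pandas as pd

# Analogy only: a single-level unpivot produces one generated label column,
# which is what the one-element index_col_ids=["index"] corresponds to; the
# list form leaves room for multi-level row labels to produce several.
df = pd.DataFrame({"id": [1, 2], "a": [10, 20], "b": [30, 40]})
long = df.melt(id_vars=["id"], var_name="index", value_name="values")
print(long)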
@@ -849,7 +849,7 @@ def aggregate_all_and_stack(
             expr_with_offsets, offset_col = self.expr.promote_offsets()
             stacked_expr = expr_with_offsets.unpivot(
                 row_labels=self.column_labels.to_list(),
-                index_col_id=guid.generate_guid(),
+                index_col_ids=[guid.generate_guid()],
                 unpivot_columns=[(value_col_id, self.value_columns)],
                 passthrough_columns=[*self.index_columns, offset_col],
                 dtype=dtype,
@@ -1041,7 +1041,7 @@ def summarize(
         expr = self.expr.aggregate(aggregations).unpivot(
             labels,
             unpivot_columns=columns,
-            index_col_id=label_col_id,
+            index_col_ids=[label_col_id],
         )
         labels = self._get_labels_for_columns(column_ids)
         return Block(expr, column_labels=labels, index_columns=[label_col_id])
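For orientation, `summarize` keeps a single generated index column: the `label_col_id` column holds the aggregation labels and becomes the sole index of the returned block, hence the one-element `index_col_ids=[label_col_id]`. A rough pandas sketch of that output shape (the statistics below are illustrative, not the exact aggregations `summarize` is called with):

import pandas as pd

# Illustrative shape only: one row per aggregation label, labels as the
# single index level, original value columns as the result columns.
df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
summary = pd.DataFrame(
    {col: [df[col].count(), df[col].mean(), df[col].std()] for col in df.columns},
    index=["count", "mean", "std"],
)
print(summary)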
@@ -1225,116 +1225,83 @@ def pivot(
 
         return result_block.with_column_labels(column_index)
 
-    def stack(self):
+    def stack(self, how="left", dropna=True, sort=True, levels: int = 1):
         """Unpivot last column axis level into row axis"""
-        if isinstance(self.column_labels, pd.MultiIndex):
-            return self._stack_multi()
-        else:
-            return self._stack_mono()
-
-    def _stack_mono(self):
-        if isinstance(self.column_labels, pd.MultiIndex):
-            raise ValueError("Expected single level index")
-
         # These are the values that will be turned into rows
-        stack_values = self.column_labels.drop_duplicates().sort_values()
 
-        # Get matching columns
-        unpivot_columns: List[Tuple[str, List[str]]] = []
-        dtypes: List[bigframes.dtypes.Dtype] = []
-        col_id = guid.generate_guid("unpivot_")
-        dtype = None
-        input_columns: Sequence[Optional[str]] = []
-        for uvalue in stack_values:
-            matching_ids = self.label_to_col_id.get(uvalue, [])
-            input_id = matching_ids[0] if len(matching_ids) > 0 else None
-            if input_id:
-                if dtype and dtype != self._column_type(input_id):
-                    raise NotImplementedError(
-                        "Cannot stack columns with non-matching dtypes."
-                    )
-                else:
-                    dtype = self._column_type(input_id)
-            input_columns.append(input_id)
-        unpivot_columns.append((col_id, input_columns))
-        if dtype:
-            dtypes.append(dtype or pd.Float64Dtype())
+        col_labels, row_labels = utils.split_index(self.column_labels, levels=levels)
+        if dropna:
+            row_labels = row_labels.drop_duplicates()
+        if sort:
+            row_labels = row_labels.sort_values()
 
-        added_index_column = col_id = guid.generate_guid()
-        unpivot_expr = self._expr.unpivot(
-            row_labels=stack_values,
-            passthrough_columns=self.index_columns,
-            unpivot_columns=unpivot_columns,
-            index_col_id=added_index_column,
-            dtype=dtypes,
-        )
-        block = Block(
-            unpivot_expr,
-            index_columns=[*self.index_columns, added_index_column],
-            column_labels=[None],
-            index_labels=[*self._index_labels, self.column_labels.names[-1]],
-        )
-        return block
-
-    def _stack_multi(self):
-        if not isinstance(self.column_labels, pd.MultiIndex):
-            raise ValueError("Expected multi-index")
-
-        # These are the values that will be turned into rows
-        stack_values = (
-            self.column_labels.get_level_values(-1).drop_duplicates().sort_values()
-        )
+        row_label_tuples = utils.index_as_tuples(row_labels)
 
-        result_col_labels = (
-            self.column_labels.droplevel(-1)
-            .drop_duplicates()
-            .sort_values()
-            .dropna(how="all")
-        )
+        if col_labels is not None:
+            result_index = col_labels.drop_duplicates().sort_values().dropna(how="all")
+            result_col_labels = utils.index_as_tuples(result_index)
+        else:
+            result_index = pd.Index([None])
+            result_col_labels = list([()])
 
         # Get matching columns
         unpivot_columns: List[Tuple[str, List[str]]] = []
         dtypes = []
         for val in result_col_labels:
             col_id = guid.generate_guid("unpivot_")
-            dtype = None
-            input_columns: Sequence[Optional[str]] = []
-            for uvalue in stack_values:
-                # Need to unpack if still a multi-index after dropping 1 level
-                label_to_match = (
-                    (val, uvalue) if result_col_labels.nlevels == 1 else (*val, uvalue)
-                )
-                matching_ids = self.label_to_col_id.get(label_to_match, [])
-                input_id = matching_ids[0] if len(matching_ids) > 0 else None
-                if input_id:
-                    if dtype and dtype != self._column_type(input_id):
-                        raise NotImplementedError(
-                            "Cannot stack columns with non-matching dtypes."
-                        )
-                    else:
-                        dtype = self._column_type(input_id)
-                input_columns.append(input_id)
-                # Input column i is the first one that
+            input_columns, dtype = self._create_stack_column(val, row_label_tuples)
             unpivot_columns.append((col_id, input_columns))
             if dtype:
                 dtypes.append(dtype or pd.Float64Dtype())
 
-        added_index_column = col_id = guid.generate_guid()
+        added_index_columns = [guid.generate_guid() for _ in range(row_labels.nlevels)]
         unpivot_expr = self._expr.unpivot(
-            row_labels=stack_values,
+            row_labels=row_label_tuples,
             passthrough_columns=self.index_columns,
             unpivot_columns=unpivot_columns,
-            index_col_id=added_index_column,
+            index_col_ids=added_index_columns,
             dtype=dtypes,
+            how=how,
         )
+        new_index_level_names = self.column_labels.names[-levels:]
+        if how == "left":
+            index_columns = [*self.index_columns, *added_index_columns]
+            index_labels = [*self._index_labels, *new_index_level_names]
+        else:
+            index_columns = [*added_index_columns, *self.index_columns]
+            index_labels = [*new_index_level_names, *self._index_labels]
+
         block = Block(
             unpivot_expr,
-            index_columns=[*self.index_columns, added_index_column],
-            column_labels=result_col_labels,
-            index_labels=[*self._index_labels, self.column_labels.names[-1]],
+            index_columns=index_columns,
+            column_labels=result_index,
+            index_labels=index_labels,
         )
         return block
 
+    def _create_stack_column(
+        self, col_label: typing.Tuple, stack_labels: typing.Sequence[typing.Tuple]
+    ):
+        dtype = None
+        input_columns: list[Optional[str]] = []
+        for uvalue in stack_labels:
+            label_to_match = (*col_label, *uvalue)
+            label_to_match = (
+                label_to_match[0] if len(label_to_match) == 1 else label_to_match
+            )
+            matching_ids = self.label_to_col_id.get(label_to_match, [])
+            input_id = matching_ids[0] if len(matching_ids) > 0 else None
+            if input_id:
+                if dtype and dtype != self._column_type(input_id):
+                    raise NotImplementedError(
+                        "Cannot stack columns with non-matching dtypes."
+                    )
+                else:
+                    dtype = self._column_type(input_id)
+            input_columns.append(input_id)
+            # Input column i is the first one that
+        return input_columns, dtype or pd.Float64Dtype()
+
 
     def _column_type(self, col_id: str) -> bigframes.dtypes.Dtype:
         col_offset = self.value_columns.index(col_id)
         dtype = self.dtypes[col_offset]
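The rewritten `stack` mirrors `pandas.DataFrame.stack` with a list-like `level`: stacking N column levels moves N label levels into the row index, which is why a list of generated ids (`added_index_columns`, one per stacked level) replaces the single `added_index_column`, and why `unpivot` now needs `index_col_ids` rather than `index_col_id`. A small, self-contained pandas illustration of that semantics (plain pandas, not bigframes):

import pandas as pd

# Two-level column index; stacking one vs. both levels shows why the number
# of new index columns equals the number of stacked column levels.
columns = pd.MultiIndex.from_product([["a", "b"], ["x", "y"]], names=["l1", "l2"])
df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=columns)

print(df.stack())                # moves only the innermost column level
print(df.stack(level=[-2, -1]))  # moves both levels; the index gains two levels

The `how` branch at the end of the hunk only decides ordering: with "left" the generated index columns are appended after the existing index columns, otherwise they are prepended.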