Skip to content

[MLIR][TOSA-Linalg] Fix rescale lowering for unsigned input zp #138313

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 14 additions & 31 deletions mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,6 @@ materializeBinaryNanCheckIfRequired(OpTy op, PatternRewriter &rewriter,
rhsOrResult);
}

/// Materializes a zero-point value as an `arith.constant` at the location of
/// `op`.
///
/// \tparam T integer type the zero point is converted to (via `static_cast`)
///           before the attribute is built.
/// \param op operation whose location is attached to the new constant.
/// \param zp zero-point value to materialize.
/// \param requiredAttrType type handed to `IntegerAttr::get` for the constant.
/// \param rewriter builder used to create the constant op.
/// \return the newly created `arith::ConstantOp`.
template <typename T>
static arith::ConstantOp
createConstOpFromZpVal(Operation *op, const int64_t &zp, Type requiredAttrType,
                       OpBuilder &rewriter) {
  // Convert to T first so the attribute is built from the T-typed value.
  const T narrowedZp = static_cast<T>(zp);
  IntegerAttr zpAttr = IntegerAttr::get(requiredAttrType, narrowedZp);
  return rewriter.create<arith::ConstantOp>(op->getLoc(), zpAttr);
}

static Value createLinalgBodyCalculationForElementwiseOp(
Operation *op, ValueRange args, ArrayRef<Type> resultTypes,
ConversionPatternRewriter &rewriter) {
Expand Down Expand Up @@ -1467,21 +1458,19 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
Value value = blockArgs[0];
Type valueTy = value.getType();

// For now we do all of our math in 64-bit. This is not optimal but
// should be correct for now, consider computing correct bit depth
// later.
int32_t inBitwidth = valueTy.getIntOrFloatBitWidth() > 32 ? 48 : 32;

FailureOr<int64_t> maybeIZp = op.getInputZeroPoint();
if (failed(maybeIZp)) {
(void)rewriter.notifyMatchFailure(
op, "input zero point cannot be statically determined");
return;
}

auto inputZp = createConstOpFromZpVal<int32_t>(
op, *maybeIZp, nestedBuilder.getIntegerType(inBitwidth),
nestedBuilder);
const int32_t inBitwidth = valueTy.getIntOrFloatBitWidth();
// Extend the zero point to 32 bits for sub-32-bit input widths.
const int32_t inAttrBitwidth = inBitwidth > 32 ? inBitwidth : 32;
auto inputZp = nestedBuilder.create<arith::ConstantOp>(
loc, IntegerAttr::get(rewriter.getIntegerType(inAttrBitwidth),
*maybeIZp));

FailureOr<int64_t> maybeOZp = op.getOutputZeroPoint();
if (failed(maybeOZp)) {
Expand All @@ -1490,16 +1479,14 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
return;
};

// pre-process OutputZP as it can be unsigned
auto outBitwidth = outputTy.getElementType().getIntOrFloatBitWidth();
APInt OZp(outBitwidth, !op.getOutputUnsigned());
OZp = static_cast<int64_t>(*maybeOZp);
*maybeOZp = op.getOutputUnsigned()
? static_cast<int64_t>(OZp.getZExtValue())
: OZp.getSExtValue();

auto outputZp = createConstOpFromZpVal<int32_t>(
op, *maybeOZp, nestedBuilder.getI32Type(), nestedBuilder);
IntegerType outIntType =
cast<IntegerType>(blockArgs.back().getType());
unsigned outBitWidth = outIntType.getWidth();
const int32_t outAttrBitwidth = 32;
assert(outBitWidth <= 32 && "Unexpected output zeropoint bitwidth");
auto outputZp = nestedBuilder.create<arith::ConstantOp>(
loc, IntegerAttr::get(rewriter.getIntegerType(outAttrBitwidth),
*maybeOZp));

Value multiplier = multiplierConstant ? multiplierConstant
: blockArgs[multiplierArg];
Expand Down Expand Up @@ -1527,10 +1514,6 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
nestedBuilder.create<arith::AddIOp>(nestedLoc, value, outputZp);

// Saturate to the output size.
IntegerType outIntType =
cast<IntegerType>(blockArgs.back().getType());
unsigned outBitWidth = outIntType.getWidth();

int32_t intMin = APInt::getSignedMinValue(outBitWidth).getSExtValue();
int32_t intMax = APInt::getSignedMaxValue(outBitWidth).getSExtValue();

Expand Down
43 changes: 23 additions & 20 deletions mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2118,7 +2118,7 @@ llvm::LogicalResult tosa::ReshapeOp::verify() {
// Return failure if val is not a constant.
// Set zp to -1 if val is a non-zero float, or if val is neither integer nor float.
// Otherwise, set zp to val's constant value.
static FailureOr<int64_t> getZeroPoint(Value val) {
static FailureOr<int64_t> getZeroPoint(Value val, bool signExtend) {
ElementsAttr zpAttr;
if (!matchPattern(val, m_Constant(&zpAttr))) {
return failure();
Expand All @@ -2135,7 +2135,10 @@ static FailureOr<int64_t> getZeroPoint(Value val) {
}

if (llvm::isa<IntegerType>(zpElemType)) {
return zpAttr.getValues<APInt>()[0].getSExtValue();
if (signExtend)
return zpAttr.getValues<APInt>()[0].getSExtValue();
else
return zpAttr.getValues<APInt>()[0].getZExtValue();
}

// return non-zero value to trigger error check
Expand Down Expand Up @@ -2186,30 +2189,30 @@ static LogicalResult verifyZeroPoint(tosa::RescaleOp op, Value zpVal,
return success();
}

#define ZERO_POINT_HELPER(OP, OPERAND_NAME) \
#define ZERO_POINT_HELPER(OP, OPERAND_NAME, SIGN_EXTEND) \
FailureOr<int64_t> tosa::OP::get##OPERAND_NAME##ZeroPoint() { \
return getZeroPoint(get##OPERAND_NAME##Zp()); \
return getZeroPoint(get##OPERAND_NAME##Zp(), SIGN_EXTEND); \
} \
LogicalResult tosa::OP::verify##OPERAND_NAME##ZeroPoint(int64_t zp) { \
return verifyZeroPoint(*this, get##OPERAND_NAME##Zp(), zp, #OPERAND_NAME); \
}

ZERO_POINT_HELPER(Conv2DOp, Input)
ZERO_POINT_HELPER(Conv2DOp, Weight)
ZERO_POINT_HELPER(Conv3DOp, Input)
ZERO_POINT_HELPER(Conv3DOp, Weight)
ZERO_POINT_HELPER(DepthwiseConv2DOp, Input)
ZERO_POINT_HELPER(DepthwiseConv2DOp, Weight)
ZERO_POINT_HELPER(TransposeConv2DOp, Input)
ZERO_POINT_HELPER(TransposeConv2DOp, Weight)
ZERO_POINT_HELPER(AvgPool2dOp, Input)
ZERO_POINT_HELPER(AvgPool2dOp, Output)
ZERO_POINT_HELPER(MatMulOp, A)
ZERO_POINT_HELPER(MatMulOp, B)
ZERO_POINT_HELPER(NegateOp, Input1)
ZERO_POINT_HELPER(NegateOp, Output)
ZERO_POINT_HELPER(RescaleOp, Input)
ZERO_POINT_HELPER(RescaleOp, Output)
ZERO_POINT_HELPER(Conv2DOp, Input, true)
ZERO_POINT_HELPER(Conv2DOp, Weight, true)
ZERO_POINT_HELPER(Conv3DOp, Input, true)
ZERO_POINT_HELPER(Conv3DOp, Weight, true)
ZERO_POINT_HELPER(DepthwiseConv2DOp, Input, true)
ZERO_POINT_HELPER(DepthwiseConv2DOp, Weight, true)
ZERO_POINT_HELPER(TransposeConv2DOp, Input, true)
ZERO_POINT_HELPER(TransposeConv2DOp, Weight, true)
ZERO_POINT_HELPER(AvgPool2dOp, Input, true)
ZERO_POINT_HELPER(AvgPool2dOp, Output, true)
ZERO_POINT_HELPER(MatMulOp, A, true)
ZERO_POINT_HELPER(MatMulOp, B, true)
ZERO_POINT_HELPER(NegateOp, Input1, true)
ZERO_POINT_HELPER(NegateOp, Output, true)
ZERO_POINT_HELPER(RescaleOp, Input, !getInputUnsigned())
ZERO_POINT_HELPER(RescaleOp, Output, !getOutputUnsigned())
#undef ZERO_POINT_HELPER

LogicalResult tosa::TransposeOp::inferReturnTypeComponents(
Expand Down
38 changes: 35 additions & 3 deletions mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -1241,10 +1241,10 @@ func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
// CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
// CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
// CHECK: [[C17:%.+]] = arith.constant 17
// CHECK: [[C128:%.+]] = arith.constant 128
// CHECK: [[C22:%.+]] = arith.constant 22
// CHECK-DAG: [[IN32:%.+]] = arith.extui [[IN]]
// CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
// CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C128]]
// CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {rounding_mode = "SINGLE_ROUND"}
// CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
// CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
Expand All @@ -1255,13 +1255,45 @@ func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
// CHECK: linalg.yield [[TRUNC]]
%multiplier = "tosa.const"() {values = dense<19689> : tensor<1xi16> } : () -> tensor<1xi16>
%shift = "tosa.const"() {values = dense<15> : tensor<1xi8> } : () -> tensor<1xi8>
%input_zp = "tosa.const"() {values = dense<17> : tensor<1xi8>} : () -> tensor<1xi8>
%input_zp = "tosa.const"() {values = dense<-128> : tensor<1xi8>} : () -> tensor<1xi8>
%output_zp = "tosa.const"() {values = dense<22> : tensor<1xi8>} : () -> tensor<1xi8>
%0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = false, rounding_mode = "SINGLE_ROUND", per_channel = false, input_unsigned = true, output_unsigned = false} : (tensor<2xi8>, tensor<1xi16>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>

return
}

// -----
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>

// CHECK-LABEL: @rescale_i48_unsigned_output
// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_i48_unsigned_output(%arg0 : tensor<2xi48>) -> () {
// Exercises the lowering of tosa.rescale from an i48 input to an i8 output
// with output_unsigned = true. The output zero point is written below as the
// i8 value -22; the expectations require it to appear as the constant 234,
// i.e. the same 8-bit pattern read as an unsigned value (256 - 22).
// CHECK: [[C19689:%.+]] = arith.constant 19689
// CHECK: [[C15:%.+]] = arith.constant 15
// CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi48>) outs([[INIT]] : tensor<2xi8>)
// CHECK: ^bb0([[IN:%.+]]: i48, [[UNUSED:%.+]]: i8):
// CHECK: [[C0:%.+]] = arith.constant 0
// CHECK: [[C234:%.+]] = arith.constant 234
// The input zero point is subtracted directly from the i48 block argument;
// no extension op is expected in between.
// CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN]], [[C0]]
// CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C19689]], [[C15]] {rounding_mode = "SINGLE_ROUND"}
// CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C234]]
// The clamp bounds expected for the unsigned 8-bit output are 0 and 255.
// CHECK-DAG: [[CMIN:%.+]] = arith.constant 0
// CHECK-DAG: [[CMAX:%.+]] = arith.constant 255
// CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
// CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
// CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
// CHECK: linalg.yield [[TRUNC]]
%multiplier = "tosa.const"() {values = dense<19689> : tensor<1xi16> } : () -> tensor<1xi16>
%shift = "tosa.const"() {values = dense<15> : tensor<1xi8> } : () -> tensor<1xi8>
%input_zp = "tosa.const"() {values = dense<0> : tensor<1xi48>} : () -> tensor<1xi48>
// -22 as an i8 constant; paired with output_unsigned = true on the op below.
%output_zp = "tosa.const"() {values = dense<-22> : tensor<1xi8>} : () -> tensor<1xi8>
%1 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = false, rounding_mode = "SINGLE_ROUND", per_channel = false, input_unsigned = false, output_unsigned = true} : (tensor<2xi48>, tensor<1xi16>, tensor<1xi8>, tensor<1xi48>, tensor<1xi8>) -> tensor<2xi8>

// CHECK: return
return
}

// -----

// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/Tosa/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -1517,7 +1517,7 @@ func.func @test_rescale_invalid_output_zp_u16(%arg0: tensor<13x21x3xi16>) -> ten
%shift = "tosa.const"() {values = dense<30> : tensor<1xi8> } : () -> tensor<1xi8>
%input_zp = "tosa.const"() {values = dense<0> : tensor<1xi16>} : () -> tensor<1xi16>
%output_zp = "tosa.const"() {values = dense<-1> : tensor<1xi16>} : () -> tensor<1xi16>
// expected-error@+1 {{'tosa.rescale' op expect output_zp of 0 or 32768 for unsigned int16 output, got -1}}
// expected-error@+1 {{'tosa.rescale' op expect output_zp of 0 or 32768 for unsigned int16 output, got 65535}}
%0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {rounding_mode = "SINGLE_ROUND", per_channel = false, scale32 = true, input_unsigned = false, output_unsigned = true} : (tensor<13x21x3xi16>, tensor<1xi32>, tensor<1xi8>, tensor<1xi16>, tensor<1xi16>) -> tensor<13x21x3xi16>
return %0 : tensor<13x21x3xi16>
}
Expand Down