expose ordered/unordered/nanless intrinsics
diff --git a/src/librustc_trans/builder.rs b/src/librustc_trans/builder.rs
index 2c38197..371f530 100644
--- a/src/librustc_trans/builder.rs
+++ b/src/librustc_trans/builder.rs
@@ -958,6 +958,9 @@
pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fadd_fast");
unsafe {
+ // FIXME: add a non-fast math version once
+ // https://bugs.llvm.org/show_bug.cgi?id=36732
+ // is fixed.
let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
@@ -966,6 +969,9 @@
pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fmul_fast");
unsafe {
+ // FIXME: add a non-fast math version once
+ // https://bugs.llvm.org/show_bug.cgi?id=36732
+ // is fixed.
let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
@@ -1001,6 +1007,18 @@
llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src)
}
}
+ pub fn vector_reduce_fmin(&self, src: ValueRef) -> ValueRef {
+ self.count_insn("vector.reduce.fmin");
+ unsafe {
+ llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, true)
+ }
+ }
+ pub fn vector_reduce_fmax(&self, src: ValueRef) -> ValueRef {
+ self.count_insn("vector.reduce.fmax");
+ unsafe {
+ llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, true)
+ }
+ }
pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fmin_fast");
unsafe {
diff --git a/src/librustc_trans/intrinsic.rs b/src/librustc_trans/intrinsic.rs
index 011273f..8b62a1b 100644
--- a/src/librustc_trans/intrinsic.rs
+++ b/src/librustc_trans/intrinsic.rs
@@ -1150,210 +1150,134 @@
return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
}
- if name == "simd_reduce_add" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_add(args[0].immediate()))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_add(args[0].immediate()))
- },
- ty::TyFloat(f) => {
- // undef as accumulator makes the reduction unordered:
- let acc = match f.bit_width() {
- 32 => C_undef(Type::f32(bx.cx)),
- 64 => C_undef(Type::f64(bx.cx)),
- v => {
- return_error!(
- "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
- "simd_reduce_add", in_ty, in_elem, v, ret_ty)
+ macro_rules! arith_red {
+ ($name:tt : $integer_reduce:ident, $float_reduce:ident, $ordered:expr) => {
+ if name == $name {
+ require!(ret_ty == in_elem,
+ "expected return type `{}` (element of input `{}`), found `{}`",
+ in_elem, in_ty, ret_ty);
+ return match in_elem.sty {
+ ty::TyInt(_) | ty::TyUint(_) => {
+ let r = bx.$integer_reduce(args[0].immediate());
+ if $ordered {
+ // ordered: fold the accumulator (args[1]) into the reduction; on
+ // overflow the result is the mathematical result modulo 2^n:
+ if name.contains("mul") {
+ Ok(bx.mul(args[1].immediate(), r))
+ } else {
+ Ok(bx.add(args[1].immediate(), r))
+ }
+ } else {
+ Ok(r) // reuse the reduction emitted above instead of emitting a second one
+ }
+ },
+ ty::TyFloat(f) => {
+ // ordered float reductions use the caller-supplied accumulator (args[1]):
+ let acc = if $ordered {
+ args[1].immediate()
+ } else {
+ // unordered reductions pass an undef accumulator instead:
+ match f.bit_width() {
+ 32 => C_undef(Type::f32(bx.cx)),
+ 64 => C_undef(Type::f64(bx.cx)),
+ v => {
+ return_error!(
+ "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
+ $name, in_ty, in_elem, v, ret_ty
+ )
+ }
+ }
+
+ };
+ Ok(bx.$float_reduce(acc, args[0].immediate()))
}
- };
- Ok(bx.vector_reduce_fadd_fast(acc, args[0].immediate()))
- }
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_add", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_mul" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_mul(args[0].immediate()))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_mul(args[0].immediate()))
- },
- ty::TyFloat(f) => {
- // undef as accumulator makes the reduction unordered:
- let acc = match f.bit_width() {
- 32 => C_undef(Type::f32(bx.cx)),
- 64 => C_undef(Type::f64(bx.cx)),
- v => {
+ _ => {
return_error!(
- "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
- "simd_reduce_mul", in_ty, in_elem, v, ret_ty)
+ "unsupported {} from `{}` with element `{}` to `{}`",
+ $name, in_ty, in_elem, ret_ty
+ )
+ },
+ }
+ }
+ }
+ }
+
+ arith_red!("simd_reduce_add_ordered": vector_reduce_add, vector_reduce_fadd_fast, true);
+ arith_red!("simd_reduce_mul_ordered": vector_reduce_mul, vector_reduce_fmul_fast, true);
+ arith_red!("simd_reduce_add_unordered": vector_reduce_add, vector_reduce_fadd_fast, false);
+ arith_red!("simd_reduce_mul_unordered": vector_reduce_mul, vector_reduce_fmul_fast, false);
+
+ macro_rules! minmax_red {
+ ($name:tt: $int_red:ident, $float_red:ident) => {
+ if name == $name {
+ require!(ret_ty == in_elem,
+ "expected return type `{}` (element of input `{}`), found `{}`",
+ in_elem, in_ty, ret_ty);
+ return match in_elem.sty {
+ ty::TyInt(_i) => {
+ Ok(bx.$int_red(args[0].immediate(), true))
+ },
+ ty::TyUint(_u) => {
+ Ok(bx.$int_red(args[0].immediate(), false))
+ },
+ ty::TyFloat(_f) => {
+ Ok(bx.$float_red(args[0].immediate()))
}
+ _ => {
+ return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+ $name, in_ty, in_elem, ret_ty)
+ },
+ }
+ }
+
+ }
+ }
+
+ minmax_red!("simd_reduce_min": vector_reduce_min, vector_reduce_fmin);
+ minmax_red!("simd_reduce_max": vector_reduce_max, vector_reduce_fmax);
+
+ minmax_red!("simd_reduce_min_nanless": vector_reduce_min, vector_reduce_fmin_fast);
+ minmax_red!("simd_reduce_max_nanless": vector_reduce_max, vector_reduce_fmax_fast);
+
+ macro_rules! bitwise_red {
+ ($name:tt : $red:ident, $boolean:expr) => {
+ if name == $name {
+ let input = if !$boolean {
+ require!(ret_ty == in_elem,
+ "expected return type `{}` (element of input `{}`), found `{}`",
+ in_elem, in_ty, ret_ty);
+ args[0].immediate()
+ } else {
+ // boolean reductions operate on vectors of i1s:
+ let i1 = Type::i1(bx.cx);
+ let i1xn = Type::vector(&i1, in_len as u64);
+ bx.trunc(args[0].immediate(), i1xn)
};
- Ok(bx.vector_reduce_fmul_fast(acc, args[0].immediate()))
+ return match in_elem.sty {
+ ty::TyInt(_) | ty::TyUint(_) => {
+ let r = bx.$red(input);
+ Ok(
+ if !$boolean {
+ r
+ } else {
+ bx.zext(r, Type::bool(bx.cx))
+ }
+ )
+ },
+ _ => {
+ return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+ $name, in_ty, in_elem, ret_ty)
+ },
+ }
}
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_mul", in_ty, in_elem, ret_ty)
- },
}
}
- if name == "simd_reduce_min" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_min(args[0].immediate(), true))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_min(args[0].immediate(), false))
- },
- ty::TyFloat(_f) => {
- Ok(bx.vector_reduce_fmin_fast(args[0].immediate()))
- }
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_min", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_max" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_max(args[0].immediate(), true))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_max(args[0].immediate(), false))
- },
- ty::TyFloat(_f) => {
- Ok(bx.vector_reduce_fmax_fast(args[0].immediate()))
- }
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_max", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_and" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_and(args[0].immediate()))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_and(args[0].immediate()))
- },
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_and", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_or" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_or(args[0].immediate()))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_or(args[0].immediate()))
- },
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_or", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_xor" {
- require!(ret_ty == in_elem,
- "expected return type `{}` (element of input `{}`), found `{}`",
- in_elem, in_ty, ret_ty);
- return match in_elem.sty {
- ty::TyInt(_i) => {
- Ok(bx.vector_reduce_xor(args[0].immediate()))
- },
- ty::TyUint(_u) => {
- Ok(bx.vector_reduce_xor(args[0].immediate()))
- },
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_xor", in_ty, in_elem, ret_ty)
- },
- }
- }
-
- if name == "simd_reduce_all" {
- //require!(ret_ty == in_elem,
- // "expected return type `{}` (element of input `{}`), found `{}`",
- // in_elem, in_ty, ret_ty);
- let i1 = Type::i1(bx.cx);
- let i1xn = Type::vector(&i1, in_len as u64);
- let v = bx.trunc(args[0].immediate(), i1xn);
-
- let red = match in_elem.sty {
- ty::TyInt(_i) => {
- bx.vector_reduce_and(v)
- },
- ty::TyUint(_u) => {
- bx.vector_reduce_and(v)
- },
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_and", in_ty, in_elem, ret_ty)
- },
- };
- return Ok(bx.zext(red, Type::bool(bx.cx)));
- }
-
- if name == "simd_reduce_any" {
- //require!(ret_ty == in_elem,
- // "expected return type `{}` (element of input `{}`), found `{}`",
- // in_elem, in_ty, ret_ty);
- let i1 = Type::i1(bx.cx);
- let i1xn = Type::vector(&i1, in_len as u64);
- let v = bx.trunc(args[0].immediate(), i1xn);
-
- let red = match in_elem.sty {
- ty::TyInt(_i) => {
- bx.vector_reduce_or(v)
- },
- ty::TyUint(_u) => {
- bx.vector_reduce_or(v)
- },
- _ => {
- return_error!("unsupported {} from `{}` with element `{}` to `{}`",
- "simd_reduce_and", in_ty, in_elem, ret_ty)
- },
- };
- return Ok(bx.zext(red, Type::bool(bx.cx)));
- }
-
+ bitwise_red!("simd_reduce_and": vector_reduce_and, false);
+ bitwise_red!("simd_reduce_or": vector_reduce_or, false);
+ bitwise_red!("simd_reduce_xor": vector_reduce_xor, false);
+ bitwise_red!("simd_reduce_all": vector_reduce_and, true);
+ bitwise_red!("simd_reduce_any": vector_reduce_or, true);
if name == "simd_cast" {
require_simd!(ret_ty, "return");
diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs
index f2d01c5..b87b8aa 100644
--- a/src/librustc_typeck/check/intrinsic.rs
+++ b/src/librustc_typeck/check/intrinsic.rs
@@ -362,9 +362,12 @@
"simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
"simd_cast" => (2, vec![param(0)], param(1)),
"simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
- "simd_reduce_add" | "simd_reduce_mul" |
+ "simd_reduce_add_ordered" | "simd_reduce_mul_ordered"
+ => (2, vec![param(0), param(1)], param(1)),
+ "simd_reduce_add_unordered" | "simd_reduce_mul_unordered" |
"simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
- "simd_reduce_min" | "simd_reduce_max"
+ "simd_reduce_min" | "simd_reduce_max" |
+ "simd_reduce_min_nanless" | "simd_reduce_max_nanless"
=> (2, vec![param(0)], param(1)),
name if name.starts_with("simd_shuffle") => {
match name["simd_shuffle".len()..].parse() {
diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp
index e749549..9d5f904 100644
--- a/src/rustllvm/RustWrapper.cpp
+++ b/src/rustllvm/RustWrapper.cpp
@@ -1397,6 +1397,7 @@
}
// Vector reductions:
+#if LLVM_VERSION_GE(6, 0)
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc),unwrap(Src)));
@@ -1441,3 +1442,4 @@
LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN));
}
+#endif
diff --git a/src/test/run-pass/simd-intrinsic-generic-reduction.rs b/src/test/run-pass/simd-intrinsic-generic-reduction.rs
index 15b291a..6755c92 100644
--- a/src/test/run-pass/simd-intrinsic-generic-reduction.rs
+++ b/src/test/run-pass/simd-intrinsic-generic-reduction.rs
@@ -39,10 +39,14 @@
);
extern "platform-intrinsic" {
- fn simd_reduce_add<T, U>(x: T) -> U;
- fn simd_reduce_mul<T, U>(x: T) -> U;
+ fn simd_reduce_add_unordered<T, U>(x: T) -> U;
+ fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+ fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
+ fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
fn simd_reduce_min<T, U>(x: T) -> U;
fn simd_reduce_max<T, U>(x: T) -> U;
+ fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+ fn simd_reduce_max_nanless<T, U>(x: T) -> U;
fn simd_reduce_and<T, U>(x: T) -> U;
fn simd_reduce_or<T, U>(x: T) -> U;
fn simd_reduce_xor<T, U>(x: T) -> U;
@@ -53,91 +57,113 @@
fn main() {
unsafe {
let x = i32x4(1, -2, 3, 4);
- let r: i32 = simd_reduce_add(x);
- assert!(r == 6_i32);
- let r: i32 = simd_reduce_mul(x);
- assert!(r == -24_i32);
+ let r: i32 = simd_reduce_add_unordered(x);
+ assert_eq!(r, 6_i32);
+ let r: i32 = simd_reduce_mul_unordered(x);
+ assert_eq!(r, -24_i32);
+ let r: i32 = simd_reduce_add_ordered(x, -1);
+ assert_eq!(r, 5_i32);
+ let r: i32 = simd_reduce_mul_ordered(x, -1);
+ assert_eq!(r, 24_i32);
+
let r: i32 = simd_reduce_min(x);
- assert!(r == -21_i32);
+ assert_eq!(r, -2_i32);
let r: i32 = simd_reduce_max(x);
- assert!(r == 4_i32);
+ assert_eq!(r, 4_i32);
let x = i32x4(-1, -1, -1, -1);
let r: i32 = simd_reduce_and(x);
- assert!(r == -1_i32);
+ assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_or(x);
- assert!(r == -1_i32);
+ assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_xor(x);
- assert!(r == 0_i32);
+ assert_eq!(r, 0_i32);
let x = i32x4(-1, -1, 0, -1);
let r: i32 = simd_reduce_and(x);
- assert!(r == 0_i32);
+ assert_eq!(r, 0_i32);
let r: i32 = simd_reduce_or(x);
- assert!(r == -1_i32);
+ assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_xor(x);
- assert!(r == -1_i32);
+ assert_eq!(r, -1_i32);
}
unsafe {
let x = u32x4(1, 2, 3, 4);
- let r: u32 = simd_reduce_add(x);
- assert!(r == 10_u32);
- let r: u32 = simd_reduce_mul(x);
- assert!(r == 24_u32);
+ let r: u32 = simd_reduce_add_unordered(x);
+ assert_eq!(r, 10_u32);
+ let r: u32 = simd_reduce_mul_unordered(x);
+ assert_eq!(r, 24_u32);
+ let r: u32 = simd_reduce_add_ordered(x, 1);
+ assert_eq!(r, 11_u32);
+ let r: u32 = simd_reduce_mul_ordered(x, 2);
+ assert_eq!(r, 48_u32);
+
let r: u32 = simd_reduce_min(x);
- assert!(r == 1_u32);
+ assert_eq!(r, 1_u32);
let r: u32 = simd_reduce_max(x);
- assert!(r == 4_u32);
+ assert_eq!(r, 4_u32);
let t = u32::max_value();
let x = u32x4(t, t, t, t);
let r: u32 = simd_reduce_and(x);
- assert!(r == t);
+ assert_eq!(r, t);
let r: u32 = simd_reduce_or(x);
- assert!(r == t);
+ assert_eq!(r, t);
let r: u32 = simd_reduce_xor(x);
- assert!(r == 0_u32);
+ assert_eq!(r, 0_u32);
let x = u32x4(t, t, 0, t);
let r: u32 = simd_reduce_and(x);
- assert!(r == 0_u32);
+ assert_eq!(r, 0_u32);
let r: u32 = simd_reduce_or(x);
- assert!(r == t);
+ assert_eq!(r, t);
let r: u32 = simd_reduce_xor(x);
- assert!(r == t);
+ assert_eq!(r, t);
}
unsafe {
let x = f32x4(1., -2., 3., 4.);
- let r: f32 = simd_reduce_add(x);
- assert!(r == 6_f32);
- let r: f32 = simd_reduce_mul(x);
- assert!(r == -24_f32);
+ let r: f32 = simd_reduce_add_unordered(x);
+ assert_eq!(r, 6_f32);
+ let r: f32 = simd_reduce_mul_unordered(x);
+ assert_eq!(r, -24_f32);
+ // FIXME: only works correctly when the accumulator is 0:
+ // https://bugs.llvm.org/show_bug.cgi?id=36734
+ let r: f32 = simd_reduce_add_ordered(x, 0.);
+ assert_eq!(r, 6_f32);
+ // FIXME: only works correctly when the accumulator is 1:
+ // https://bugs.llvm.org/show_bug.cgi?id=36734
+ let r: f32 = simd_reduce_mul_ordered(x, 1.);
+ assert_eq!(r, -24_f32);
+
let r: f32 = simd_reduce_min(x);
- assert!(r == -2_f32);
+ assert_eq!(r, -2_f32);
let r: f32 = simd_reduce_max(x);
- assert!(r == 4_f32);
+ assert_eq!(r, 4_f32);
+ let r: f32 = simd_reduce_min_nanless(x);
+ assert_eq!(r, -2_f32);
+ let r: f32 = simd_reduce_max_nanless(x);
+ assert_eq!(r, 4_f32);
}
unsafe {
let x = b8x4(!0, !0, !0, !0);
let r: bool = simd_reduce_all(x);
- //let r: bool = foobar(x);
- assert!(r);
+ assert_eq!(r, true);
let r: bool = simd_reduce_any(x);
- assert!(r);
+ assert_eq!(r, true);
let x = b8x4(!0, !0, 0, !0);
let r: bool = simd_reduce_all(x);
- assert!(!r);
+ assert_eq!(r, false);
let r: bool = simd_reduce_any(x);
- assert!(r);
+ assert_eq!(r, true);
let x = b8x4(0, 0, 0, 0);
let r: bool = simd_reduce_all(x);
- assert!(!r);
+ assert_eq!(r, false);
let r: bool = simd_reduce_any(x);
- assert!(!r);
+ assert_eq!(r, false);
}
}