Skip to content

Commit

Permalink
Add support for i16x8.q15mulr_sat_s in wasm (#5853)
Browse files Browse the repository at this point in the history
* Add support for i16x8.q15mulr_sat_s in wasm

Also, some drive-by clarifications to other wasm-simd instructions in simd_op_check -- some of the yet-to-be-implemented ones are of dubious use in Halide and may not be worth implementing.
  • Loading branch information
steven-johnson authored Mar 30, 2021
1 parent 07f880e commit e0461e9
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 7 deletions.
38 changes: 38 additions & 0 deletions src/CodeGen_WebAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class CodeGen_WebAssembly : public CodeGen_Posix {
bool use_soft_float_abi() const override;
int native_vector_bits() const override;
bool use_pic() const override;

void visit(const Cast *) override;
void codegen_vector_reduce(const VectorReduce *, const Expr &) override;
};

Expand Down Expand Up @@ -91,6 +93,9 @@ const WasmIntrinsic intrinsic_defs[] = {
// since the result will be the same for our purposes here
{"llvm.wasm.extadd.pairwise.unsigned.v8i16", Int(16, 8), "pairwise_widening_add", {UInt(8, 16)}, Target::WasmSimd128},
{"llvm.wasm.extadd.pairwise.unsigned.v4i32", Int(32, 4), "pairwise_widening_add", {UInt(16, 8)}, Target::WasmSimd128},

// Basically like ARM's SQRDMULH
{"llvm.wasm.q15mulr.sat.signed", Int(16, 8), "q15mulr_sat_s", {Int(16, 8), Int(16, 8)}, Target::WasmSimd128},
#endif

// TODO: LLVM should support this directly, but doesn't yet.
Expand Down Expand Up @@ -128,6 +133,39 @@ void CodeGen_WebAssembly::init_module() {
}
}

/** Code-generate a Cast. Vector casts are first compared against a small
 * table of expression shapes that map onto a single wasm intrinsic; when
 * nothing in the table applies (or no intrinsic value is produced), the
 * cast is handed to CodeGen_Posix unchanged. */
void CodeGen_WebAssembly::visit(const Cast *op) {
#if LLVM_VERSION >= 130
    struct Pattern {
        std::string intrin;                ///< Intrinsic name passed to call_overloaded_intrin
        Expr pattern;                      ///< Expression shape the cast must match
        Target::Feature required_feature;  ///< Target feature that must be enabled to use this entry
    };

    // clang-format off
    static const Pattern patterns[] = {
        // saturating i16 narrowing of (a * b) widened, rounding-shifted right by 15
        {"q15mulr_sat_s", i16_sat(rounding_shift_right(widening_mul(wild_i16x_, wild_i16x_), u16(15))), Target::WasmSimd128},
    };
    // clang-format on

    // Only vector casts are candidates for the table above.
    if (op->type.is_vector()) {
        std::vector<Expr> matches;
        for (const Pattern &candidate : patterns) {
            // Feature test comes first so expr_match is never run for
            // entries the current target cannot use.
            const bool applies =
                target.has_feature(candidate.required_feature) &&
                expr_match(candidate.pattern, op, matches);
            if (!applies) {
                continue;
            }

            value = call_overloaded_intrin(op->type, candidate.intrin, matches);
            if (value) {
                // An intrinsic call was produced; nothing more to do.
                return;
            }
            // No value came back for this entry: keep scanning, and
            // ultimately fall through to the generic path below.
        }
    }
#endif  // LLVM_VERSION >= 130

    CodeGen_Posix::visit(op);
}

void CodeGen_WebAssembly::codegen_vector_reduce(const VectorReduce *op, const Expr &init) {
#if LLVM_VERSION >= 130
struct Pattern {
Expand Down
24 changes: 17 additions & 7 deletions test/correctness/simd_op_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,9 +1809,12 @@ class SimdOpCheck : public SimdOpCheckTest {
check("i8x16.sub_" + sat + "_u", 16 * w, u8_sat(i16(u8_1) - i16(u8_2)));
check("i16x8.sub_" + sat + "_u", 8 * w, u16_sat(i32(u16_1) - i32(u16_2)));

// Saturating integer Q-format rounding multiplication
// TODO: see arm's qrdmulh, probably
// check("i16x8.q15mulr_sat_s", ???, ???);
if (Halide::Internal::get_llvm_version() >= 130) {
// Saturating integer Q-format rounding multiplication
// Note: division in Halide always rounds down (not towards
// zero). Otherwise these patterns would be more complicated.
check("i16x8.q15mulr_sat_s", 8 * w, i16_sat((i32(i16_1) * i32(i16_2) + (1 << 14)) / (1 << 15)));
}

// Lane-wise integer minimum
check("i8x16.min_s", 16 * w, min(i8_1, i8_2));
Expand Down Expand Up @@ -1936,12 +1939,17 @@ class SimdOpCheck : public SimdOpCheckTest {
// check("i8x16.popcnt", 8 * w, popcount(i32_1));
// check("i8x16.popcnt", 8 * w, popcount(u32_1));

// Any lane true
// All lanes true
// TODO: does Halide have any idiom that obviously generates these?
// Any lane true -- for VectorReduce::Or on 8-bit data
// All lanes true -- for VectorReduce::And on 8-bit data
// TODO: does Halide have any idiom that could usefully use these?
// - v128.any_true could be used for VectorReduce::Or with type bool.
// - i8x16.all_true could be used for VectorReduce::And with type bool.
// - the other all_true variants seem unlikely to be obviously useful in Halide.

// Bitmask extraction
// TODO:
// TODO: does Halide have any idiom that could usefully use these?
// They all extract the high bit of each lane and return a scalar mask of them.
// These all seem unlikely to be obviously useful in Halide.
// check("i8x16.bitmask", 16 * w, ???);
// check("i16x8.bitmask", 8 * w, ???);
// check("i32x4.bitmask", 4 * w, ???);
Expand Down Expand Up @@ -2004,6 +2012,8 @@ class SimdOpCheck : public SimdOpCheckTest {

// Load and Zero-Pad
// TODO
// check("v128.load32_zero", 2 * w, in_u32(0));
// check("v128.load64_zero", 2 * w, in_u64(0));

// Load vector with identical lanes
if (Halide::Internal::get_llvm_version() >= 120) {
Expand Down

0 comments on commit e0461e9

Please sign in to comment.