From b304792dd58d1b56b3c6235a22a950b7c0826fea Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Tue, 7 Apr 2026 22:58:11 +0000 Subject: [PATCH] [FP16] Implement f16x8.demote_{f64x2, f32x4}_zero. Specified at https://github.com/WebAssembly/half-precision/blob/main/proposals/half-precision/Overview.md --- scripts/gen-s-parser.py | 2 ++ src/gen-s-parser.inc | 32 ++++++++++++++++++---- src/ir/child-typer.h | 2 ++ src/ir/cost.h | 2 ++ src/literal.h | 2 ++ src/passes/Print.cpp | 6 +++++ src/wasm-binary.h | 2 ++ src/wasm-interpreter.h | 4 +++ src/wasm.h | 2 ++ src/wasm/literal.cpp | 24 +++++++++++++++++ src/wasm/wasm-binary.cpp | 4 +++ src/wasm/wasm-stack.cpp | 8 ++++++ src/wasm/wasm-validator.cpp | 2 ++ src/wasm/wasm.cpp | 2 ++ test/lit/basic/f16.wast | 42 +++++++++++++++++++++++++++++ test/spec/f16.wast | 54 +++++++++++++++++++++++++++++++++++++ 16 files changed, 185 insertions(+), 5 deletions(-) diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index d0f08d0546b..aa848ad8a52 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -550,6 +550,8 @@ ("f16x8.convert_i16x8_s", "makeUnary(UnaryOp::ConvertSVecI16x8ToVecF16x8)"), ("f16x8.convert_i16x8_u", "makeUnary(UnaryOp::ConvertUVecI16x8ToVecF16x8)"), ("f32x4.promote_low_f16x8", "makeUnary(UnaryOp::PromoteLowVecF16x8ToVecF32x4)"), + ("f16x8.demote_f32x4_zero", "makeUnary(UnaryOp::DemoteZeroVecF32x4ToVecF16x8)"), + ("f16x8.demote_f64x2_zero", "makeUnary(UnaryOp::DemoteZeroVecF64x2ToVecF16x8)"), ("f16x8.madd", "makeSIMDTernary(SIMDTernaryOp::MaddVecF16x8)"), ("f16x8.nmadd", "makeSIMDTernary(SIMDTernaryOp::NmaddVecF16x8)"), diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index eca86c6ed77..345afa302aa 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -505,12 +505,34 @@ switch (buf[0]) { default: goto parse_error; } } - case 'd': - if (op == "f16x8.div"sv) { - CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::DivVecF16x8)); - return Ok{}; + case 'd': { + switch (buf[7]) { +
case 'e': { + switch (buf[14]) { + case '3': + if (op == "f16x8.demote_f32x4_zero"sv) { + CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::DemoteZeroVecF32x4ToVecF16x8)); + return Ok{}; + } + goto parse_error; + case '6': + if (op == "f16x8.demote_f64x2_zero"sv) { + CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::DemoteZeroVecF64x2ToVecF16x8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; + } + } + case 'i': + if (op == "f16x8.div"sv) { + CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::DivVecF16x8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; } - goto parse_error; + } case 'e': { switch (buf[7]) { case 'q': diff --git a/src/ir/child-typer.h b/src/ir/child-typer.h index 385e0fa8290..e223eb2ad59 100644 --- a/src/ir/child-typer.h +++ b/src/ir/child-typer.h @@ -448,6 +448,8 @@ template struct ChildTyper : OverriddenVisitor { case ConvertSVecI16x8ToVecF16x8: case ConvertUVecI16x8ToVecF16x8: case PromoteLowVecF16x8ToVecF32x4: + case DemoteZeroVecF32x4ToVecF16x8: + case DemoteZeroVecF64x2ToVecF16x8: case AnyTrueVec128: case AllTrueVecI8x16: case AllTrueVecI16x8: diff --git a/src/ir/cost.h b/src/ir/cost.h index 0042d27bcb2..7c02dafce7b 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -285,6 +285,8 @@ struct CostAnalyzer : public OverriddenVisitor { case ConvertSVecI16x8ToVecF16x8: case ConvertUVecI16x8ToVecF16x8: case PromoteLowVecF16x8ToVecF32x4: + case DemoteZeroVecF32x4ToVecF16x8: + case DemoteZeroVecF64x2ToVecF16x8: ret = 1; break; case InvalidUnary: diff --git a/src/literal.h b/src/literal.h index 4fcb2ee8a2e..686348d1942 100644 --- a/src/literal.h +++ b/src/literal.h @@ -724,6 +724,8 @@ class Literal { Literal demoteZeroToF32x4() const; Literal promoteLowToF64x2() const; Literal promoteLowF16x8ToF32x4() const; + Literal demoteZeroF32x4ToF16x8() const; + Literal demoteZeroF64x2ToF16x8() const; Literal truncSatToSI16x8() const; Literal truncSatToUI16x8() const; Literal convertSToF16x8() const; diff --git 
a/src/passes/Print.cpp b/src/passes/Print.cpp index d043735f315..b8d5990e35a 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -1404,6 +1404,12 @@ struct PrintExpressionContents case PromoteLowVecF16x8ToVecF32x4: o << "f32x4.promote_low_f16x8"; break; + case DemoteZeroVecF32x4ToVecF16x8: + o << "f16x8.demote_f32x4_zero"; + break; + case DemoteZeroVecF64x2ToVecF16x8: + o << "f16x8.demote_f64x2_zero"; + break; case InvalidUnary: WASM_UNREACHABLE("unvalid unary operator"); } diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 386f495a905..90a32dff114 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1126,6 +1126,8 @@ enum ASTNodes { I16x8TruncSatF16x8U = 0x146, F16x8ConvertI16x8S = 0x147, F16x8ConvertI16x8U = 0x148, + F16x8DemoteF32x4Zero = 0x149, + F16x8DemoteF64x2Zero = 0x14a, F32x4PromoteLowF16x8 = 0x14b, // bulk memory opcodes diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index e47559b597d..a57f05ea66f 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1166,6 +1166,10 @@ class ExpressionRunner : public OverriddenVisitor { return value.convertUToF16x8(); case PromoteLowVecF16x8ToVecF32x4: return value.promoteLowF16x8ToF32x4(); + case DemoteZeroVecF32x4ToVecF16x8: + return value.demoteZeroF32x4ToF16x8(); + case DemoteZeroVecF64x2ToVecF16x8: + return value.demoteZeroF64x2ToF16x8(); case InvalidUnary: WASM_UNREACHABLE("invalid unary op"); } diff --git a/src/wasm.h b/src/wasm.h index 5935cd47c66..738a874fde1 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -252,6 +252,8 @@ enum UnaryOp { ConvertSVecI16x8ToVecF16x8, ConvertUVecI16x8ToVecF16x8, PromoteLowVecF16x8ToVecF32x4, + DemoteZeroVecF32x4ToVecF16x8, + DemoteZeroVecF64x2ToVecF16x8, InvalidUnary }; diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index b3156fab0b3..66728712ac8 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -2912,6 +2912,30 @@ Literal Literal::truncSatZeroUToI32x4() const { Literal Literal::demoteZeroToF32x4() const { 
return unary_zero<4, &Literal::getLanesF64x2, &Literal::demote>(*this); } +Literal Literal::demoteZeroF32x4ToF16x8() const { + auto lanes = getLanesF32x4(); + LaneArray<8> result; + for (size_t i = 0; i < 4; ++i) { + result[i] = Literal(fp16_ieee_from_fp32_value(lanes[i].getf32())); + } + for (size_t i = 4; i < 8; ++i) { + result[i] = Literal(int32_t(0)); + } + return Literal(result); +} + +Literal Literal::demoteZeroF64x2ToF16x8() const { + auto lanes = getLanesF64x2(); + LaneArray<8> result; + for (size_t i = 0; i < 2; ++i) { + result[i] = Literal(fp16_ieee_from_fp32_value(lanes[i].demote().getf32())); + } + for (size_t i = 2; i < 8; ++i) { + result[i] = Literal(int32_t(0)); + } + return Literal(result); +} + Literal Literal::promoteLowToF64x2() const { return extendF32(*this); } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index da49533f55d..f801a93d1bd 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -4474,6 +4474,10 @@ Result<> WasmBinaryReader::readInst() { return builder.makeUnary(ConvertSVecI16x8ToVecF16x8); case BinaryConsts::F16x8ConvertI16x8U: return builder.makeUnary(ConvertUVecI16x8ToVecF16x8); + case BinaryConsts::F16x8DemoteF32x4Zero: + return builder.makeUnary(DemoteZeroVecF32x4ToVecF16x8); + case BinaryConsts::F16x8DemoteF64x2Zero: + return builder.makeUnary(DemoteZeroVecF64x2ToVecF16x8); case BinaryConsts::F32x4PromoteLowF16x8: return builder.makeUnary(PromoteLowVecF16x8ToVecF32x4); case BinaryConsts::I8x16ExtractLaneS: diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index cb308271bc7..21a5bda5a94 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -1459,6 +1459,14 @@ void BinaryInstWriter::visitUnary(Unary* curr) { o << static_cast(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8ConvertI16x8U); break; + case DemoteZeroVecF32x4ToVecF16x8: + o << static_cast(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F16x8DemoteF32x4Zero); + break; + case 
DemoteZeroVecF64x2ToVecF16x8: + o << static_cast(BinaryConsts::SIMDPrefix) + << U32LEB(BinaryConsts::F16x8DemoteF64x2Zero); + break; case PromoteLowVecF16x8ToVecF32x4: o << static_cast(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F32x4PromoteLowF16x8); diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index f8c394072dd..3e809d0157d 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -2381,6 +2381,8 @@ void FunctionValidator::visitUnary(Unary* curr) { case DemoteZeroVecF64x2ToVecF32x4: case PromoteLowVecF32x4ToVecF64x2: case PromoteLowVecF16x8ToVecF32x4: + case DemoteZeroVecF32x4ToVecF16x8: + case DemoteZeroVecF64x2ToVecF16x8: case RelaxedTruncSVecF32x4ToVecI32x4: case RelaxedTruncUVecF32x4ToVecI32x4: case RelaxedTruncZeroSVecF64x2ToVecI32x4: diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index a77a25ce874..cbc0c48bd20 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -715,6 +715,8 @@ void Unary::finalize() { case ConvertSVecI16x8ToVecF16x8: case ConvertUVecI16x8ToVecF16x8: case PromoteLowVecF16x8ToVecF32x4: + case DemoteZeroVecF32x4ToVecF16x8: + case DemoteZeroVecF64x2ToVecF16x8: type = Type::v128; break; case AnyTrueVec128: diff --git a/test/lit/basic/f16.wast b/test/lit/basic/f16.wast index d5e204d87f7..43ab593ab09 100644 --- a/test/lit/basic/f16.wast +++ b/test/lit/basic/f16.wast @@ -613,6 +613,36 @@ (local.get $0) ) ) + ;; CHECK-TEXT: (func $f16x8.demote_f32x4_zero (type $1) (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f16x8.demote_f32x4_zero + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.demote_f32x4_zero (type $1) (param $0 v128) (result v128) + ;; CHECK-BIN-NEXT: (f16x8.demote_f32x4_zero + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.demote_f32x4_zero (param $0 v128) (result v128) + (f16x8.demote_f32x4_zero + (local.get $0) + ) + ) + ;; CHECK-TEXT: (func 
$f16x8.demote_f64x2_zero (type $1) (param $0 v128) (result v128) + ;; CHECK-TEXT-NEXT: (f16x8.demote_f64x2_zero + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.demote_f64x2_zero (type $1) (param $0 v128) (result v128) + ;; CHECK-BIN-NEXT: (f16x8.demote_f64x2_zero + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.demote_f64x2_zero (param $0 v128) (result v128) + (f16x8.demote_f64x2_zero + (local.get $0) + ) + ) ) ;; CHECK-BIN-NODEBUG: (type $0 (func (param v128 v128) (result v128))) @@ -849,3 +879,15 @@ ;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $33 (type $1) (param $0 v128) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.demote_f32x4_zero +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $34 (type $1) (param $0 v128) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.demote_f64x2_zero +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) diff --git a/test/spec/f16.wast b/test/spec/f16.wast index a36d5032d4f..5b700d05a3c 100644 --- a/test/spec/f16.wast +++ b/test/spec/f16.wast @@ -39,6 +39,8 @@ (func (export "f16x8.convert_i16x8_s") (param $0 v128) (result v128) (f16x8.convert_i16x8_s (local.get $0))) (func (export "f16x8.convert_i16x8_u") (param $0 v128) (result v128) (f16x8.convert_i16x8_u (local.get $0))) (func (export "f32x4.promote_low_f16x8") (param $0 v128) (result v128) (f32x4.promote_low_f16x8 (local.get $0))) + (func (export "f16x8.demote_f32x4_zero") (param $0 v128) (result v128) (f16x8.demote_f32x4_zero (local.get $0))) + (func (export "f16x8.demote_f64x2_zero") (param $0 v128) (result v128) (f16x8.demote_f64x2_zero (local.get $0))) ;; Multiple operation tests: (func (export "splat_replace") (result v128) (f16x8.replace_lane 0 
(f16x8.splat (f32.const 1)) (f32.const 99)) ) @@ -268,3 +270,55 @@ (v128.const i16x8 0x0001 0 0 0 0 0 0 0)) ;; 2^-24 (v128.const i32x4 0x33800000 0 0 0)) + +(assert_return (invoke "f16x8.demote_f32x4_zero" + ;; 1.0 2.0 3.0 4.0 + (v128.const i32x4 0x3f800000 0x40000000 0x40400000 0x40800000)) + ;; 1.0 2.0 3.0 4.0 0 0 0 0 + (v128.const i16x8 0x3c00 0x4000 0x4200 0x4400 0 0 0 0)) + +(assert_return (invoke "f16x8.demote_f64x2_zero" + ;; 1.0 2.0 + (v128.const i64x2 0x3ff0000000000000 0x4000000000000000)) + ;; 1.0 2.0 0 0 0 0 0 0 + (v128.const i16x8 0x3c00 0x4000 0 0 0 0 0 0)) + +;; Edge cases: Infinities, NaNs, Zeros +(assert_return (invoke "f16x8.demote_f32x4_zero" + ;; inf -inf nan -0.0 + (v128.const i32x4 0x7f800000 0xff800000 0x7fc00000 0x80000000)) + ;; inf -inf nan -0.0 0 0 0 0 + (v128.const i16x8 0x7c00 0xfc00 0x7e00 0x8000 0 0 0 0)) + +;; Edge cases: Overflow +(assert_return (invoke "f16x8.demote_f32x4_zero" + ;; 1e5 -1e5 65504 -65504 + (v128.const i32x4 0x47c35000 0xc7c35000 0x477fe000 0xc77fe000)) + ;; inf -inf 65504 -65504 0 0 0 0 + (v128.const i16x8 0x7c00 0xfc00 0x7bff 0xfbff 0 0 0 0)) + +;; Edge cases: Infinities, NaNs, Zeros +(assert_return (invoke "f16x8.demote_f64x2_zero" + ;; inf -inf + (v128.const i64x2 0x7ff0000000000000 0xfff0000000000000)) + ;; inf -inf 0 0 0 0 0 0 + (v128.const i16x8 0x7c00 0xfc00 0 0 0 0 0 0)) + +(assert_return (invoke "f16x8.demote_f64x2_zero" + ;; nan -0.0 + (v128.const i64x2 0x7ff8000000000000 0x8000000000000000)) + ;; nan -0.0 0 0 0 0 0 0 + (v128.const i16x8 0x7e00 0x8000 0 0 0 0 0 0)) + +;; Edge cases: Overflow +(assert_return (invoke "f16x8.demote_f64x2_zero" + ;; 1e5 -1e5 + (v128.const i64x2 0x40f86a0000000000 0xc0f86a0000000000)) + ;; inf -inf 0 0 0 0 0 0 + (v128.const i16x8 0x7c00 0xfc00 0 0 0 0 0 0)) + +(assert_return (invoke "f16x8.demote_f64x2_zero" + ;; 65504 -65504 + (v128.const i64x2 0x40effc0000000000 0xc0effc0000000000)) + ;; 65504 -65504 0 0 0 0 0 0 + (v128.const i16x8 0x7bff 0xfbff 0 0 0 0 0 0))