mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-12-10 05:57:51 +00:00
tint: Implement const-eval of quantizeToF16
Bug: tint:1581 Change-Id: I5cf9806bde7875282d3b67731dbb88666523f598 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/108142 Reviewed-by: Antonio Maiorano <amaiorano@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Kokoro: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Dawn LUCI CQ
parent
749abeaafb
commit
c81f9dce07
@@ -514,8 +514,8 @@ fn pack4x8snorm(vec4<f32>) -> u32
|
||||
fn pack4x8unorm(vec4<f32>) -> u32
|
||||
fn pow<T: f32_f16>(T, T) -> T
|
||||
fn pow<N: num, T: f32_f16>(vec<N, T>, vec<N, T>) -> vec<N, T>
|
||||
fn quantizeToF16(f32) -> f32
|
||||
fn quantizeToF16<N: num>(vec<N, f32>) -> vec<N, f32>
|
||||
@const fn quantizeToF16(f32) -> f32
|
||||
@const fn quantizeToF16<N: num>(vec<N, f32>) -> vec<N, f32>
|
||||
fn radians<T: f32_f16>(T) -> T
|
||||
fn radians<N: num, T: f32_f16>(vec<N, T>) -> vec<N, T>
|
||||
fn reflect<N: num, T: f32_f16>(vec<N, T>, vec<N, T>) -> vec<N, T>
|
||||
|
||||
@@ -1869,6 +1869,27 @@ ConstEval::Result ConstEval::step(const sem::Type* ty,
|
||||
return TransformElements(builder, ty, transform, args[0], args[1]);
|
||||
}
|
||||
|
||||
// Const-evaluates the quantizeToF16 builtin.
// `ty` is the result type, `args[0]` is the f32 scalar or vector operand. The source location is
// unused because this implementation never raises a diagnostic.
// Each element is handled by the `transform` lambda, which is handed to TransformElements.
ConstEval::Result ConstEval::quantizeToF16(const sem::Type* ty,
                                           utils::VectorRef<const sem::Constant*> args,
                                           const Source&) {
    auto transform = [&](const sem::Constant* c) {
        // Round-trip the element through f16, with a checked conversion back to f32.
        auto conv = CheckedConvert<f32>(f16(c->As<f32>()));
        if (conv) {
            return CreateElement(builder, c->Type(), conv.Get());
        }

        // https://www.w3.org/TR/WGSL/#quantizeToF16-builtin
        // If e is outside the finite range of binary16, then the result is any value of type
        // f32. Here the result is clamped to the nearest finite f16 limit.
        const bool exceeds_negative =
            conv.Failure() == ConversionFailure::kExceedsNegativeLimit;

        // The element's type is c->Type() (the f32 operand type), so build the clamped value as
        // an f32, exactly like the in-range path above, rather than as an f16.
        return CreateElement(builder, c->Type(),
                             f32(exceeds_negative ? f16::kLowestValue : f16::kHighestValue));
    };
    return TransformElements(builder, ty, transform, args[0]);
}
|
||||
|
||||
ConstEval::Result ConstEval::Convert(const sem::Type* target_ty,
|
||||
const sem::Constant* value,
|
||||
const Source& source) {
|
||||
|
||||
@@ -530,6 +530,15 @@ class ConstEval {
|
||||
utils::VectorRef<const sem::Constant*> args,
|
||||
const Source& source);
|
||||
|
||||
/// quantizeToF16 builtin
|
||||
/// @param ty the expression type
|
||||
/// @param args the input arguments
|
||||
/// @param source the source location of the builtin call
|
||||
/// @return the result value, or null if the value cannot be calculated
|
||||
Result quantizeToF16(const sem::Type* ty,
|
||||
utils::VectorRef<const sem::Constant*> args,
|
||||
const Source& source);
|
||||
|
||||
private:
|
||||
/// Adds the given error message to the diagnostics
|
||||
void AddError(const std::string& msg, const Source& source) const;
|
||||
|
||||
@@ -842,5 +842,62 @@ INSTANTIATE_TEST_SUITE_P( //
|
||||
StepCases<f32>(),
|
||||
StepCases<f16>()))));
|
||||
|
||||
std::vector<Case> QuantizeToF16Cases() {
|
||||
return {
|
||||
C({0_f}, 0_f), //
|
||||
C({-0_f}, -0_f), //
|
||||
C({1_f}, 1_f), //
|
||||
C({-1_f}, -1_f), //
|
||||
|
||||
// 0.00006106496 quantized to 0.000061035156 = 0x1p-14
|
||||
C({0.00006106496_f}, 0.000061035156_f), //
|
||||
C({-0.00006106496_f}, -0.000061035156_f), //
|
||||
|
||||
// 1.0004883 quantized to 1.0 = 0x1p0
|
||||
C({1.0004883_f}, 1.0_f), //
|
||||
C({-1.0004883_f}, -1.0_f), //
|
||||
|
||||
// 8196.0 quantized to 8192.0 = 0x1p13
|
||||
C({8196_f}, 8192_f), //
|
||||
C({-8196_f}, -8192_f), //
|
||||
|
||||
// Value in subnormal f16 range
|
||||
C({0x0.034p-14_f}, 0x0.034p-14_f), //
|
||||
C({-0x0.034p-14_f}, -0x0.034p-14_f), //
|
||||
C({0x0.068p-14_f}, 0x0.068p-14_f), //
|
||||
C({-0x0.068p-14_f}, -0x0.068p-14_f), //
|
||||
|
||||
// 0x0.06b7p-14 quantized to 0x0.068p-14
|
||||
C({0x0.06b7p-14_f}, 0x0.068p-14_f), //
|
||||
C({-0x0.06b7p-14_f}, -0x0.068p-14_f), //
|
||||
|
||||
// Value out of f16 range
|
||||
C({65504.003_f}, 65504_f), //
|
||||
C({-65504.003_f}, -65504_f), //
|
||||
C({0x1.234p56_f}, 65504_f), //
|
||||
C({-0x4.321p65_f}, -65504_f), //
|
||||
|
||||
// Vector tests
|
||||
C({Vec(0_f, -0_f)}, Vec(0_f, -0_f)), //
|
||||
C({Vec(1_f, -1_f)}, Vec(1_f, -1_f)), //
|
||||
|
||||
C({Vec(0.00006106496_f, -0.00006106496_f, 1.0004883_f, -1.0004883_f)},
|
||||
Vec(0.000061035156_f, -0.000061035156_f, 1.0_f, -1.0_f)),
|
||||
|
||||
C({Vec(8196_f, 8192_f, 0x0.034p-14_f)}, Vec(8192_f, 8192_f, 0x0.034p-14_f)),
|
||||
|
||||
C({Vec(0x0.034p-14_f, -0x0.034p-14_f, 0x0.068p-14_f, -0x0.068p-14_f)},
|
||||
Vec(0x0.034p-14_f, -0x0.034p-14_f, 0x0.068p-14_f, -0x0.068p-14_f)),
|
||||
|
||||
C({Vec(65504.003_f, 0x1.234p56_f)}, Vec(65504_f, 65504_f)),
|
||||
C({Vec(-0x1.234p56_f, -65504.003_f)}, Vec(-65504_f, -65504_f)),
|
||||
};
|
||||
}
|
||||
INSTANTIATE_TEST_SUITE_P( //
|
||||
QuantizeToF16,
|
||||
ResolverConstEvalBuiltinTest,
|
||||
testing::Combine(testing::Values(sem::BuiltinType::kQuantizeToF16),
|
||||
testing::ValuesIn(QuantizeToF16Cases())));
|
||||
|
||||
} // namespace
|
||||
} // namespace tint::resolver
|
||||
|
||||
@@ -12302,7 +12302,7 @@ constexpr OverloadInfo kOverloads[] = {
|
||||
/* parameters */ &kParameters[880],
|
||||
/* return matcher indices */ &kMatcherIndices[62],
|
||||
/* flags */ OverloadFlags(OverloadFlag::kIsBuiltin, OverloadFlag::kSupportsVertexPipeline, OverloadFlag::kSupportsFragmentPipeline, OverloadFlag::kSupportsComputePipeline),
|
||||
/* const eval */ nullptr,
|
||||
/* const eval */ &ConstEval::quantizeToF16,
|
||||
},
|
||||
{
|
||||
/* [331] */
|
||||
@@ -12314,7 +12314,7 @@ constexpr OverloadInfo kOverloads[] = {
|
||||
/* parameters */ &kParameters[879],
|
||||
/* return matcher indices */ &kMatcherIndices[60],
|
||||
/* flags */ OverloadFlags(OverloadFlag::kIsBuiltin, OverloadFlag::kSupportsVertexPipeline, OverloadFlag::kSupportsFragmentPipeline, OverloadFlag::kSupportsComputePipeline),
|
||||
/* const eval */ nullptr,
|
||||
/* const eval */ &ConstEval::quantizeToF16,
|
||||
},
|
||||
{
|
||||
/* [332] */
|
||||
|
||||
Reference in New Issue
Block a user