diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc index c5d2aba8cd..ea1aea1b87 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc @@ -39,6 +39,7 @@ enum class MutationKind { kInsertReturnStatement, kReplaceOperator, kInsertBreakOrContinue, + kReplaceFunctionCallWithBuiltin, kNumMutationKinds }; @@ -115,6 +116,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data, return 0; } break; + case MutationKind::kReplaceFunctionCallWithBuiltin: + if (!mutator.ReplaceFunctionCallWithBuiltin(wgsl_code)) { + return 0; + } + break; default: assert(false && "Unreachable"); return 0; diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc index 55ec02885e..9e0b1a0c3f 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc @@ -31,6 +31,7 @@ class WgslMutatorTest : public WgslMutator { using WgslMutator::FindClosingBrace; using WgslMutator::FindOperatorOccurrence; using WgslMutator::GetFunctionBodyPositions; + using WgslMutator::GetFunctionCallIdentifiers; using WgslMutator::GetIdentifiers; using WgslMutator::GetIntLiterals; using WgslMutator::GetLoopBodyPositions; @@ -236,18 +237,12 @@ TEST(GetIdentifierTest, GetIdentifierTest1) { })"; std::vector> identifiers_pos = mutator.GetIdentifiers(wgsl_code); - std::vector> ground_truth = { - std::make_pair(3, 12), std::make_pair(28, 3), std::make_pair(37, 4), - std::make_pair(49, 5), std::make_pair(60, 3), std::make_pair(68, 4), - std::make_pair(81, 4), std::make_pair(110, 6), std::make_pair(123, 2), - std::make_pair(133, 4), std::make_pair(144, 7), std::make_pair(162, 4), - std::make_pair(183, 12), std::make_pair(209, 6), std::make_pair(221, 3), - std::make_pair(244, 8), std::make_pair(259, 2), std::make_pair(271, 4), - std::make_pair(288, 12), std::make_pair(319, 7), std::make_pair(328, 14), - std::make_pair(352, 2), std::make_pair(363, 4), std::make_pair(381, 3), - std::make_pair(394, 3), std::make_pair(399, 3), std::make_pair(418, 12)}; - + {0, 2}, {3, 12}, {28, 3}, {32, 3}, {37, 4}, {42, 3}, {49, 5}, {55, 4}, + {60, 3}, {68, 4}, {73, 3}, {81, 4}, {86, 3}, {110, 6}, {123, 2}, {126, 11}, + {144, 7}, {152, 8}, {162, 4}, {167, 3}, {183, 12}, {209, 6}, {216, 4}, {221, 3}, + {244, 8}, {259, 2}, {262, 13}, {288, 12}, {319, 7}, {328, 14}, {352, 2}, {355, 12}, + {381, 3}, {385, 7}, {394, 3}, {399, 3}, {418, 12}}; ASSERT_EQ(ground_truth, identifiers_pos); } @@ -603,13 +598,46 @@ TEST(TestInsertBreakOrContinue, TestLoopPositions3) { TEST(TestInsertBreakOrContinue, TestLoopPositions4) { RandomGenerator generator(0); WgslMutatorTest mutator(generator); - // This WGSL-like code is not valid, but it suffices to test regex-based matching (which is - // intended to work well on semi-valid code). - std::string wgsl_code = R"(unifor { } uniform { } sloop { } _loop { } _while { } awhile { } )"; + std::string wgsl_code = + R"(fn clamp_0acf8f() { + var res: vec2 = clamp(vec2(), vec2(), vec2()); + } + @vertex + fn vertex_main() -> @builtin(position) vec4 { + clamp_0acf8f();" + return vec4(); + } + @fragment + fn fragment_main() { + clamp_0acf8f(); + } + @compute @workgroup_size(1) + fn compute_main() {" + var foo: f32 = 0.0; + clamp_0acf8f (); + })"; std::vector loop_positions = mutator.GetLoopBodyPositions(wgsl_code); ASSERT_TRUE(loop_positions.empty()); } +TEST(TestReplaceFunctionCallWithBuiltin, FindFunctionCalls) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string function_body = R"({ + var foo: f32 = 0.0; + var foo_2: i32 = 10; + clamp_0acf8f (); + _0acf8f(); + f +(); + j = (i * 30); + })"; + std::vector> call_identifiers = + mutator.GetFunctionCallIdentifiers(function_body); + std::vector> ground_truth{{82, 12}, {110, 7}, {131, 1}}; + ASSERT_EQ(ground_truth, call_identifiers); +} + } // namespace } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc index 0d2083119d..a96561347e 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc @@ -47,18 +47,32 @@ std::vector> WgslMutator::GetIdentifiers(const std::st // by a character which cannot be part of a WGSL identifer. The regex // for the WGSL identifier is obtained from: // https://www.w3.org/TR/WGSL/#identifiers. - std::regex wgsl_identifier_regex("[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]"); + std::regex identifier_regex("[_a-zA-Z][0-9a-zA-Z_]*"); - std::smatch match; + auto identifiers_begin = + std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), identifier_regex); + auto identifiers_end = std::sregex_iterator(); - std::string::const_iterator search_start(wgsl_code.cbegin()); - std::string prefix; + for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) { + result.push_back( + {static_cast(i->prefix().second - wgsl_code.cbegin()), i->str().size()}); + } + return result; +} - while (regex_search(search_start, wgsl_code.cend(), match, wgsl_identifier_regex) == true) { - prefix += match.prefix(); - result.push_back(std::make_pair(prefix.size() + 1, match.str(1).size())); - prefix += match.str(0); - search_start = match.suffix().first; +std::vector> WgslMutator::GetFunctionCallIdentifiers( + const std::string& wgsl_code) { + std::vector> result; + + std::regex call_regex("([_a-zA-Z][0-9a-zA-Z_]*)[ \\n]*\\("); + + auto identifiers_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), call_regex); + auto identifiers_end = std::sregex_iterator(); + + for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) { + auto submatch = (*i)[1]; + result.push_back( + {static_cast(submatch.first - wgsl_code.cbegin()), submatch.str().size()}); } return result; } @@ -489,4 +503,153 @@ std::optional> WgslMutator::FindOperatorOccurrence return {}; } +bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) { + std::vector> function_body_positions = + GetFunctionBodyPositions(wgsl_code); + + // No function was found in wgsl_code. + if (function_body_positions.empty()) { + return false; + } + + // Pick a random function + auto function = generator_.GetRandomElement(function_body_positions); + + // Find the corresponding closing bracket for the function, and find a semi-colon within the + // function body. + size_t left_bracket_pos = function.first; + + size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); + + if (right_bracket_pos == 0) { + return false; + } + + std::string function_body( + wgsl_code.substr(left_bracket_pos, right_bracket_pos - left_bracket_pos)); + + std::vector> function_call_identifiers = + GetFunctionCallIdentifiers(function_body); + if (function_call_identifiers.empty()) { + return false; + } + auto function_call_identifier = generator_.GetRandomElement(function_call_identifiers); + + std::vector builtin_functions{"all", + "any", + "select", + "arrayLength", + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "atan2", + "ceil", + "clamp", + "cos", + "cosh", + "cross", + "degrees", + "distance", + "exp", + "exp2", + "faceForward", + "floor", + "fma", + "fract", + "frexp", + "inverseSqrt", + "ldexp", + "length", + "log", + "log2", + "max", + "min", + "mix", + "modf", + "normalize", + "pow", + "quantizeToF16", + "radians", + "reflect", + "refract", + "round", + "saturate", + "sign", + "sin", + "sinh", + "smoothstep", + "sqrt", + "step", + "tan", + "tanh", + "trunc", + "abs", + "clamp", + "countLeadingZeros", + "countOneBits", + "countTrailingZeros", + "extractBits", + "firstLeadingBit", + "firstTrailingBit", + "insertBits", + "max", + "min", + "reverseBits", + "determinant", + "transpose", + "dot", + "dpdx", + "dpdxCoarse", + "dpdxFine", + "dpdy", + "dpdyCoarse", + "dpdyFine", + "fwidth", + "fwidthCoarse", + "fwidthFine", + "textureDimensions", + "textureGather", + "textureGatherCompare", + "textureLoad", + "textureNumLayers", + "textureNumLevels", + "textureNumSamples", + "textureSample", + "textureSampleBias", + "textureSampleCompare", + "textureSampleCompareLevel", + "textureSampleGrad", + "textureSampleLevel", + "textureStore", + "atomicLoad", + "atomicStore", + "atomicAdd", + "atomicSub", + "atomicMax", + "atomicMin", + "atomicAnd", + "atomicOr", + "atomicXor", + "pack4x8snorm", + "pack4x8unorm", + "pack2x16snorm", + "pack2x16unorm", + "pack2x16float", + "unpack4x8snorm", + "unpack4x8unorm", + "unpack2x16snorm", + "unpack2x16unorm", + "unpack2x16float", + "storageBarrier", + "workgroupBarrier"}; + wgsl_code.replace(left_bracket_pos + function_call_identifier.first, + function_call_identifier.second, + generator_.GetRandomElement(builtin_functions)); + return true; +} + } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h index 5308bf84a5..fde4611d81 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h @@ -84,6 +84,14 @@ class WgslMutator { /// @return true if an operator replacement happened or false otherwise. bool ReplaceRandomOperator(std::string& wgsl_code); + /// Given a WGSL-like string, replaces a random identifier that appears to be a function call + /// with the name of a built-in function. This will often lead to an invalid module, as the + /// mutation does not aim to check whether the original and replacement function have the same + /// number or types of arguments. + /// @param wgsl_code - the initial WGSL-like string that will be mutated. + /// @return true if a function call replacement happened or false otherwise. + bool ReplaceFunctionCallWithBuiltin(std::string& wgsl_code); + protected: /// Given index idx1 it delets the region of length interval_len /// starting at index idx1; @@ -128,6 +136,13 @@ class WgslMutator { /// identifiers in wgsl_code. std::vector> GetIdentifiers(const std::string& wgsl_code); + /// A function that finds the identifiers in a WGSL-like string that appear to be used as + /// function names in function call expressions. + /// @param wgsl_code - the WGSL-like string where the identifiers will be found. + /// @return a vector with the positions and the length of all the + /// identifiers in wgsl_code. + std::vector> GetFunctionCallIdentifiers(const std::string& wgsl_code); + /// A function that returns returns the starting position /// and the length of all the integer literals in a WGSL-like string. /// @param wgsl_code - the WGSL-like string where the int literals