diff --git a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc index ea1aea1b87..9ffea24a5e 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/fuzzer.cc @@ -40,6 +40,7 @@ enum class MutationKind { kReplaceOperator, kInsertBreakOrContinue, kReplaceFunctionCallWithBuiltin, + kAddSwizzle, kNumMutationKinds }; @@ -121,6 +122,11 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data, return 0; } break; + case MutationKind::kAddSwizzle: + if (!mutator.AddSwizzle(wgsl_code)) { + return 0; + } + break; default: assert(false && "Unreachable"); return 0; diff --git a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc index 9e0b1a0c3f..fb1d5089a6 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/regex_fuzzer_tests.cc @@ -28,13 +28,15 @@ class WgslMutatorTest : public WgslMutator { using WgslMutator::DeleteInterval; using WgslMutator::DuplicateInterval; - using WgslMutator::FindClosingBrace; + using WgslMutator::FindClosingBracket; using WgslMutator::FindOperatorOccurrence; using WgslMutator::GetFunctionBodyPositions; using WgslMutator::GetFunctionCallIdentifiers; using WgslMutator::GetIdentifiers; using WgslMutator::GetIntLiterals; using WgslMutator::GetLoopBodyPositions; + using WgslMutator::GetSwizzles; + using WgslMutator::GetVectorConstructors; using WgslMutator::ReplaceRegion; using WgslMutator::SwapIntervals; }; @@ -238,11 +240,8 @@ TEST(GetIdentifierTest, GetIdentifierTest1) { std::vector> identifiers_pos = mutator.GetIdentifiers(wgsl_code); std::vector> ground_truth = { - {0, 2}, {3, 12}, {28, 3}, {32, 3}, {37, 4}, {42, 3}, {49, 5}, {55, 4}, - {60, 3}, {68, 4}, {73, 3}, {81, 4}, {86, 3}, {110, 6}, {123, 2}, {126, 11}, - {144, 7}, {152, 8}, {162, 4}, {167, 3}, {183, 12}, {209, 6}, {216, 4}, {221, 3}, - {244, 8}, {259, 2}, {262, 13}, {288, 12}, {319, 7}, 
{328, 14}, {352, 2}, {355, 12}, - {381, 3}, {385, 7}, {394, 3}, {399, 3}, {418, 12}}; + {3, 12}, {32, 3}, {49, 5}, {126, 11}, {144, 7}, {152, 8}, {183, 12}, + {262, 13}, {288, 12}, {328, 14}, {355, 12}, {385, 7}, {394, 3}, {418, 12}}; ASSERT_EQ(ground_truth, identifiers_pos); } @@ -316,7 +315,8 @@ TEST(InsertReturnTest, FindClosingBrace) { var foo_3 : i32 = -20; )"; size_t opening_bracket_pos = 18; - size_t closing_bracket_pos = mutator.FindClosingBrace(opening_bracket_pos, wgsl_code); + size_t closing_bracket_pos = + mutator.FindClosingBracket(opening_bracket_pos, wgsl_code, '{', '}'); // The -1 is needed since the function body starts after the left bracket. std::string function_body = @@ -363,7 +363,8 @@ TEST(InsertReturnTest, FindClosingBraceFailing) { foo_1 = 5 + 7; var foo_3 : i32 = -20;)"; size_t opening_bracket_pos = 18; - size_t closing_bracket_pos = mutator.FindClosingBrace(opening_bracket_pos, wgsl_code); + size_t closing_bracket_pos = + mutator.FindClosingBracket(opening_bracket_pos, wgsl_code, '{', '}'); // The -1 is needed since the function body starts after the left bracket. 
std::string function_body = @@ -639,5 +640,30 @@ TEST(TestReplaceFunctionCallWithBuiltin, FindFunctionCalls) { ASSERT_EQ(ground_truth, call_identifiers); } +TEST(TestAddSwizzle, FindSwizzles) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string code = R"(x +v.xxyy.wz.x; +u.rgba.rrg.b)"; + std::vector> swizzles = mutator.GetSwizzles(code); + std::vector> ground_truth{{3, 5}, {8, 3}, {11, 2}, + {16, 5}, {21, 4}, {25, 2}}; + ASSERT_EQ(ground_truth, swizzles); +} + +TEST(TestAddSwizzle, FindVectorConstructors) { + RandomGenerator generator(0); + WgslMutatorTest mutator(generator); + std::string code = R"( +vec4(vec2(1, 2), vec2(3)) + +vec2(1, abs(abs(2))) +)"; + std::vector> swizzles = mutator.GetVectorConstructors(code); + std::vector> ground_truth{{1, 40}, {11, 15}, {28, 12}, {43, 25}}; + ASSERT_EQ(ground_truth, swizzles); +} + } // namespace } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc index a96561347e..46db837ed0 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -39,9 +40,19 @@ std::vector WgslMutator::FindDelimiterIndices(const std::string& delimit return result; } +std::unordered_set WgslMutator::GetCommonKeywords() { + return {"array", "bool", "break", "compute", "continue", "f32", "fn", "fragment", + "i32", "if", "for", "let", "location", "loop", "ptr", "return", + "struct", "u32", "var", "vec2", "vec3", "vec4", "vertex", "while"}; +} + std::vector> WgslMutator::GetIdentifiers(const std::string& wgsl_code) { std::vector> result; + // To reduce the rate that invalid programs are produced, common keywords will be excluded from + // the identifiers that are returned. 
+ std::unordered_set common_keywords = GetCommonKeywords(); + // This regular expression works by looking for a character that // is not part of an identifier followed by a WGSL identifier, followed // by a character which cannot be part of a WGSL identifer. The regex @@ -54,6 +65,10 @@ std::vector> WgslMutator::GetIdentifiers(const std::st auto identifiers_end = std::sregex_iterator(); for (std::sregex_iterator i = identifiers_begin; i != identifiers_end; ++i) { + if (common_keywords.count(i->str()) > 0) { + // This is a common keyword, so skip it. + continue; + } result.push_back( {static_cast(i->prefix().second - wgsl_code.cbegin()), i->str().size()}); } @@ -99,13 +114,16 @@ std::vector> WgslMutator::GetIntLiterals(const std::st return result; } -size_t WgslMutator::FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code) { +size_t WgslMutator::FindClosingBracket(size_t opening_bracket_pos, + const std::string& wgsl_code, + char opening_bracket_character, + char closing_bracket_character) { size_t open_bracket_count = 1; size_t pos = opening_bracket_pos + 1; while (open_bracket_count >= 1 && pos < wgsl_code.size()) { - if (wgsl_code[pos] == '{') { + if (wgsl_code[pos] == opening_bracket_character) { ++open_bracket_count; - } else if (wgsl_code[pos] == '}') { + } else if (wgsl_code[pos] == closing_bracket_character) { --open_bracket_count; } ++pos; @@ -160,7 +178,7 @@ bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) { // function body. size_t left_bracket_pos = function.first; - size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); + size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}'); if (right_bracket_pos == 0) { return false; @@ -206,7 +224,7 @@ bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) { // bracket, and find a semi-colon within the loop body. 
size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions); - size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); + size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}'); if (right_bracket_pos == 0) { return false; @@ -515,11 +533,10 @@ bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) { // Pick a random function auto function = generator_.GetRandomElement(function_body_positions); - // Find the corresponding closing bracket for the function, and find a semi-colon within the - // function body. + // Find the corresponding closing bracket for the function. size_t left_bracket_pos = function.first; - size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); + size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}'); if (right_bracket_pos == 0) { return false; @@ -652,4 +669,127 @@ bool WgslMutator::ReplaceFunctionCallWithBuiltin(std::string& wgsl_code) { return true; } +bool WgslMutator::AddSwizzle(std::string& wgsl_code) { + std::vector> function_body_positions = + GetFunctionBodyPositions(wgsl_code); + + // No function was found in wgsl_code. + if (function_body_positions.empty()) { + return false; + } + + // Pick a random function + auto function = generator_.GetRandomElement(function_body_positions); + + // Find the corresponding closing bracket for the function. + size_t left_bracket_pos = function.first; + size_t right_bracket_pos = FindClosingBracket(left_bracket_pos, wgsl_code, '{', '}'); + + if (right_bracket_pos == 0) { + return false; + } + + std::string function_body( + wgsl_code.substr(left_bracket_pos, right_bracket_pos - left_bracket_pos)); + + // It makes sense to try applying swizzles to: + // - identifiers, because they might be vectors + auto identifiers = GetIdentifiers(function_body); + // - existing swizzles, e.g. 
to turn v.xy into v.xy.xx + auto swizzles = GetSwizzles(function_body); + // - vector constructors, e.g. to turn vec3(...) into vec3(...).yyz + auto vector_constructors = GetVectorConstructors(function_body); + + // Create a combined vector of all the possibilities for swizzling, so that they can be sampled + // from as a whole. + std::vector> combined; + combined.insert(combined.end(), identifiers.begin(), identifiers.end()); + combined.insert(combined.end(), swizzles.begin(), swizzles.end()); + combined.insert(combined.end(), vector_constructors.begin(), vector_constructors.end()); + + if (combined.empty()) { + // No opportunities for swizzling: give up. + return false; + } + + // Randomly create a swizzle operation. This is done without checking the potential length of + // the target vector. For identifiers this isn't possible without proper context. For existing + // swizzles and vector constructors it would be possible to check the length, but it is anyway + // good to stress-test swizzle validation code paths. + std::string swizzle = "."; + { + // Choose a swizzle length between 1 and 4, inclusive. + uint32_t swizzle_length = generator_.GetUInt32(1, 5); + // Decide whether to use xyzw or rgba as convenience names. + bool use_xyzw = generator_.GetBool(); + // Randomly choose a convenience name for each component of the swizzle. + for (uint32_t i = 0; i < swizzle_length; i++) { + switch (generator_.GetUInt32(4)) { + case 0: + swizzle += use_xyzw ? "x" : "r"; + break; + case 1: + swizzle += use_xyzw ? "y" : "g"; + break; + case 2: + swizzle += use_xyzw ? "z" : "b"; + break; + case 3: + swizzle += use_xyzw ? "w" : "a"; + break; + default: + assert(false && "Unreachable"); + break; + } + } + } + // Choose a random opportunity for swizzling and add the swizzle right after it. 
+ auto target = generator_.GetRandomElement(combined); + wgsl_code.insert(left_bracket_pos + target.first + target.second, swizzle); + return true; +} + +std::vector> WgslMutator::GetSwizzles(const std::string& wgsl_code) { + std::regex swizzle_regex("\\.(([xyzw]+)|([rgba]+))"); + std::vector> result; + + auto swizzles_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), swizzle_regex); + auto swizles_end = std::sregex_iterator(); + + for (std::sregex_iterator i = swizzles_begin; i != swizles_end; ++i) { + result.push_back( + {static_cast(i->prefix().second - wgsl_code.cbegin()), i->str().size()}); + } + return result; +} + +std::vector> WgslMutator::GetVectorConstructors( + const std::string& wgsl_code) { + // This regex recognises the prefixes of vector constructors, which have the form: + // "vecn(", with possible whitespace between tokens. + std::regex vector_constructor_prefix_regex("vec\\d[ \\n]*<[ \\n]*[a-z0-9_]+[ \\n]*>[^\\(]*\\("); + std::vector> result; + + auto vector_constructor_prefixes_begin = + std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), vector_constructor_prefix_regex); + auto vector_constructor_prefixes_end = std::sregex_iterator(); + + // Look through all of the vector constructor prefixes and see whether each one appears to + // correspond to a complete vector construction. + for (std::sregex_iterator i = vector_constructor_prefixes_begin; + i != vector_constructor_prefixes_end; ++i) { + // A prefix is deemed to correspond to a complete vector construction if it is possible to + // find a corresponding closing bracket for the "(" at the end of the prefix. + size_t closing_bracket = FindClosingBracket( + static_cast(i->suffix().first - wgsl_code.cbegin()), wgsl_code, '(', ')'); + if (closing_bracket != 0) { + // A closing bracket was found, so record the start and size of the entire vector + // constructor. 
+ size_t start = static_cast(i->prefix().second - wgsl_code.cbegin()); + result.push_back({start, closing_bracket - start + 1}); + } + } + return result; +} + } // namespace tint::fuzzers::regex_fuzzer diff --git a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h index fde4611d81..cd1f2b6032 100644 --- a/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h +++ b/src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -92,6 +93,12 @@ class WgslMutator { /// @return true if a function call replacement happened or false otherwise. bool ReplaceFunctionCallWithBuiltin(std::string& wgsl_code); + /// Given a WGSL-like string, adds a swizzle operation to either (a) an identifier, (b) a vector + /// constructor, or (c) an existing swizzle. + /// @param wgsl_code - the initial WGSL-like string that will be mutated. + /// @return true if a swizzle operation is added or false otherwise. + bool AddSwizzle(std::string& wgsl_code); + protected: /// Given index idx1 it delets the region of length interval_len /// starting at index idx1; @@ -108,13 +115,18 @@ class WgslMutator { /// @param wgsl_code - the string where the swap will occur. void DuplicateInterval(size_t idx1, size_t reg1_len, size_t idx2, std::string& wgsl_code); - /// Finds a possible closing brace corresponding to the opening - /// brace at position opening_bracket_pos. - /// @param opening_bracket_pos - the position of the opening brace. - /// @param wgsl_code - the WGSL-like string where the closing brace. + /// Finds a possible closing bracket corresponding to the opening + /// bracket at position opening_bracket_pos. + /// @param opening_bracket_pos - the position of the opening bracket. + /// @param wgsl_code - the WGSL-like string in which the closing bracket is searched for. + /// @param opening_bracket_character - the opening bracket character, e.g. 
(, {, <, or [ + /// @param closing_bracket_character - the closing bracket character, e.g. ), }, >, or ] /// @return the position of the closing bracket or 0 if there is no closing - /// brace. - size_t FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code); + /// bracket. + size_t FindClosingBracket(size_t opening_bracket_pos, + const std::string& wgsl_code, + char opening_bracket_character, + char closing_bracket_character); /// Returns the starting position of the bodies of the functions identified by an appropriate /// function, together with a boolean indicating whether the function returns a value or not. @@ -195,6 +207,16 @@ class WgslMutator { const std::string& wgsl_code, uint32_t start_index); + /// Finds all the swizzle operations in a WGSL-like string. + /// @param wgsl_code - the WGSL-like string where the swizzles will be found. + /// @return a vector with the positions and lengths of all the swizzles in wgsl_code. + std::vector> GetSwizzles(const std::string& wgsl_code); + + /// Finds all the vector constructors in a WGSL-like string. + /// @param wgsl_code - the WGSL-like string where the vector constructors will be found. + /// @return a vector with the positions and lengths of all the vector constructors in wgsl_code. + std::vector> GetVectorConstructors(const std::string& wgsl_code); + private: /// A function that given a delimiter, returns a vector that contains /// all the positions of the delimiter in the WGSL code. @@ -222,6 +244,14 @@ class WgslMutator { /// @return another WGSL operator falling into the same category. std::string ChooseRandomReplacementForOperator(const std::string& existing_operator); + /// Yields a fixed set of commonly-used WGSL keywords. The regex fuzzer relies heavily on + /// recognizing possible identifiers via regular expressions. There is a high chance that + /// keywords will be recognized as identifiers, which will lead to invalid code. 
It is valuable + /// for this to occur to some extent (to stress test validation), but it is useful to be able to + /// exclude the most common keywords so that invalidity does not occur too often. + /// @return a set of commonly-used WGSL keywords. + static std::unordered_set GetCommonKeywords(); + RandomGenerator& generator_; };