Regex fuzzer: Add return statement inside a randomly-chosen function

Adds a return statement in the body of a randomly-chosen function.
The return value is a randomly-chosen identifier or literal from
the WGSL shader.

Fixes: tint:1115.

Change-Id: Icdc4ff669cda343244e158ce791b4085fd52f7b9
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/61781
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Alastair Donaldson <afdx@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
egj 2021-09-28 14:57:54 +00:00 committed by Tint LUCI CQ
parent 9c7cd9e9c3
commit 2228ad19af
5 changed files with 419 additions and 41 deletions

View File

@ -83,6 +83,14 @@ class RandomGenerator {
/// @param size - number of elements in |data|, must be > 0 /// @param size - number of elements in |data|, must be > 0
static uint64_t CalculateSeed(const uint8_t* data, size_t size); static uint64_t CalculateSeed(const uint8_t* data, size_t size);
/// Picks one element of vector v using this generator.
/// @param v - the vector to draw from. The index range is computed as
/// [0, v.size() - 1], so v must not be empty.
/// @return a copy of the selected element of v.
template <typename T>
inline T GetRandomElement(const std::vector<T>& v) {
  // NOTE(review): passing size() - 1 as the upper bound presumes GetUInt64
  // treats that bound as inclusive - confirm against GetUInt64's contract.
  const auto last_index = v.size() - 1;
  const auto chosen_index = GetUInt64(0, last_index);
  return v[chosen_index];
}
private: private:
std::mt19937 engine_; std::mt19937 engine_;

View File

@ -37,6 +37,7 @@ enum class MutationKind {
kDuplicateInterval, kDuplicateInterval,
kReplaceIdentifier, kReplaceIdentifier,
kReplaceLiteral, kReplaceLiteral,
kInsertReturnStatement,
kNumMutationKinds kNumMutationKinds
}; };
@ -95,6 +96,12 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
} }
break; break;
case MutationKind::kInsertReturnStatement:
if (!InsertReturnStatement(wgsl_code, generator)) {
return 0;
}
break;
default: default:
assert(false && "Unreachable"); assert(false && "Unreachable");
return 0; return 0;

View File

@ -195,56 +195,64 @@ TEST(ReplaceIdentifierTest, ReplaceIdentifierTest2) {
TEST(GetIdentifierTest, GetIdentifierTest1) { TEST(GetIdentifierTest, GetIdentifierTest1) {
std::string wgsl_code = std::string wgsl_code =
"fn clamp_0acf8f() {" R"(fn clamp_0acf8f() {
"var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());}" var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
"[[stage(vertex)]]" }
"fn vertex_main() -> [[builtin(position)]] vec4<f32> {" [[stage(vertex)]]
" clamp_0acf8f();" fn vertex_main() -> [[builtin(position)]] vec4<f32> {
" return vec4<f32>();}" clamp_0acf8f();"
"[[stage(fragment)]]" return vec4<f32>();
"fn fragment_main() {" }
" clamp_0acf8f();}" [[stage(fragment)]]
"[[stage(compute), workgroup_size(1)]]" fn fragment_main() {
"fn compute_main() {" clamp_0acf8f();
"var<private> foo: f32 = 0.0;" }
" clamp_0acf8f();}"; [[stage(compute), workgroup_size(1)]]
fn compute_main() {"
var<private> foo: f32 = 0.0;
clamp_0acf8f();
})";
std::vector<std::pair<size_t, size_t>> identifiers_pos = std::vector<std::pair<size_t, size_t>> identifiers_pos =
GetIdentifiers(wgsl_code); GetIdentifiers(wgsl_code);
std::vector<std::pair<size_t, size_t>> ground_truth = { std::vector<std::pair<size_t, size_t>> ground_truth = {
std::make_pair(3, 12), std::make_pair(19, 3), std::make_pair(28, 4), std::make_pair(3, 12), std::make_pair(28, 3), std::make_pair(37, 4),
std::make_pair(40, 5), std::make_pair(51, 3), std::make_pair(59, 4), std::make_pair(49, 5), std::make_pair(60, 3), std::make_pair(68, 4),
std::make_pair(72, 4), std::make_pair(88, 5), std::make_pair(103, 2), std::make_pair(81, 4), std::make_pair(111, 5), std::make_pair(133, 2),
std::make_pair(113, 4), std::make_pair(125, 7), std::make_pair(145, 4), std::make_pair(143, 4), std::make_pair(155, 7), std::make_pair(175, 4),
std::make_pair(158, 12), std::make_pair(175, 6), std::make_pair(187, 3), std::make_pair(196, 12), std::make_pair(222, 6), std::make_pair(234, 3),
std::make_pair(197, 5), std::make_pair(214, 2), std::make_pair(226, 4), std::make_pair(258, 5), std::make_pair(282, 2), std::make_pair(294, 4),
std::make_pair(236, 12), std::make_pair(254, 5), std::make_pair(270, 14), std::make_pair(311, 12), std::make_pair(343, 5), std::make_pair(359, 14),
std::make_pair(289, 2), std::make_pair(300, 4), std::make_pair(308, 3), std::make_pair(385, 2), std::make_pair(396, 4), std::make_pair(414, 3),
std::make_pair(321, 3), std::make_pair(326, 3), std::make_pair(338, 12)}; std::make_pair(427, 3), std::make_pair(432, 3), std::make_pair(451, 12)};
ASSERT_EQ(ground_truth, identifiers_pos); ASSERT_EQ(ground_truth, identifiers_pos);
} }
TEST(TestGetLiteralsValues, TestGetLiteralsValues1) { TEST(TestGetLiteralsValues, TestGetLiteralsValues1) {
std::string wgsl_code = std::string wgsl_code =
"fn clamp_0acf8f() {" R"(fn clamp_0acf8f() {
"var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());}" var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
"[[stage(vertex)]]" }
"fn vertex_main() -> [[builtin(position)]] vec4<f32> {" [[stage(vertex)]]
" clamp_0acf8f();" fn vertex_main() -> [[builtin(position)]] vec4<f32> {
"var foo_1: i32 = 3;" clamp_0acf8f();
" return vec4<f32>();}" var foo_1: i32 = 3;
"[[stage(fragment)]]" return vec4<f32>();
"fn fragment_main() {" }
" clamp_0acf8f();}" [[stage(fragment)]]
"[[stage(compute), workgroup_size(1)]]" fn fragment_main() {
"fn compute_main() {" clamp_0acf8f();
"var<private> foo: f32 = 0.0;" }
"var foo_2: i32 = 10;" [[stage(compute), workgroup_size(1)]]
" clamp_0acf8f();}" fn compute_main() {
"foo_1 = 5 + 7;" var<private> foo: f32 = 0.0;
"var foo_3 : i32 = -20;"; var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
std::vector<std::pair<size_t, size_t>> literals_pos = std::vector<std::pair<size_t, size_t>> literals_pos =
GetIntLiterals(wgsl_code); GetIntLiterals(wgsl_code);
@ -260,6 +268,253 @@ TEST(TestGetLiteralsValues, TestGetLiteralsValues1) {
ASSERT_EQ(ground_truth, result); ASSERT_EQ(ground_truth, result);
} }
// Checks that FindClosingBrace, started at the opening brace of the first
// function, returns the brace that closes that function rather than one of
// the braces of the nested if/else inside it.
TEST(InsertReturnTest, FindClosingBrace) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;
)";
// Index of the '{' that ends the first fixture line "fn clamp_0acf8f() {".
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = FindClosingBrace(opening_bracket_pos, wgsl_code);
// The body is the text strictly between the two braces: start one past the
// opening brace and subtract one from the length to leave out the closing one.
std::string function_body = wgsl_code.substr(
opening_bracket_pos + 1, closing_bracket_pos - opening_bracket_pos - 1);
// Expected body: everything between the outer braces, including the newline
// that follows the opening brace and the one that precedes the closing brace.
std::string expected =
R"(
if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
)";
ASSERT_EQ(expected, function_body);
}
// Documents a known limitation: FindClosingBrace counts every '{' and '}'
// character, so a '}' inside a comment is taken for a real closing brace and
// the extracted text is not the true function body.
TEST(InsertReturnTest, FindClosingBraceFailing) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
// This comment } causes the test to fail.
"if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
// Index of the '{' that ends the first fixture line "fn clamp_0acf8f() {".
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = FindClosingBrace(opening_bracket_pos, wgsl_code);
// The body is the text strictly between the two braces: start one past the
// opening brace and subtract one from the length to leave out the closing one.
std::string function_body = wgsl_code.substr(
opening_bracket_pos + 1, closing_bracket_pos - opening_bracket_pos - 1);
// NOTE(review): as written, |expected| does not begin with the newline that
// follows the opening brace in |wgsl_code|, so the inequality below may hold
// independently of the '}' in the comment - confirm this is intended.
std::string expected =
R"(// This comment } causes the test to fail.
"if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
})";
ASSERT_NE(expected, function_body);
}
// Checks the text produced by inserting "return true;" right after the first
// semicolon of the fixture. The insertion is done here with
// std::string::insert; this test does not call InsertReturnStatement.
TEST(TestInsertReturn, TestInsertReturn1) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
// Collect the position of every ';' in the fixture, in increasing order.
std::vector<size_t> semicolon_pos;
for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos;
pos = wgsl_code.find(";", pos + 1)) {
semicolon_pos.push_back(pos);
}
// Insert a "return true;" statement immediately after the first semicolon,
// which belongs to the first function of the WGSL-like string above.
wgsl_code.insert(semicolon_pos[0] + 1, "return true;");
// Same fixture with "return true;" appended to the first statement.
std::string expected_wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());return true;
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
// Checks that GetFunctionBodyPositions reports exactly two positions for a
// fixture in which only vertex_main and vert_main declare a return type
// with "->".
TEST(TestInsertReturn, TestFunctionPositions) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
fn vert_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
std::vector<size_t> function_positions = GetFunctionBodyPositions(wgsl_code);
// NOTE(review): these hard-coded offsets depend on the exact bytes of the
// fixture above, including its whitespace - presumably they are the opening
// braces of vertex_main and vert_main; re-derive them if the fixture changes.
std::vector<size_t> expected_positions = {193, 622};
ASSERT_EQ(expected_positions, function_positions);
}
// Checks that InsertReturnStatement reports failure and leaves the input
// untouched when the functions it can pick contain no semicolon. Only
// vertex_main and vert_main declare a return type with "->", and neither body
// has a ';', so there is no place to insert a return statement.
TEST(TestInsertReturn, TestMissingSemicolon) {
  std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>())
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
fn vert_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";

  // Snapshot the input before mutating so the expectation cannot drift from
  // the fixture.
  const std::string expected_wgsl_code = wgsl_code;

  RandomGenerator generator(0);
  // No semicolons are found in the chosen function's body, so the mutation
  // must report failure...
  ASSERT_FALSE(InsertReturnStatement(wgsl_code, generator));
  // ...and wgsl_code must remain unchanged.
  ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
} // namespace } // namespace
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers

View File

@ -89,6 +89,89 @@ std::vector<std::pair<size_t, size_t>> GetIntLiterals(const std::string& s) {
return result; return result;
} }
/// Finds the brace that closes the one at opening_bracket_pos by counting
/// nested '{' and '}' characters. The scan is purely textual: braces inside
/// comments are counted like any other, and the character at
/// opening_bracket_pos itself is not inspected.
/// @param opening_bracket_pos - the position of the opening brace.
/// @param wgsl_code - the WGSL-like string to scan.
/// @return the position of the matching closing brace, or 0 if there is none
/// (including when opening_bracket_pos is not a valid index into wgsl_code).
size_t FindClosingBrace(size_t opening_bracket_pos,
                        const std::string& wgsl_code) {
  // An opening position at or past the end leaves nothing to scan. Without
  // this guard, opening_bracket_pos + 1 could exceed size() (or wrap to 0).
  if (opening_bracket_pos >= wgsl_code.size()) {
    return 0;
  }

  // Number of braces opened but not yet closed; the opening brace counts.
  size_t open_bracket_count = 1;
  for (size_t pos = opening_bracket_pos + 1; pos < wgsl_code.size(); ++pos) {
    if (wgsl_code[pos] == '{') {
      ++open_bracket_count;
    } else if (wgsl_code[pos] == '}') {
      --open_bracket_count;
      if (open_bracket_count == 0) {
        return pos;
      }
    }
  }

  // Reached the end of the string with at least one brace still open.
  return 0;
}
/// Returns the position of the opening brace of every function in wgsl_code
/// that matches the regular expression fn.*?->.*?\{ - i.e. the keyword fn
/// followed, on the same line, by "->" and then "{". Functions declared
/// without "->" are not reported.
/// @param wgsl_code - the WGSL-like string to search.
/// @return the positions of the opening braces, in increasing order.
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code) {
  // Finds all the functions with a non-void return value. The pattern never
  // changes, so it is compiled once rather than on every call.
  static const std::regex kFunctionRegex("fn.*?->.*?\\{");

  std::vector<size_t> result;
  std::smatch match;
  auto search_start = wgsl_code.cbegin();
  while (std::regex_search(search_start, wgsl_code.cend(), match,
                           kFunctionRegex)) {
    // The match ends with '{', so the brace sits one character before the
    // start of the unmatched suffix.
    const auto suffix_offset = match.suffix().first - wgsl_code.cbegin();
    result.push_back(static_cast<size_t>(suffix_offset - 1));
    // Resume the search right after the brace that was just reported.
    search_start = match.suffix().first;
  }
  return result;
}
bool InsertReturnStatement(std::string& wgsl_code, RandomGenerator& generator) {
std::vector<size_t> function_body_positions =
GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code.
if (function_body_positions.empty()) {
return false;
}
// Pick a random function's opening bracket, find the corresponding closing
// bracket, and find a semi-colon within the function body.
size_t left_bracket_pos = generator.GetRandomElement(function_body_positions);
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
if (right_bracket_pos == 0) {
return false;
}
std::vector<size_t> semicolon_positions;
for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1);
pos < right_bracket_pos; pos = wgsl_code.find(";", pos + 1)) {
semicolon_positions.push_back(pos);
}
if (semicolon_positions.empty()) {
return false;
}
size_t semicolon_position = generator.GetRandomElement(semicolon_positions);
// Get all identifiers and integer literals to use as potential return values.
std::vector<std::pair<size_t, size_t>> identifiers =
GetIdentifiers(wgsl_code);
auto return_values = identifiers;
std::vector<std::pair<size_t, size_t>> int_literals =
GetIntLiterals(wgsl_code);
return_values.insert(return_values.end(), int_literals.begin(),
int_literals.end());
std::pair<size_t, size_t> return_value =
generator.GetRandomElement(return_values);
std::string return_statement =
"return " + wgsl_code.substr(return_value.first, return_value.second) +
";";
// Insert the return statement immediately after the semicolon.
wgsl_code.insert(semicolon_position + 1, return_statement);
return true;
}
void SwapIntervals(size_t idx1, void SwapIntervals(size_t idx1,
size_t reg1_len, size_t reg1_len,
size_t idx2, size_t idx2,
@ -140,7 +223,7 @@ bool SwapRandomIntervals(const std::string& delimiter,
std::vector<size_t> delimiter_positions = std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, wgsl_code); FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 3 indices // Need to have at least 3 indices.
if (delimiter_positions.size() < 3) { if (delimiter_positions.size() < 3) {
return false; return false;
} }
@ -174,7 +257,7 @@ bool DeleteRandomInterval(const std::string& delimiter,
std::vector<size_t> delimiter_positions = std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, wgsl_code); FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 2 indices // Need to have at least 2 indices.
if (delimiter_positions.size() < 2) { if (delimiter_positions.size() < 2) {
return false; return false;
} }

View File

@ -49,6 +49,24 @@ std::vector<std::pair<size_t, size_t>> GetIdentifiers(
std::vector<std::pair<size_t, size_t>> GetIntLiterals( std::vector<std::pair<size_t, size_t>> GetIntLiterals(
const std::string& wgsl_code); const std::string& wgsl_code);
/// Finds a possible closing brace corresponding to the opening
/// brace at position opening_bracket_pos.
/// @param opening_bracket_pos - the position of the opening brace.
/// @param wgsl_code - the WGSL-like string in which the closing brace is
/// searched for.
/// @return the position of the closing brace or 0 if there is no closing
/// brace.
size_t FindClosingBrace(size_t opening_bracket_pos,
const std::string& wgsl_code);
/// Returns the starting_position of the bodies of the functions
/// that follow the regular expression: fn.*?->.*?\\{, which searches for the
/// keyword fn followed by the function name, its return type and opening brace.
/// @param wgsl_code - the WGSL-like string where the functions will be
/// searched.
/// @return a vector with the starting position of the function bodies in
/// wgsl_code.
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code);
/// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions /// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions
/// in the interval (idx1, idx2] with the region in the interval (idx3, idx4] /// in the interval (idx1, idx2] with the region in the interval (idx3, idx4]
/// in wgsl_text. /// in wgsl_text.
@ -154,6 +172,13 @@ bool ReplaceRandomIdentifier(std::string& wgsl_code,
bool ReplaceRandomIntLiteral(std::string& wgsl_code, bool ReplaceRandomIntLiteral(std::string& wgsl_code,
RandomGenerator& generator); RandomGenerator& generator);
/// Inserts a return statement in a randomly chosen function of a
/// WGSL-like string. The return value is a randomly-chosen identifier
/// or literal in the string.
/// @param wgsl_code - WGSL-like string that will be mutated.
/// @param generator - the random number generator.
/// @return true if the mutation was successful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code, RandomGenerator& generator);
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers
} // namespace tint } // namespace tint