Regex fuzzer: Add return statement inside a randomly-chosen function

Adds a return statement in the body of a randomly-chosen function.
The return value is a randomly-chosen identifier or literal from
the WGSL shader.

Fixes: tint:1115.

Change-Id: Icdc4ff669cda343244e158ce791b4085fd52f7b9
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/61781
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Alastair Donaldson <afdx@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
egj 2021-09-28 14:57:54 +00:00 committed by Tint LUCI CQ
parent 9c7cd9e9c3
commit 2228ad19af
5 changed files with 419 additions and 41 deletions

View File

@ -83,6 +83,14 @@ class RandomGenerator {
/// @param size - number of elements in |data|, must be > 0
static uint64_t CalculateSeed(const uint8_t* data, size_t size);
/// Picks one element of vector v at a randomly generated index.
/// @param v - the vector from which the random element will be selected.
/// It must not be empty: for an empty vector `v.size() - 1` wraps around to
/// the largest value of the size type and the lookup goes out of bounds.
/// @return a copy of the selected element of vector v.
/// NOTE(review): the index is requested as GetUInt64(0, v.size() - 1), which
/// can only reach the last element if GetUInt64 treats its upper bound as
/// inclusive - confirm against GetUInt64's documented range.
template <typename T>
inline T GetRandomElement(const std::vector<T>& v) {
  const auto index = GetUInt64(0, v.size() - 1);
  return v[index];
}
private:
std::mt19937 engine_;

View File

@ -37,6 +37,7 @@ enum class MutationKind {
kDuplicateInterval,
kReplaceIdentifier,
kReplaceLiteral,
kInsertReturnStatement,
kNumMutationKinds
};
@ -95,6 +96,12 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
}
break;
case MutationKind::kInsertReturnStatement:
if (!InsertReturnStatement(wgsl_code, generator)) {
return 0;
}
break;
default:
assert(false && "Unreachable");
return 0;

View File

@ -195,56 +195,64 @@ TEST(ReplaceIdentifierTest, ReplaceIdentifierTest2) {
TEST(GetIdentifierTest, GetIdentifierTest1) {
std::string wgsl_code =
"fn clamp_0acf8f() {"
"var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());}"
"[[stage(vertex)]]"
"fn vertex_main() -> [[builtin(position)]] vec4<f32> {"
" clamp_0acf8f();"
" return vec4<f32>();}"
"[[stage(fragment)]]"
"fn fragment_main() {"
" clamp_0acf8f();}"
"[[stage(compute), workgroup_size(1)]]"
"fn compute_main() {"
"var<private> foo: f32 = 0.0;"
" clamp_0acf8f();}";
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();"
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {"
var<private> foo: f32 = 0.0;
clamp_0acf8f();
})";
std::vector<std::pair<size_t, size_t>> identifiers_pos =
GetIdentifiers(wgsl_code);
std::vector<std::pair<size_t, size_t>> ground_truth = {
std::make_pair(3, 12), std::make_pair(19, 3), std::make_pair(28, 4),
std::make_pair(40, 5), std::make_pair(51, 3), std::make_pair(59, 4),
std::make_pair(72, 4), std::make_pair(88, 5), std::make_pair(103, 2),
std::make_pair(113, 4), std::make_pair(125, 7), std::make_pair(145, 4),
std::make_pair(158, 12), std::make_pair(175, 6), std::make_pair(187, 3),
std::make_pair(197, 5), std::make_pair(214, 2), std::make_pair(226, 4),
std::make_pair(236, 12), std::make_pair(254, 5), std::make_pair(270, 14),
std::make_pair(289, 2), std::make_pair(300, 4), std::make_pair(308, 3),
std::make_pair(321, 3), std::make_pair(326, 3), std::make_pair(338, 12)};
std::make_pair(3, 12), std::make_pair(28, 3), std::make_pair(37, 4),
std::make_pair(49, 5), std::make_pair(60, 3), std::make_pair(68, 4),
std::make_pair(81, 4), std::make_pair(111, 5), std::make_pair(133, 2),
std::make_pair(143, 4), std::make_pair(155, 7), std::make_pair(175, 4),
std::make_pair(196, 12), std::make_pair(222, 6), std::make_pair(234, 3),
std::make_pair(258, 5), std::make_pair(282, 2), std::make_pair(294, 4),
std::make_pair(311, 12), std::make_pair(343, 5), std::make_pair(359, 14),
std::make_pair(385, 2), std::make_pair(396, 4), std::make_pair(414, 3),
std::make_pair(427, 3), std::make_pair(432, 3), std::make_pair(451, 12)};
ASSERT_EQ(ground_truth, identifiers_pos);
}
TEST(TestGetLiteralsValues, TestGetLiteralsValues1) {
std::string wgsl_code =
"fn clamp_0acf8f() {"
"var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());}"
"[[stage(vertex)]]"
"fn vertex_main() -> [[builtin(position)]] vec4<f32> {"
" clamp_0acf8f();"
"var foo_1: i32 = 3;"
" return vec4<f32>();}"
"[[stage(fragment)]]"
"fn fragment_main() {"
" clamp_0acf8f();}"
"[[stage(compute), workgroup_size(1)]]"
"fn compute_main() {"
"var<private> foo: f32 = 0.0;"
"var foo_2: i32 = 10;"
" clamp_0acf8f();}"
"foo_1 = 5 + 7;"
"var foo_3 : i32 = -20;";
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
std::vector<std::pair<size_t, size_t>> literals_pos =
GetIntLiterals(wgsl_code);
@ -260,6 +268,253 @@ TEST(TestGetLiteralsValues, TestGetLiteralsValues1) {
ASSERT_EQ(ground_truth, result);
}
// Checks that FindClosingBrace steps over the nested if/else braces inside
// clamp_0acf8f and reports the brace that closes the function body.
TEST(InsertReturnTest, FindClosingBrace) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;
)";
// Offset of the `{` that opens the body of clamp_0acf8f, i.e. the character
// right after "fn clamp_0acf8f() ".
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = FindClosingBrace(opening_bracket_pos, wgsl_code);
// The body is the text strictly between the two braces: it starts one
// character after the opening brace (+1) and stops right before the closing
// brace (-1 on the length).
std::string function_body = wgsl_code.substr(
opening_bracket_pos + 1, closing_bracket_pos - opening_bracket_pos - 1);
// Everything between the outer braces, including the nested if/else block.
std::string expected =
R"(
if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
)";
ASSERT_EQ(expected, function_body);
}
// Pins down a known limitation of FindClosingBrace: it counts `{` and `}`
// characters textually, so the `}` inside the comment on the first body line
// is taken as the brace that closes clamp_0acf8f. The extracted body is
// therefore cut short and must NOT equal the real function body.
TEST(InsertReturnTest, FindClosingBraceFailing) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
// This comment } causes the test to fail.
"if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
// Offset of the `{` that opens the body of clamp_0acf8f.
size_t opening_bracket_pos = 18;
size_t closing_bracket_pos = FindClosingBrace(opening_bracket_pos, wgsl_code);
// The body is the text strictly between the two braces, hence the +1 on the
// start and the -1 on the length.
std::string function_body = wgsl_code.substr(
opening_bracket_pos + 1, closing_bracket_pos - opening_bracket_pos - 1);
// The body a comment-aware brace matcher would be expected to extract.
std::string expected =
R"(// This comment } causes the test to fail.
"if(false){
} else{
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
})";
// Inequality is the expected outcome here; see the comment above the test.
ASSERT_NE(expected, function_body);
}
// Shows the textual effect of inserting a return statement right after a
// semicolon. Note that this test performs the insertion by hand with
// std::string::insert; it does not call InsertReturnStatement.
TEST(TestInsertReturn, TestInsertReturn1) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
// Collect the offset of every semicolon in the string, in increasing order.
std::vector<size_t> semicolon_pos;
for (size_t pos = wgsl_code.find(";", 0); pos != std::string::npos;
pos = wgsl_code.find(";", pos + 1)) {
semicolon_pos.push_back(pos);
}
// Insert a "return true;" statement immediately after the first semicolon,
// which belongs to the first function in the WGSL-like string above.
wgsl_code.insert(semicolon_pos[0] + 1, "return true;");
std::string expected_wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());return true;
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
// Checks which functions GetFunctionBodyPositions reports. Only vertex_main
// and vert_main declare a return type with `->`, so exactly two positions are
// expected; clamp_0acf8f, fragment_main and compute_main must be skipped.
TEST(TestInsertReturn, TestFunctionPositions) {
std::string wgsl_code =
R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
fn vert_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f();
var foo_1: i32 = 3;
return vec4<f32>();
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
std::vector<size_t> function_positions = GetFunctionBodyPositions(wgsl_code);
// Expected offsets of the opening braces of vertex_main and vert_main. The
// values depend on the exact whitespace of the raw string above.
std::vector<size_t> expected_positions = {193, 622};
ASSERT_EQ(expected_positions, function_positions);
}
// Checks that InsertReturnStatement refuses to mutate when the chosen function
// body holds no semicolon. The only candidates are the functions that declare
// a return type (vertex_main and vert_main), and neither body contains a
// semicolon, so whichever one the generator picks the call must report failure
// and leave the input untouched.
TEST(TestInsertReturn, TestMissingSemicolon) {
  std::string wgsl_code =
      R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>())
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
fn vert_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
  RandomGenerator generator(0);
  // The mutation must signal that it did nothing, not merely leave the string
  // unchanged by accident.
  ASSERT_FALSE(InsertReturnStatement(wgsl_code, generator));
  // No semicolons found in the function's body, so wgsl_code
  // should remain unchanged.
  std::string expected_wgsl_code =
      R"(fn clamp_0acf8f() {
var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>())
}
[[stage(vertex)]]
fn vertex_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
[[stage(fragment)]]
fn fragment_main() {
clamp_0acf8f();
}
[[stage(compute), workgroup_size(1)]]
fn compute_main() {
var<private> foo: f32 = 0.0;
var foo_2: i32 = 10;
clamp_0acf8f();
}
fn vert_main() -> [[builtin(position)]] vec4<f32> {
clamp_0acf8f()
var foo_1: i32 = 3
return vec4<f32>()
}
foo_1 = 5 + 7;
var foo_3 : i32 = -20;)";
  ASSERT_EQ(expected_wgsl_code, wgsl_code);
}
} // namespace
} // namespace regex_fuzzer
} // namespace fuzzers

View File

@ -89,6 +89,89 @@ std::vector<std::pair<size_t, size_t>> GetIntLiterals(const std::string& s) {
return result;
}
/// Scans forward from an opening brace for the brace that closes it, counting
/// nested `{`/`}` pairs on the way. Braces are matched purely textually, so a
/// brace inside a comment is counted like any other.
/// @param opening_bracket_pos - the position of the opening brace; the scan
/// starts at the character that follows it.
/// @param wgsl_code - the WGSL-like string that is scanned.
/// @return the position of the matching closing brace, or 0 if the string ends
/// before the brace is closed or opening_bracket_pos lies outside the string.
size_t FindClosingBrace(size_t opening_bracket_pos,
                        const std::string& wgsl_code) {
  // A position outside the string cannot have a matching brace. This also
  // keeps `opening_bracket_pos + 1` below from wrapping around for npos.
  if (opening_bracket_pos >= wgsl_code.size()) {
    return 0;
  }
  // Number of braces opened so far that are still waiting to be closed; the
  // brace at opening_bracket_pos accounts for the initial 1.
  size_t open_bracket_count = 1;
  for (size_t pos = opening_bracket_pos + 1; pos < wgsl_code.size(); ++pos) {
    if (wgsl_code[pos] == '{') {
      ++open_bracket_count;
    } else if (wgsl_code[pos] == '}' && --open_bracket_count == 0) {
      return pos;
    }
  }
  // Ran off the end of the string with at least one brace still open.
  return 0;
}
/// Locates the opening brace of every function that declares a return type.
/// A function header is recognised with the regular expression
/// `fn.*?->.*?\{`. Because `.` does not match line terminators in the default
/// ECMAScript grammar, the header must sit on a single line, from `fn` up to
/// and including the opening brace.
/// @param wgsl_code - the WGSL-like string that is searched.
/// @return the positions of the opening braces of the matched functions, in
/// order of appearance; empty if no function header matches.
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code) {
  // Finds all the functions with a non-void return value.
  const std::regex function_regex("fn.*?->.*?\\{");
  std::smatch match;
  std::vector<size_t> result;
  auto search_start(wgsl_code.cbegin());
  while (std::regex_search(search_start, wgsl_code.cend(), match,
                           function_regex)) {
    // The match ends with the opening brace, so the brace is the character
    // right before the start of the unmatched suffix.
    const auto match_end = match.suffix().first;
    result.push_back(static_cast<size_t>(match_end - wgsl_code.cbegin() - 1L));
    // Resume right after the brace so that later functions are found too.
    search_start = match_end;
  }
  return result;
}
bool InsertReturnStatement(std::string& wgsl_code, RandomGenerator& generator) {
std::vector<size_t> function_body_positions =
GetFunctionBodyPositions(wgsl_code);
// No function was found in wgsl_code.
if (function_body_positions.empty()) {
return false;
}
// Pick a random function's opening bracket, find the corresponding closing
// bracket, and find a semi-colon within the function body.
size_t left_bracket_pos = generator.GetRandomElement(function_body_positions);
size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
if (right_bracket_pos == 0) {
return false;
}
std::vector<size_t> semicolon_positions;
for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1);
pos < right_bracket_pos; pos = wgsl_code.find(";", pos + 1)) {
semicolon_positions.push_back(pos);
}
if (semicolon_positions.empty()) {
return false;
}
size_t semicolon_position = generator.GetRandomElement(semicolon_positions);
// Get all identifiers and integer literals to use as potential return values.
std::vector<std::pair<size_t, size_t>> identifiers =
GetIdentifiers(wgsl_code);
auto return_values = identifiers;
std::vector<std::pair<size_t, size_t>> int_literals =
GetIntLiterals(wgsl_code);
return_values.insert(return_values.end(), int_literals.begin(),
int_literals.end());
std::pair<size_t, size_t> return_value =
generator.GetRandomElement(return_values);
std::string return_statement =
"return " + wgsl_code.substr(return_value.first, return_value.second) +
";";
// Insert the return statement immediately after the semicolon.
wgsl_code.insert(semicolon_position + 1, return_statement);
return true;
}
void SwapIntervals(size_t idx1,
size_t reg1_len,
size_t idx2,
@ -140,7 +223,7 @@ bool SwapRandomIntervals(const std::string& delimiter,
std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 3 indices
// Need to have at least 3 indices.
if (delimiter_positions.size() < 3) {
return false;
}
@ -174,7 +257,7 @@ bool DeleteRandomInterval(const std::string& delimiter,
std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 2 indices
// Need to have at least 2 indices.
if (delimiter_positions.size() < 2) {
return false;
}

View File

@ -49,6 +49,24 @@ std::vector<std::pair<size_t, size_t>> GetIdentifiers(
std::vector<std::pair<size_t, size_t>> GetIntLiterals(
const std::string& wgsl_code);
/// Finds a possible closing brace corresponding to the opening
/// brace at position opening_bracket_pos.
/// @param opening_bracket_pos - the position of the opening brace.
/// @param wgsl_code - the WGSL-like string in which the closing brace is
/// searched for.
/// @return the position of the closing brace or 0 if there is no closing
/// brace.
size_t FindClosingBrace(size_t opening_bracket_pos,
const std::string& wgsl_code);
/// Returns the starting positions of the bodies of the functions
/// that match the regular expression: fn.*?->.*?\\{, which searches for the
/// keyword fn followed by the function name, its return type and opening brace.
/// Functions that do not declare a return type with -> are not matched.
/// @param wgsl_code - the WGSL-like string where the functions will be
/// searched.
/// @return a vector with the starting position of each function body in
/// wgsl_code, i.e. the position of the body's opening brace.
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code);
/// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions
/// in the interval (idx1, idx2] with the region in the interval (idx3, idx4]
/// in wgsl_text.
@ -154,6 +172,13 @@ bool ReplaceRandomIdentifier(std::string& wgsl_code,
bool ReplaceRandomIntLiteral(std::string& wgsl_code,
RandomGenerator& generator);
/// Inserts a return statement in a randomly chosen function of a
/// WGSL-like string. The return value is a randomly-chosen identifier
/// or literal in the string. The statement is placed immediately after a
/// randomly-chosen semicolon in the body of the function.
/// @param wgsl_code - WGSL-like string that will be mutated.
/// @param generator - the random number generator.
/// @return true if the mutation was successful or false otherwise.
bool InsertReturnStatement(std::string& wgsl_code, RandomGenerator& generator);
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint