Regex fuzzer: identifier mutation

Mutates a WGSL-like string by replacing a randomly-selected identifier
with a different randomly-selected identifier.

Change-Id: Iecf45ad2800677cf3609b30d415520e5f2a05ba0
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60561
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Alastair Donaldson <afdx@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
egj 2021-08-04 07:12:20 +00:00 committed by Tint LUCI CQ
parent 51750f15d2
commit 98fbf241d8
3 changed files with 206 additions and 57 deletions

View File

@ -25,13 +25,13 @@ namespace {
// Swaps two non-consecutive regions in the edge // Swaps two non-consecutive regions in the edge
TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) { TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) {
std::string R1 = ";region1;", R2 = ";regionregion2", std::string R1 = ";region1;", R2 = ";regionregion2;",
R3 = ";regionregionregion3;"; R3 = ";regionregionregion3;";
std::string all_regions = R1 + R2 + R3; std::string all_regions = R1 + R2 + R3;
// this call should swap R1 with R3. // this call should swap R1 with R3.
SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(), SwapIntervals(0, R1.length(), R1.length() + R2.length(), R3.length(),
all_regions.length() - 1, all_regions); all_regions);
ASSERT_EQ(R3 + R2 + R1, all_regions); ASSERT_EQ(R3 + R2 + R1, all_regions);
} }
@ -44,15 +44,15 @@ TEST(SwapRegionsTest, SwapIntervalsNonConsecutiveNonEdge) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// this call should swap R2 with R4. // this call should swap R2 with R4.
SwapIntervals(R1.length(), R1.length() + R2.length() - 1, SwapIntervals(R1.length(), R2.length(),
R1.length() + R2.length() + R3.length(), R1.length() + R2.length() + R3.length(), R4.length(),
R1.length() + R2.length() + R3.length() + R4.length() - 1,
all_regions); all_regions);
ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions); ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
} }
// Swaps two consecutive regions not in the edge (sorrounded by other regions) // Swaps two consecutive regions not in the edge (sorrounded by other
// regions)
TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) { TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
std::string R1 = ";region1;", R2 = ";regionregion2;", std::string R1 = ";region1;", R2 = ";regionregion2;",
R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;", R3 = ";regionregionregion3;", R4 = ";regionregionregionregion4;",
@ -60,9 +60,8 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
std::string all_regions = R1 + R2 + R3 + R4; std::string all_regions = R1 + R2 + R3 + R4;
// this call should swap R2 with R3. // this call should swap R2 with R3.
SwapIntervals(R1.length(), R1.length() + R2.length() - 1, SwapIntervals(R1.length(), R2.length(), R1.length() + R2.length(),
R1.length() + R2.length(), R3.length(), all_regions);
R1.length() + R2.length() + R3.length() - 1, all_regions);
ASSERT_EQ(R1 + R3 + R2 + R4, all_regions); ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
} }
@ -76,12 +75,9 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveNonEdge) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// this call should swap R4 with R5. // this call should swap R4 with R5.
SwapIntervals( SwapIntervals(R1.length() + R2.length() + R3.length(), R4.length(),
R1.length() + R2.length() + R3.length(), R1.length() + R2.length() + R3.length() + R4.length(),
R1.length() + R2.length() + R3.length() + R4.length() - 1, R5.length(), all_regions);
R1.length() + R2.length() + R3.length() + R4.length(),
R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1,
all_regions);
ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions); ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
} }
@ -94,7 +90,7 @@ TEST(DeleteRegionTest, DeleteFirstRegion) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// This call should delete R1. // This call should delete R1.
DeleteInterval(0, R1.length() - 1, all_regions); DeleteInterval(0, R1.length(), all_regions);
ASSERT_EQ(";" + R2 + R3 + R4 + R5, all_regions); ASSERT_EQ(";" + R2 + R3 + R4 + R5, all_regions);
} }
@ -108,7 +104,7 @@ TEST(DeleteRegionTest, DeleteLastRegion) {
// This call should delete R5. // This call should delete R5.
DeleteInterval(R1.length() + R2.length() + R3.length() + R4.length(), DeleteInterval(R1.length() + R2.length() + R3.length() + R4.length(),
all_regions.length() - 1, all_regions); R5.length(), all_regions);
ASSERT_EQ(R1 + R2 + R3 + R4 + ";", all_regions); ASSERT_EQ(R1 + R2 + R3 + R4 + ";", all_regions);
} }
@ -121,8 +117,7 @@ TEST(DeleteRegionTest, DeleteMiddleRegion) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// This call should delete R3. // This call should delete R3.
DeleteInterval(R1.length() + R2.length(), DeleteInterval(R1.length() + R2.length(), R3.length(), all_regions);
R1.length() + R2.length() + R3.length() - 1, all_regions);
ASSERT_EQ(R1 + R2 + ";" + R4 + R5, all_regions); ASSERT_EQ(R1 + R2 + ";" + R4 + R5, all_regions);
} }
@ -134,7 +129,7 @@ TEST(InsertRegionTest, InsertRegionTest1) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// This call should insert R2 after R4. // This call should insert R2 after R4.
DuplicateInterval(R1.length(), R1.length() + R2.length() - 1, DuplicateInterval(R1.length(), R2.length(),
R1.length() + R2.length() + R3.length() + R4.length() - 1, R1.length() + R2.length() + R3.length() + R4.length() - 1,
all_regions); all_regions);
@ -149,9 +144,8 @@ TEST(InsertRegionTest, InsertRegionTest2) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// This call should insert R3 after R1. // This call should insert R3 after R1.
DuplicateInterval(R1.length() + R2.length(), DuplicateInterval(R1.length() + R2.length(), R3.length(), R1.length() - 1,
R1.length() + R2.length() + R3.length() - 1, all_regions);
R1.length() - 1, all_regions);
ASSERT_EQ(R1 + R3.substr(1, R3.length() - 1) + R2 + R3 + R4 + R5, ASSERT_EQ(R1 + R3.substr(1, R3.length() - 1) + R2 + R3 + R4 + R5,
all_regions); all_regions);
@ -165,13 +159,73 @@ TEST(InsertRegionTest, InsertRegionTest3) {
std::string all_regions = R1 + R2 + R3 + R4 + R5; std::string all_regions = R1 + R2 + R3 + R4 + R5;
// This call should insert R2 after R5. // This call should insert R2 after R5.
DuplicateInterval(R1.length(), R1.length() + R2.length() - 1, DuplicateInterval(R1.length(), R2.length(), all_regions.length() - 1,
all_regions.length() - 1, all_regions); all_regions);
ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2.substr(1, R2.length() - 1), ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2.substr(1, R2.length() - 1),
all_regions); all_regions);
} }
// Overwrites the third region with a copy of the first region and checks
// that the surrounding regions are left untouched.
TEST(ReplaceIdentifierTest, ReplaceIdentifierTest1) {
  const std::string first = "|region1|";
  const std::string second = "; region2;";
  const std::string third = "---------region3---------";
  const std::string fourth = "++region4++";
  const std::string fifth = "***region5***";
  std::string code = first + second + third + fourth + fifth;

  // The destination (third region) starts right after the first two regions.
  const size_t target_pos = first.length() + second.length();

  // Source is the first region, which starts at offset 0.
  ReplaceRegion(0, first.length(), target_pos, third.length(), code);

  ASSERT_EQ(first + second + first + fourth + fifth, code);
}
// Overwrites the last region with a copy of a middle region, i.e. the
// source lies before the destination and the destination ends the string.
TEST(ReplaceIdentifierTest, ReplaceIdentifierTest2) {
  const std::string first = "|region1|";
  const std::string second = "; region2;";
  const std::string third = "---------region3---------";
  const std::string fourth = "++region4++";
  const std::string fifth = "***region5***";
  std::string code = first + second + third + fourth + fifth;

  // Source: the third region.
  const size_t source_pos = first.length() + second.length();
  // Destination: the fifth region, which follows the first four.
  const size_t target_pos =
      first.length() + second.length() + third.length() + fourth.length();

  ReplaceRegion(source_pos, third.length(), target_pos, fifth.length(), code);

  ASSERT_EQ(first + second + third + fourth + third, code);
}
// Checks the (position, length) pairs that GetIdentifiers reports for a small
// WGSL program.
//
// The expected offsets are absolute positions in the concatenated literal, so
// whitespace inside the literals matters: the four indented statement lines
// start with exactly two spaces. With any other indentation every pair from
// (158, 12) onwards would be shifted.
//
// The expected list is what GetIdentifiers reports, not every identifier in
// the program: for instance the leading `fn` at offset 0 and `res` at offset
// 23 are absent.
TEST(GetIdentifierTest, GetIdentifierTest1) {
  std::string wgsl_code =
      "fn clamp_0acf8f() {"
      "var res: vec2<f32> = clamp(vec2<f32>(), vec2<f32>(), vec2<f32>());}"
      "[[stage(vertex)]]"
      "fn vertex_main() -> [[builtin(position)]] vec4<f32> {"
      "  clamp_0acf8f();"
      "  return vec4<f32>();}"
      "[[stage(fragment)]]"
      "fn fragment_main() {"
      "  clamp_0acf8f();}"
      "[[stage(compute), workgroup_size(1)]]"
      "fn compute_main() {"
      "var<private> foo: f32 = 0.0;"
      "  clamp_0acf8f();}";

  std::vector<std::pair<size_t, size_t>> identifiers_pos =
      GetIdentifiers(wgsl_code);

  // Each entry is {start offset, length}.
  std::vector<std::pair<size_t, size_t>> ground_truth = {
      {3, 12},   {19, 3},   {28, 4},   {40, 5},  {51, 3},   {59, 4},
      {72, 4},   {88, 5},   {103, 2},  {113, 4}, {125, 7},  {145, 4},
      {158, 12}, {175, 6},  {187, 3},  {197, 5}, {214, 2},  {226, 4},
      {236, 12}, {254, 5},  {270, 14}, {289, 2}, {300, 4},  {308, 3},
      {321, 3},  {326, 3},  {338, 12}};

  ASSERT_EQ(ground_truth, identifiers_pos);
}
} // namespace } // namespace
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers

View File

@ -40,31 +40,68 @@ std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
return result; return result;
} }
void SwapIntervals(size_t idx1, std::vector<std::pair<size_t, size_t>> GetIdentifiers(
size_t idx2, const std::string& wgsl_code) {
size_t idx3, std::vector<std::pair<size_t, size_t>> result;
size_t idx4,
std::string& wgsl_code) {
std::string region_1 = wgsl_code.substr(idx1 + 1, idx2 - idx1);
std::string region_2 = wgsl_code.substr(idx3 + 1, idx4 - idx3); // This regular expression works by looking for a character that
// is not part of an identifier followed by a WGSL identifier, followed
// by a character which cannot be part of a WGSL identifer. The regex
// for the WGSL identifier is obtained from:
// https://www.w3.org/TR/WGSL/#identifiers.
std::regex wgsl_identifier_regex(
"[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]");
std::smatch match;
std::string::const_iterator search_start(wgsl_code.cbegin());
std::string prefix;
while (regex_search(search_start, wgsl_code.cend(), match,
wgsl_identifier_regex) == true) {
prefix += match.prefix();
result.push_back(std::make_pair(prefix.size() + 1, match.str(1).size()));
prefix += match.str(0);
search_start = match.suffix().first;
}
return result;
}
void SwapIntervals(size_t idx1,
size_t reg1_len,
size_t idx2,
size_t reg2_len,
std::string& wgsl_code) {
std::string region_1 = wgsl_code.substr(idx1 + 1, reg1_len - 1);
std::string region_2 = wgsl_code.substr(idx2 + 1, reg2_len - 1);
// The second transformation is done first as it doesn't affect ind1 and ind2 // The second transformation is done first as it doesn't affect ind1 and ind2
wgsl_code.replace(idx3 + 1, region_2.size(), region_1); wgsl_code.replace(idx2 + 1, region_2.size(), region_1);
wgsl_code.replace(idx1 + 1, region_1.size(), region_2); wgsl_code.replace(idx1 + 1, region_1.size(), region_2);
} }
void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code) { void DeleteInterval(size_t idx1, size_t reg_len, std::string& wgsl_code) {
wgsl_code.erase(idx1 + 1, idx2 - idx1); wgsl_code.erase(idx1 + 1, reg_len - 1);
} }
void DuplicateInterval(size_t idx1, void DuplicateInterval(size_t idx1,
size_t reg1_len,
size_t idx2, size_t idx2,
size_t idx3,
std::string& wgsl_code) { std::string& wgsl_code) {
std::string region = wgsl_code.substr(idx1 + 1, idx2 - idx1); std::string region = wgsl_code.substr(idx1 + 1, reg1_len - 1);
wgsl_code.insert(idx3 + 1, region); wgsl_code.insert(idx2 + 1, region);
}
/// Overwrites one region of wgsl_code with a copy of another region of the
/// same string. The source region is left in place; only the destination
/// region changes, so the string grows or shrinks by the difference between
/// the two region lengths.
/// @param idx1 - starting position of the source region (the text to copy).
/// @param id1_len - length of the source region.
/// @param idx2 - starting position of the destination region (the text that
/// gets overwritten).
/// @param id2_len - length of the destination region.
/// @param wgsl_code - the string that is modified in place.
/// A length that runs past the end of the string is clamped to the end of the
/// string; a starting position greater than the string size makes the
/// underlying std::string call throw std::out_of_range.
void ReplaceRegion(size_t idx1,
                   size_t id1_len,
                   size_t idx2,
                   size_t id2_len,
                   std::string& wgsl_code) {
  // The source text is copied out before the string is edited, because the
  // edit may shift or overwrite the characters it came from.
  const std::string replacement = wgsl_code.substr(idx1, id1_len);

  // replace() clamps the count to the end of the string in the same way
  // substr() does, so the destination length can be passed straight through
  // without first materialising the destination text.
  wgsl_code.replace(idx2, id2_len, replacement);
}
bool SwapRandomIntervals(const std::string& delimiter, bool SwapRandomIntervals(const std::string& delimiter,
@ -89,8 +126,10 @@ bool SwapRandomIntervals(const std::string& delimiter,
size_t ind4 = GetRandomIntFromRange( size_t ind4 = GetRandomIntFromRange(
ind3 + 1U, delimiter_positions.size() - 1U, generator); ind3 + 1U, delimiter_positions.size() - 1U, generator);
SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2], SwapIntervals(delimiter_positions[ind1],
delimiter_positions[ind3], delimiter_positions[ind4], delimiter_positions[ind2] - delimiter_positions[ind1],
delimiter_positions[ind3],
delimiter_positions[ind4] - delimiter_positions[ind3],
wgsl_code); wgsl_code);
return true; return true;
@ -112,7 +151,8 @@ bool DeleteRandomInterval(const std::string& delimiter,
size_t ind2 = GetRandomIntFromRange( size_t ind2 = GetRandomIntFromRange(
ind1 + 1U, delimiter_positions.size() - 1U, generator); ind1 + 1U, delimiter_positions.size() - 1U, generator);
DeleteInterval(delimiter_positions[ind1], delimiter_positions[ind2], DeleteInterval(delimiter_positions[ind1],
delimiter_positions[ind2] - delimiter_positions[ind1],
wgsl_code); wgsl_code);
return true; return true;
@ -137,12 +177,40 @@ bool DuplicateRandomInterval(const std::string& delimiter,
size_t ind3 = size_t ind3 =
GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator); GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator);
DuplicateInterval(delimiter_positions[ind1], delimiter_positions[ind2], DuplicateInterval(delimiter_positions[ind1],
delimiter_positions[ind2] - delimiter_positions[ind1],
delimiter_positions[ind3], wgsl_code); delimiter_positions[ind3], wgsl_code);
return true; return true;
} }
bool ReplaceRandomIdentifier(std::string& wgsl_code, std::mt19937& generator) {
std::vector<std::pair<size_t, size_t>> identifiers =
GetIdentifiers(wgsl_code);
// Need at least 2 identifiers
if (identifiers.size() < 2) {
return false;
}
size_t id1_index =
GetRandomIntFromRange(0, identifiers.size() - 1U, generator);
size_t id2_index =
GetRandomIntFromRange(0, identifiers.size() - 1U, generator);
// The two identifiers must be different
while (id1_index == id2_index) {
id2_index = GetRandomIntFromRange(0, identifiers.size() - 1U, generator);
}
ReplaceRegion(identifiers[id1_index].first, identifiers[id1_index].second,
identifiers[id2_index].first, identifiers[id2_index].second,
wgsl_code);
return true;
}
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers
} // namespace tint } // namespace tint

View File

@ -32,38 +32,58 @@ namespace regex_fuzzer {
std::vector<size_t> FindDelimiterIndices(const std::string& delimiter, std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
const std::string& wgsl_code); const std::string& wgsl_code);
/// A function that finds identifiers in a WGSL-like string.
/// An identifier is only matched when it is immediately preceded by a
/// character that is not a letter and immediately followed by a character that
/// cannot be part of an identifier, so an identifier at the very start or the
/// very end of wgsl_code is not reported. Each match also consumes the
/// character that follows the identifier, so an identifier separated from the
/// previous match by a single character may be skipped or reported only in
/// part.
/// @param wgsl_code - the WGSL-like string where the identifiers will be found.
/// @return a vector of (starting position, length) pairs, one per matched
/// identifier, in order of appearance in wgsl_code.
std::vector<std::pair<size_t, size_t>> GetIdentifiers(
const std::string& wgsl_code);
/// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions /// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the regions
/// in the interval (idx1, idx2] with the region in the interval (idx3, idx4] /// in the interval (idx1, idx2] with the region in the interval (idx3, idx4]
/// in wgsl_text. /// in wgsl_text.
/// @param idx1 - starting index of the first region. /// @param idx1 - starting index of the first region.
/// @param idx2 - terminating index of the second region. /// @param reg1_len - length of the first region.
/// @param idx3 - starting index of the second region. /// @param idx2 - starting index of the second region.
/// @param idx4 - terminating index of the second region. /// @param reg2_len - length of the second region.
/// @param wgsl_code - the string where the swap will occur. /// @param wgsl_code - the string where the swap will occur.
void SwapIntervals(size_t idx1, void SwapIntervals(size_t idx1,
size_t reg1_len,
size_t idx2, size_t idx2,
size_t idx3, size_t reg2_len,
size_t idx4,
std::string& wgsl_code); std::string& wgsl_code);
/// Given 2 indices, idx1, idx2, it delets the region in the interval (idx1, /// Given index idx1 it delets the region of length interval_len
/// idx2]. /// starting at index idx1;
/// @param idx1 - starting index of the first region. /// @param idx1 - starting index of the first region.
/// @param idx2 - terminating index of the second region. /// @param reg_len - terminating index of the second region.
/// @param wgsl_code - the string where the swap will occur. /// @param wgsl_code - the string where the swap will occur.
void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code); void DeleteInterval(size_t idx1, size_t reg_len, std::string& wgsl_code);
/// Given 3 indices, idx1, idx2, and idx3 it inserts the /// Given 2 indices, idx1, idx2, it inserts the region of length
/// region in (idx1, idx2] after idx3. /// reg1_len starting at idx1 after idx2.
/// @param idx1 - starting index of region. /// @param idx1 - starting index of region.
/// @param idx2 - terminating index of the region. /// @param reg1_len - length of the region.
/// @param idx3 - the position where the region will be inserted. /// @param idx2 - the position where the region will be inserted.
/// @param wgsl_code - the string where the swap will occur. /// @param wgsl_code - the string where the swap will occur.
void DuplicateInterval(size_t idx1, void DuplicateInterval(size_t idx1,
size_t reg1_len,
size_t idx2, size_t idx2,
size_t idx3,
std::string& wgsl_code); std::string& wgsl_code);
/// Replaces a region of a WGSL-like string of length id2_len starting
/// at position idx2 with a copy of the region of length id1_len starting at
/// position idx1. The region at idx1 is the source and is left in place;
/// the region at idx2 is the one that gets overwritten.
/// @param idx1 - starting position of the first (source) region.
/// @param id1_len - length of the first region.
/// @param idx2 - starting position of the second (overwritten) region.
/// @param id2_len - length of the second region.
/// @param wgsl_code - the string where the replacement will occur.
void ReplaceRegion(size_t idx1,
size_t id1_len,
size_t idx2,
size_t id2_len,
std::string& wgsl_code);
/// A function that, given WGSL-like string and a delimiter, /// A function that, given WGSL-like string and a delimiter,
/// generates another WGSL-like string by picking two random regions /// generates another WGSL-like string by picking two random regions
/// enclosed by the delimiter and swapping them. /// enclosed by the delimiter and swapping them.
@ -97,6 +117,13 @@ bool DuplicateRandomInterval(const std::string& delimiter,
std::string& wgsl_code, std::string& wgsl_code,
std::mt19937& generator); std::mt19937& generator);
/// Replaces a randomly-selected identifier in wgsl_code with the text of
/// another randomly-selected identifier found in the same string.
/// @param wgsl_code - WGSL-like string where the replacement will occur.
/// @param generator - the random number generator.
/// @return true if a replacement happened or false otherwise (fewer than
/// two identifiers were found in wgsl_code).
bool ReplaceRandomIdentifier(std::string& wgsl_code, std::mt19937& generator);
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers
} // namespace tint } // namespace tint