Regex fuzzer: region deletion and duplication

Adds two transformations, one that deletes a random region enclosed
by a given delimiter and another one that duplicates a region by
inserting it at a position of the WGSL code after a delimiter.

Fixes: tint:1072.
Fixes: tint:1073.

Change-Id: Icb10a7f16a783d5eb8f75a48c4015eb87ea1d174
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60200
Reviewed-by: Alastair Donaldson <afdx@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
egj 2021-07-29 15:40:57 +00:00 committed by Tint LUCI CQ
parent 5a53634764
commit f5490c732d
4 changed files with 252 additions and 40 deletions

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <cassert>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
@ -30,6 +31,13 @@ namespace {
CliParams cli_params{}; CliParams cli_params{};
// The kinds of mutation that LLVMFuzzerCustomMutator can apply to its input.
// One of the real kinds is picked uniformly at random on each invocation.
enum class MutationKind {
// Swap two regions of the input (dispatches to SwapRandomIntervals).
kSwapIntervals,
// Delete a region of the input (dispatches to DeleteRandomInterval).
kDeleteInterval,
// Duplicate a region of the input (dispatches to DuplicateRandomInterval).
kDuplicateInterval,
// Not a mutation: the count of real kinds above. It is used as the exclusive
// upper bound when drawing a random kind, so it must stay the last enumerator.
kNumMutationKinds
};
extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) { extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) {
// Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter // Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter
// is invalid. // is invalid.
@ -41,12 +49,47 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
size_t size, size_t size,
size_t max_size, size_t max_size,
unsigned seed) { unsigned seed) {
std::string wgsl_code(data, data + size);
const std::vector<std::string> delimiters{";"}; const std::vector<std::string> delimiters{";"};
std::mt19937 generator(seed); std::mt19937 generator(seed);
std::uniform_int_distribution<size_t> distribution(0, delimiters.size() - 1); std::string delimiter = delimiters[std::uniform_int_distribution<size_t>(
size_t ind = distribution(generator); 0, delimiters.size() - 1)(generator)];
return FuzzEnclosedRegions(size, max_size, delimiters[ind], data, &generator); MutationKind mutation_kind =
static_cast<MutationKind>(std::uniform_int_distribution<size_t>(
0,
static_cast<size_t>(MutationKind::kNumMutationKinds) - 1)(generator));
switch (mutation_kind) {
case MutationKind::kSwapIntervals:
if (!SwapRandomIntervals(delimiter, wgsl_code, generator)) {
return 0;
}
break;
case MutationKind::kDeleteInterval:
if (!DeleteRandomInterval(delimiter, wgsl_code, generator)) {
return 0;
}
break;
case MutationKind::kDuplicateInterval:
if (!DuplicateRandomInterval(delimiter, wgsl_code, generator)) {
return 0;
}
break;
default:
assert(false && "Unreachable");
return 0;
}
if (wgsl_code.size() > max_size) {
return 0;
}
memcpy(data, wgsl_code.c_str(), wgsl_code.size());
return wgsl_code.size();
} }
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {

View File

@ -31,7 +31,7 @@ TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) {
// this call should swap R1 with R3. // this call should swap R1 with R3.
SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(), SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(),
all_regions.length() - 1, &all_regions); all_regions.length() - 1, all_regions);
ASSERT_EQ(R3 + R2 + R1, all_regions); ASSERT_EQ(R3 + R2 + R1, all_regions);
} }
@ -47,7 +47,7 @@ TEST(SwapRegionsTest, SwapIntervalsNonConsecutiveNonEdge) {
SwapIntervals(R1.length(), R1.length() + R2.length() - 1, SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
R1.length() + R2.length() + R3.length(), R1.length() + R2.length() + R3.length(),
R1.length() + R2.length() + R3.length() + R4.length() - 1, R1.length() + R2.length() + R3.length() + R4.length() - 1,
&all_regions); all_regions);
ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions); ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
} }
@ -61,7 +61,7 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
// this call should swap R2 with R3. // this call should swap R2 with R3.
SwapIntervals(R1.length(), R1.length() + R2.length() - 1, SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
R1.length() + R2.length(), R1.length() + R2.length(),
R1.length() + R2.length() + R3.length() - 1, &all_regions); R1.length() + R2.length() + R3.length() - 1, all_regions);
ASSERT_EQ(R1 + R3 + R2 + R4, all_regions); ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
} }
@ -80,11 +80,93 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveNonEdge) {
R1.length() + R2.length() + R3.length() + R4.length() - 1, R1.length() + R2.length() + R3.length() + R4.length() - 1,
R1.length() + R2.length() + R3.length() + R4.length(), R1.length() + R2.length() + R3.length() + R4.length(),
R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1, R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1,
&all_regions); all_regions);
ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions); ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
} }
// Erasing the interval that spans exactly the leading region must leave the
// remaining four regions untouched and in their original order.
TEST(DeleteRegionTest, DeleteFirstRegion) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R1 occupies the inclusive index range [0, |R1| - 1].
  const auto r1_last = R1.length() - 1;
  DeleteInterval(0, r1_last, all_regions);

  ASSERT_EQ(R2 + R3 + R4 + R5, all_regions);
}
// Erasing the interval that spans exactly the trailing region must leave the
// first four regions untouched and in their original order.
TEST(DeleteRegionTest, DeleteLastRegion) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R5 starts right after R1..R4 and runs to the final character.
  const auto r5_first = R1.length() + R2.length() + R3.length() + R4.length();
  const auto r5_last = all_regions.length() - 1;
  DeleteInterval(r5_first, r5_last, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4, all_regions);
}
// Erasing the interval that spans exactly the middle region must join its two
// neighbours without disturbing any other region.
TEST(DeleteRegionTest, DeleteMiddleRegion) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R3 occupies [|R1| + |R2|, |R1| + |R2| + |R3| - 1].
  const auto r3_first = R1.length() + R2.length();
  const auto r3_last = r3_first + R3.length() - 1;
  DeleteInterval(r3_first, r3_last, all_regions);

  ASSERT_EQ(R1 + R2 + R4 + R5, all_regions);
}
// Duplicating R2 with the last index of R4 as the insertion anchor must place
// the copy between R4 and R5.
TEST(InsertRegionTest, InsertRegionTest1) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Source region: R2. Anchor: final character of R4.
  const auto r2_first = R1.length();
  const auto r2_last = r2_first + R2.length() - 1;
  const auto r4_last =
      R1.length() + R2.length() + R3.length() + R4.length() - 1;
  DuplicateInterval(r2_first, r2_last, r4_last, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4 + R2 + R5, all_regions);
}
// Duplicating R3 with the last index of R1 as the insertion anchor must place
// the copy between R1 and R2, i.e. before the region it was copied from.
TEST(InsertRegionTest, InsertRegionTest2) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Source region: R3. Anchor: final character of R1.
  const auto r3_first = R1.length() + R2.length();
  const auto r3_last = r3_first + R3.length() - 1;
  const auto r1_last = R1.length() - 1;
  DuplicateInterval(r3_first, r3_last, r1_last, all_regions);

  ASSERT_EQ(R1 + R3 + R2 + R3 + R4 + R5, all_regions);
}
// Duplicating R2 with the very last index of the string as the insertion
// anchor must append the copy after R5.
TEST(InsertRegionTest, InsertRegionTest3) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Source region: R2. Anchor: final character of the whole string.
  const auto r2_first = R1.length();
  const auto r2_last = r2_first + R2.length() - 1;
  const auto string_last = all_regions.length() - 1;
  DuplicateInterval(r2_first, r2_last, string_last, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2, all_regions);
}
} // namespace } // namespace
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers

View File

@ -32,9 +32,9 @@ namespace {
size_t GetRandomIntFromRange(size_t lower_bound, size_t GetRandomIntFromRange(size_t lower_bound,
size_t upper_bound, size_t upper_bound,
std::mt19937* generator) { std::mt19937& generator) {
std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound); std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
return dist(*generator); return dist(generator);
} }
} // namespace } // namespace
@ -54,30 +54,38 @@ void SwapIntervals(size_t idx1,
size_t idx2, size_t idx2,
size_t idx3, size_t idx3,
size_t idx4, size_t idx4,
std::string* wgsl_code) { std::string& wgsl_code) {
std::string region_1 = wgsl_code->substr(idx1, idx2 - idx1 + 1); std::string region_1 = wgsl_code.substr(idx1, idx2 - idx1 + 1);
std::string region_2 = wgsl_code->substr(idx3, idx4 - idx3 + 1); std::string region_2 = wgsl_code.substr(idx3, idx4 - idx3 + 1);
// The second transformation is done first as it doesn't affect ind1 and ind2 // The second transformation is done first as it doesn't affect ind1 and ind2
wgsl_code->replace(idx3, region_2.size(), region_1); wgsl_code.replace(idx3, region_2.size(), region_1);
wgsl_code->replace(idx1, region_1.size(), region_2); wgsl_code.replace(idx1, region_1.size(), region_2);
} }
size_t FuzzEnclosedRegions(size_t size, void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code) {
size_t max_size, wgsl_code.erase(idx1, idx2 - idx1 + 1);
const std::string& delimiter, }
uint8_t* wgsl_code,
std::mt19937* generator) {
std::string init_program(wgsl_code, wgsl_code + size);
// Copies the characters in the inclusive range [idx1, idx2] of `wgsl_code` and
// inserts that copy directly after position idx3, i.e. starting at index
// idx3 + 1. The original characters stay where they are.
void DuplicateInterval(size_t idx1,
                       size_t idx2,
                       size_t idx3,
                       std::string& wgsl_code) {
  const size_t region_length = idx2 - idx1 + 1;
  const size_t insert_at = idx3 + 1;

  // Take the copy before mutating, so the inserted text never aliases the
  // buffer that is being modified.
  const std::string duplicate(wgsl_code, idx1, region_length);
  wgsl_code.insert(insert_at, duplicate);
}
bool SwapRandomIntervals(const std::string& delimiter,
std::string& wgsl_code,
std::mt19937& generator) {
std::vector<size_t> delimiter_positions = std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, init_program); FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 3 indices // Need to have at least 3 indices
if (delimiter_positions.size() < 3) { if (delimiter_positions.size() < 3) {
return 0; return false;
} }
// When generating the i-th random number, we should make sure that there are // When generating the i-th random number, we should make sure that there are
@ -93,13 +101,56 @@ size_t FuzzEnclosedRegions(size_t size,
SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2], SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2],
delimiter_positions[ind3], delimiter_positions[ind4], delimiter_positions[ind3], delimiter_positions[ind4],
&init_program); wgsl_code);
if (init_program.size() > max_size) { return true;
return 0;
} }
memcpy(wgsl_code, init_program.c_str(), init_program.size());
return init_program.size(); bool DeleteRandomInterval(const std::string& delimiter,
std::string& wgsl_code,
std::mt19937& generator) {
std::vector<size_t> delimiter_positions =
FindDelimiterIndices(delimiter, wgsl_code);
// Need to have at least 2 indices
if (delimiter_positions.size() < 2) {
return false;
}
size_t ind1 =
GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
size_t ind2 = GetRandomIntFromRange(
ind1 + 1U, delimiter_positions.size() - 1U, generator);
DeleteInterval(delimiter_positions[ind1], delimiter_positions[ind2],
wgsl_code);
return true;
}
// Picks two distinct delimiter positions at random, copies the region of
// `wgsl_code` that they bound (both ends inclusive), and inserts the copy
// right after a third, independently chosen delimiter position.
//
// `delimiter` is the delimiter used to locate candidate positions, `wgsl_code`
// is mutated in place, and `generator` supplies the randomness. Returns true if
// a duplication happened, or false (leaving `wgsl_code` untouched) when fewer
// than two delimiter positions were found.
bool DuplicateRandomInterval(const std::string& delimiter,
                             std::string& wgsl_code,
                             std::mt19937& generator) {
  const std::vector<size_t> delimiter_positions =
      FindDelimiterIndices(delimiter, wgsl_code);

  // Need to have at least 2 indices
  if (delimiter_positions.size() < 2) {
    return false;
  }

  // ind1 < ind2 select the region to copy; ind3 selects where the copy goes
  // and may be any delimiter position, including ind1 or ind2.
  size_t ind1 =
      GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
  size_t ind2 = GetRandomIntFromRange(
      ind1 + 1U, delimiter_positions.size() - 1U, generator);
  size_t ind3 =
      GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator);

  // DuplicateInterval already inserts *after* its third index (at idx3 + 1),
  // so the delimiter position is passed unchanged. Adding 1 here would place
  // the copy one character too far and, when the chosen position is the last
  // index of the string, make std::string::insert throw std::out_of_range.
  DuplicateInterval(delimiter_positions[ind1], delimiter_positions[ind2],
                    delimiter_positions[ind3], wgsl_code);

  return true;
}
} // namespace regex_fuzzer } // namespace regex_fuzzer

View File

@ -44,22 +44,58 @@ void SwapIntervals(size_t idx1,
size_t idx2, size_t idx2,
size_t idx3, size_t idx3,
size_t idx4, size_t idx4,
std::string* wgsl_code); std::string& wgsl_code);
/// A function that, given an initial string (valid WGSL code) and a delimiter, /// Given 2 indices, idx1, idx2, it deletes the region in the interval [idx1,
/// generates a new set of strings (valid or invalid WGSL code) by /// idx2].
/// picking two random regions and swapping them. /// @param idx1 - starting index of the region.
/// @param wgsl_code - the initial string (WGSL code) that will be mutated. /// @param idx2 - terminating index of the region.
/// @param size - size of the string that will be mutated. /// @param wgsl_code - the string where the deletion will occur.
/// @param max_size - maximal allowed mutation size. void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code);
/// Given 3 indices, idx1, idx2, and idx3 it inserts the
/// region in [idx1, idx2] after idx3.
/// @param idx1 - starting index of region.
/// @param idx2 - terminating index of the region.
/// @param idx3 - the position where the region will be inserted.
/// @param wgsl_code - the string where the region will be inserted.
void DuplicateInterval(size_t idx1,
size_t idx2,
size_t idx3,
std::string& wgsl_code);
/// A function that, given a WGSL-like string and a delimiter,
/// generates another WGSL-like string by picking two random regions
/// enclosed by the delimiter and swapping them.
/// @param delimiter - the delimiter that will be used to find enclosed regions. /// @param delimiter - the delimiter that will be used to find enclosed regions.
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
/// @param generator - the random number generator. /// @param generator - the random number generator.
/// @return size of the mutated string. /// @return true if a swap happened or false otherwise.
size_t FuzzEnclosedRegions(size_t size, bool SwapRandomIntervals(const std::string& delimiter,
size_t max_size, std::string& wgsl_code,
const std::string& delimiter, std::mt19937& generator);
uint8_t* wgsl_code,
std::mt19937* generator); /// A function that, given a WGSL-like string and a delimiter,
/// generates another WGSL-like string by deleting a random
/// region enclosed by the delimiter.
/// @param delimiter - the delimiter that will be used to find enclosed regions.
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
/// @param generator - the random number generator.
/// @return true if a deletion happened or false otherwise.
bool DeleteRandomInterval(const std::string& delimiter,
std::string& wgsl_code,
std::mt19937& generator);
/// A function that, given a WGSL-like string and a delimiter,
/// generates another WGSL-like string by duplicating a random
/// region enclosed by the delimiter.
/// @param delimiter - the delimiter that will be used to find enclosed regions.
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
/// @param generator - the random number generator.
/// @return true if a duplication happened or false otherwise.
bool DuplicateRandomInterval(const std::string& delimiter,
std::string& wgsl_code,
std::mt19937& generator);
} // namespace regex_fuzzer } // namespace regex_fuzzer
} // namespace fuzzers } // namespace fuzzers