Regex fuzzer: region deletion and duplication
Adds two transformations, one that deletes a random region enclosed by a given delimiter and another one that duplicates a region by inserting it at a position of the WGSL code after a delimiter. Fixes: tint:1072. Fixes: tint:1073. Change-Id: Icb10a7f16a783d5eb8f75a48c4015eb87ea1d174 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/60200 Reviewed-by: Alastair Donaldson <afdx@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
parent
5a53634764
commit
f5490c732d
|
@ -12,6 +12,7 @@
|
||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
|
@ -30,6 +31,13 @@ namespace {
|
||||||
|
|
||||||
CliParams cli_params{};
|
CliParams cli_params{};
|
||||||
|
|
||||||
|
/// The kinds of mutation the custom mutator can apply to the WGSL input.
/// The custom mutator draws one of these uniformly at random from the range
/// [0, kNumMutationKinds - 1].
enum class MutationKind {
  /// Swap two regions enclosed by the delimiter.
  kSwapIntervals = 0,
  /// Delete one region enclosed by the delimiter.
  kDeleteInterval,
  /// Duplicate one region enclosed by the delimiter.
  kDuplicateInterval,
  /// Sentinel: the number of real mutation kinds. Must stay last.
  kNumMutationKinds
};
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) {
|
extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) {
|
||||||
// Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter
|
// Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter
|
||||||
// is invalid.
|
// is invalid.
|
||||||
|
@ -41,12 +49,47 @@ extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
|
||||||
size_t size,
|
size_t size,
|
||||||
size_t max_size,
|
size_t max_size,
|
||||||
unsigned seed) {
|
unsigned seed) {
|
||||||
|
std::string wgsl_code(data, data + size);
|
||||||
const std::vector<std::string> delimiters{";"};
|
const std::vector<std::string> delimiters{";"};
|
||||||
std::mt19937 generator(seed);
|
std::mt19937 generator(seed);
|
||||||
std::uniform_int_distribution<size_t> distribution(0, delimiters.size() - 1);
|
std::string delimiter = delimiters[std::uniform_int_distribution<size_t>(
|
||||||
size_t ind = distribution(generator);
|
0, delimiters.size() - 1)(generator)];
|
||||||
|
|
||||||
return FuzzEnclosedRegions(size, max_size, delimiters[ind], data, &generator);
|
MutationKind mutation_kind =
|
||||||
|
static_cast<MutationKind>(std::uniform_int_distribution<size_t>(
|
||||||
|
0,
|
||||||
|
static_cast<size_t>(MutationKind::kNumMutationKinds) - 1)(generator));
|
||||||
|
|
||||||
|
switch (mutation_kind) {
|
||||||
|
case MutationKind::kSwapIntervals:
|
||||||
|
if (!SwapRandomIntervals(delimiter, wgsl_code, generator)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MutationKind::kDeleteInterval:
|
||||||
|
if (!DeleteRandomInterval(delimiter, wgsl_code, generator)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MutationKind::kDuplicateInterval:
|
||||||
|
if (!DuplicateRandomInterval(delimiter, wgsl_code, generator)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false && "Unreachable");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wgsl_code.size() > max_size) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(data, wgsl_code.c_str(), wgsl_code.size());
|
||||||
|
return wgsl_code.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||||
|
|
|
@ -31,7 +31,7 @@ TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) {
|
||||||
|
|
||||||
// this call should swap R1 with R3.
|
// this call should swap R1 with R3.
|
||||||
SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(),
|
SwapIntervals(0, R1.length() - 1, R1.length() + R2.length(),
|
||||||
all_regions.length() - 1, &all_regions);
|
all_regions.length() - 1, all_regions);
|
||||||
|
|
||||||
ASSERT_EQ(R3 + R2 + R1, all_regions);
|
ASSERT_EQ(R3 + R2 + R1, all_regions);
|
||||||
}
|
}
|
||||||
|
@ -47,7 +47,7 @@ TEST(SwapRegionsTest, SwapIntervalsNonConsecutiveNonEdge) {
|
||||||
SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
|
SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
|
||||||
R1.length() + R2.length() + R3.length(),
|
R1.length() + R2.length() + R3.length(),
|
||||||
R1.length() + R2.length() + R3.length() + R4.length() - 1,
|
R1.length() + R2.length() + R3.length() + R4.length() - 1,
|
||||||
&all_regions);
|
all_regions);
|
||||||
|
|
||||||
ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
|
ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
|
||||||
}
|
}
|
||||||
|
@ -61,7 +61,7 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
|
||||||
// this call should swap R2 with R3.
|
// this call should swap R2 with R3.
|
||||||
SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
|
SwapIntervals(R1.length(), R1.length() + R2.length() - 1,
|
||||||
R1.length() + R2.length(),
|
R1.length() + R2.length(),
|
||||||
R1.length() + R2.length() + R3.length() - 1, &all_regions);
|
R1.length() + R2.length() + R3.length() - 1, all_regions);
|
||||||
|
|
||||||
ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
|
ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
|
||||||
}
|
}
|
||||||
|
@ -80,11 +80,93 @@ TEST(SwapRegionsTest, SwapIntervalsConsecutiveNonEdge) {
|
||||||
R1.length() + R2.length() + R3.length() + R4.length() - 1,
|
R1.length() + R2.length() + R3.length() + R4.length() - 1,
|
||||||
R1.length() + R2.length() + R3.length() + R4.length(),
|
R1.length() + R2.length() + R3.length() + R4.length(),
|
||||||
R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1,
|
R1.length() + R2.length() + R3.length() + R4.length() + R5.length() - 1,
|
||||||
&all_regions);
|
all_regions);
|
||||||
|
|
||||||
ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
|
ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deletes the first region.
|
||||||
|
TEST(DeleteRegionTest, DeleteFirstRegion) {
  // Five adjacent regions; the first one is the deletion target.
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R1 occupies the inclusive interval [0, |R1| - 1].
  const size_t first_index = 0;
  const size_t last_index = R1.length() - 1;
  DeleteInterval(first_index, last_index, all_regions);

  ASSERT_EQ(R2 + R3 + R4 + R5, all_regions);
}
|
||||||
|
|
||||||
|
// Deletes the last region.
|
||||||
|
TEST(DeleteRegionTest, DeleteLastRegion) {
  // Five adjacent regions; the last one is the deletion target.
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R5 starts right after R4 and runs to the final character of the string.
  const size_t first_index =
      R1.length() + R2.length() + R3.length() + R4.length();
  const size_t last_index = all_regions.length() - 1;
  DeleteInterval(first_index, last_index, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4, all_regions);
}
|
||||||
|
|
||||||
|
// Deletes the middle region.
|
||||||
|
TEST(DeleteRegionTest, DeleteMiddleRegion) {
  // Five adjacent regions; the middle one (R3) is the deletion target.
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // R3 occupies the inclusive interval [|R1| + |R2|, |R1| + |R2| + |R3| - 1].
  const size_t first_index = R1.length() + R2.length();
  const size_t last_index = R1.length() + R2.length() + R3.length() - 1;
  DeleteInterval(first_index, last_index, all_regions);

  ASSERT_EQ(R1 + R2 + R4 + R5, all_regions);
}
|
||||||
|
|
||||||
|
TEST(InsertRegionTest, InsertRegionTest1) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Copy R2 and place the copy immediately after the last character of R4.
  const size_t region_begin = R1.length();
  const size_t region_end = R1.length() + R2.length() - 1;
  const size_t insert_after =
      R1.length() + R2.length() + R3.length() + R4.length() - 1;
  DuplicateInterval(region_begin, region_end, insert_after, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4 + R2 + R5, all_regions);
}
|
||||||
|
|
||||||
|
TEST(InsertRegionTest, InsertRegionTest2) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Copy R3 and place the copy immediately after the last character of R1,
  // i.e. before the position the region was copied from.
  const size_t region_begin = R1.length() + R2.length();
  const size_t region_end = R1.length() + R2.length() + R3.length() - 1;
  const size_t insert_after = R1.length() - 1;
  DuplicateInterval(region_begin, region_end, insert_after, all_regions);

  ASSERT_EQ(R1 + R3 + R2 + R3 + R4 + R5, all_regions);
}
|
||||||
|
|
||||||
|
TEST(InsertRegionTest, InsertRegionTest3) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Copy R2 and append the copy after the final character of the string
  // (that is, after R5).
  const size_t region_begin = R1.length();
  const size_t region_end = R1.length() + R2.length() - 1;
  const size_t insert_after = all_regions.length() - 1;
  DuplicateInterval(region_begin, region_end, insert_after, all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R4 + R5 + R2, all_regions);
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace regex_fuzzer
|
} // namespace regex_fuzzer
|
||||||
} // namespace fuzzers
|
} // namespace fuzzers
|
||||||
|
|
|
@ -32,9 +32,9 @@ namespace {
|
||||||
|
|
||||||
size_t GetRandomIntFromRange(size_t lower_bound,
|
size_t GetRandomIntFromRange(size_t lower_bound,
|
||||||
size_t upper_bound,
|
size_t upper_bound,
|
||||||
std::mt19937* generator) {
|
std::mt19937& generator) {
|
||||||
std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
|
std::uniform_int_distribution<size_t> dist(lower_bound, upper_bound);
|
||||||
return dist(*generator);
|
return dist(generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -54,30 +54,38 @@ void SwapIntervals(size_t idx1,
|
||||||
size_t idx2,
|
size_t idx2,
|
||||||
size_t idx3,
|
size_t idx3,
|
||||||
size_t idx4,
|
size_t idx4,
|
||||||
std::string* wgsl_code) {
|
std::string& wgsl_code) {
|
||||||
std::string region_1 = wgsl_code->substr(idx1, idx2 - idx1 + 1);
|
std::string region_1 = wgsl_code.substr(idx1, idx2 - idx1 + 1);
|
||||||
|
|
||||||
std::string region_2 = wgsl_code->substr(idx3, idx4 - idx3 + 1);
|
std::string region_2 = wgsl_code.substr(idx3, idx4 - idx3 + 1);
|
||||||
|
|
||||||
// The second transformation is done first as it doesn't affect ind1 and ind2
|
// The second transformation is done first as it doesn't affect ind1 and ind2
|
||||||
wgsl_code->replace(idx3, region_2.size(), region_1);
|
wgsl_code.replace(idx3, region_2.size(), region_1);
|
||||||
|
|
||||||
wgsl_code->replace(idx1, region_1.size(), region_2);
|
wgsl_code.replace(idx1, region_1.size(), region_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t FuzzEnclosedRegions(size_t size,
|
void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code) {
|
||||||
size_t max_size,
|
wgsl_code.erase(idx1, idx2 - idx1 + 1);
|
||||||
const std::string& delimiter,
|
}
|
||||||
uint8_t* wgsl_code,
|
|
||||||
std::mt19937* generator) {
|
|
||||||
std::string init_program(wgsl_code, wgsl_code + size);
|
|
||||||
|
|
||||||
|
// Copies the characters in the inclusive interval [idx1, idx2] of `wgsl_code`
// and inserts that copy immediately after the character at index idx3
// (i.e. at position idx3 + 1). The original interval is left in place.
void DuplicateInterval(size_t idx1,
                       size_t idx2,
                       size_t idx3,
                       std::string& wgsl_code) {
  const size_t region_length = idx2 - idx1 + 1;
  // Take the copy before mutating so the inserted text never aliases the
  // string being modified.
  const std::string copied_region = wgsl_code.substr(idx1, region_length);
  const size_t insert_position = idx3 + 1;
  wgsl_code.insert(insert_position, copied_region);
}
|
||||||
|
|
||||||
|
bool SwapRandomIntervals(const std::string& delimiter,
|
||||||
|
std::string& wgsl_code,
|
||||||
|
std::mt19937& generator) {
|
||||||
std::vector<size_t> delimiter_positions =
|
std::vector<size_t> delimiter_positions =
|
||||||
FindDelimiterIndices(delimiter, init_program);
|
FindDelimiterIndices(delimiter, wgsl_code);
|
||||||
|
|
||||||
// Need to have at least 3 indices
|
// Need to have at least 3 indices
|
||||||
if (delimiter_positions.size() < 3) {
|
if (delimiter_positions.size() < 3) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// When generating the i-th random number, we should make sure that there are
|
// When generating the i-th random number, we should make sure that there are
|
||||||
|
@ -93,13 +101,56 @@ size_t FuzzEnclosedRegions(size_t size,
|
||||||
|
|
||||||
SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2],
|
SwapIntervals(delimiter_positions[ind1], delimiter_positions[ind2],
|
||||||
delimiter_positions[ind3], delimiter_positions[ind4],
|
delimiter_positions[ind3], delimiter_positions[ind4],
|
||||||
&init_program);
|
wgsl_code);
|
||||||
|
|
||||||
if (init_program.size() > max_size) {
|
return true;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
memcpy(wgsl_code, init_program.c_str(), init_program.size());
|
|
||||||
return init_program.size();
|
bool DeleteRandomInterval(const std::string& delimiter,
|
||||||
|
std::string& wgsl_code,
|
||||||
|
std::mt19937& generator) {
|
||||||
|
std::vector<size_t> delimiter_positions =
|
||||||
|
FindDelimiterIndices(delimiter, wgsl_code);
|
||||||
|
|
||||||
|
// Need to have at least 2 indices
|
||||||
|
if (delimiter_positions.size() < 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ind1 =
|
||||||
|
GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
|
||||||
|
size_t ind2 = GetRandomIntFromRange(
|
||||||
|
ind1 + 1U, delimiter_positions.size() - 1U, generator);
|
||||||
|
|
||||||
|
DeleteInterval(delimiter_positions[ind1], delimiter_positions[ind2],
|
||||||
|
wgsl_code);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DuplicateRandomInterval(const std::string& delimiter,
|
||||||
|
std::string& wgsl_code,
|
||||||
|
std::mt19937& generator) {
|
||||||
|
std::vector<size_t> delimiter_positions =
|
||||||
|
FindDelimiterIndices(delimiter, wgsl_code);
|
||||||
|
|
||||||
|
// Need to have at least 2 indices
|
||||||
|
if (delimiter_positions.size() < 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ind1 =
|
||||||
|
GetRandomIntFromRange(0, delimiter_positions.size() - 2U, generator);
|
||||||
|
size_t ind2 = GetRandomIntFromRange(
|
||||||
|
ind1 + 1U, delimiter_positions.size() - 1U, generator);
|
||||||
|
|
||||||
|
size_t ind3 =
|
||||||
|
GetRandomIntFromRange(0, delimiter_positions.size() - 1U, generator);
|
||||||
|
|
||||||
|
DuplicateInterval(delimiter_positions[ind1], delimiter_positions[ind2],
|
||||||
|
delimiter_positions[ind3] + 1, wgsl_code);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace regex_fuzzer
|
} // namespace regex_fuzzer
|
||||||
|
|
|
@ -44,22 +44,58 @@ void SwapIntervals(size_t idx1,
|
||||||
size_t idx2,
|
size_t idx2,
|
||||||
size_t idx3,
|
size_t idx3,
|
||||||
size_t idx4,
|
size_t idx4,
|
||||||
std::string* wgsl_code);
|
std::string& wgsl_code);
|
||||||
|
|
||||||
/// A function that, given an initial string (valid WGSL code) and a delimiter,
|
/// Given 2 indices, idx1, idx2, it deletes the region in the interval [idx1,
|
||||||
/// generates a new set of strings (valid or invalid WGSL code) by
|
/// idx2].
|
||||||
/// picking two random regions and swapping them.
|
/// @param idx1 - starting index of the region.
|
||||||
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
|
/// @param idx2 - terminating index of the region.
|
||||||
/// @param size - size of the string that will be mutated.
|
/// @param wgsl_code - the string where the deletion will occur.
|
||||||
/// @param max_size - maximal allowed mutation size.
|
void DeleteInterval(size_t idx1, size_t idx2, std::string& wgsl_code);
|
||||||
|
|
||||||
|
/// Given 3 indices, idx1, idx2, and idx3 it inserts the
|
||||||
|
/// region in [idx1, idx2] after idx3.
|
||||||
|
/// @param idx1 - starting index of region.
|
||||||
|
/// @param idx2 - terminating index of the region.
|
||||||
|
/// @param idx3 - the index after which the region will be inserted.
|
||||||
|
/// @param wgsl_code - the string where the insertion will occur.
|
||||||
|
void DuplicateInterval(size_t idx1,
|
||||||
|
size_t idx2,
|
||||||
|
size_t idx3,
|
||||||
|
std::string& wgsl_code);
|
||||||
|
|
||||||
|
/// A function that, given a WGSL-like string and a delimiter,
|
||||||
|
/// generates another WGSL-like string by picking two random regions
|
||||||
|
/// enclosed by the delimiter and swapping them.
|
||||||
/// @param delimiter - the delimiter that will be used to find enclosed regions.
|
/// @param delimiter - the delimiter that will be used to find enclosed regions.
|
||||||
|
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
|
||||||
/// @param generator - the random number generator.
|
/// @param generator - the random number generator.
|
||||||
/// @return size of the mutated string.
|
/// @return true if a swap happened or false otherwise.
|
||||||
size_t FuzzEnclosedRegions(size_t size,
|
bool SwapRandomIntervals(const std::string& delimiter,
|
||||||
size_t max_size,
|
std::string& wgsl_code,
|
||||||
const std::string& delimiter,
|
std::mt19937& generator);
|
||||||
uint8_t* wgsl_code,
|
|
||||||
std::mt19937* generator);
|
/// A function that, given a WGSL-like string and a delimiter,
|
||||||
|
/// generates another WGSL-like string by deleting a random
|
||||||
|
/// region enclosed by the delimiter.
|
||||||
|
/// @param delimiter - the delimiter that will be used to find enclosed regions.
|
||||||
|
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
|
||||||
|
/// @param generator - the random number generator.
|
||||||
|
/// @return true if a deletion happened or false otherwise.
|
||||||
|
bool DeleteRandomInterval(const std::string& delimiter,
|
||||||
|
std::string& wgsl_code,
|
||||||
|
std::mt19937& generator);
|
||||||
|
|
||||||
|
/// A function that, given a WGSL-like string and a delimiter,
|
||||||
|
/// generates another WGSL-like string by duplicating a random
|
||||||
|
/// region enclosed by the delimiter.
|
||||||
|
/// @param delimiter - the delimiter that will be used to find enclosed regions.
|
||||||
|
/// @param wgsl_code - the initial string (WGSL code) that will be mutated.
|
||||||
|
/// @param generator - the random number generator.
|
||||||
|
/// @return true if a duplication happened or false otherwise.
|
||||||
|
bool DuplicateRandomInterval(const std::string& delimiter,
|
||||||
|
std::string& wgsl_code,
|
||||||
|
std::mt19937& generator);
|
||||||
|
|
||||||
} // namespace regex_fuzzer
|
} // namespace regex_fuzzer
|
||||||
} // namespace fuzzers
|
} // namespace fuzzers
|
||||||
|
|
Loading…
Reference in New Issue