Add regex fuzzer

A fuzzer that mutates WGSL code by randomly picking two regions delimited by
semicolons and swapping them.

Change-Id: I5b14eb21fd2924227d05ac516f806c6e2efa6198
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/58395
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Alastair Donaldson <afdx@google.com>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: Alastair Donaldson <afdx@google.com>
This commit is contained in:
egj 2021-07-28 11:11:26 +00:00 committed by Tint LUCI CQ
parent 085dcbbe20
commit cc6d5b464d
9 changed files with 620 additions and 0 deletions

View File

@ -52,6 +52,7 @@ option(TINT_BUILD_WGSL_WRITER "Build the WGSL output writer" ON)
# User-configurable build switches. Every switch below defaults to OFF except
# TINT_BUILD_TESTS, whose default comes from TINT_BUILD_TESTS_DEFAULT.
option(TINT_BUILD_FUZZERS "Build fuzzers" OFF)
option(TINT_BUILD_SPIRV_TOOLS_FUZZER "Build SPIRV-Tools fuzzer" OFF)
option(TINT_BUILD_AST_FUZZER "Build AST fuzzer" OFF)
# Builds the regex fuzzer, which mutates WGSL source text.
option(TINT_BUILD_REGEX_FUZZER "Build regex fuzzer" OFF)
option(TINT_BUILD_TESTS "Build tests" ${TINT_BUILD_TESTS_DEFAULT})
option(TINT_BUILD_AS_OTHER_OS "Override OS detection to force building of *_other.cc files" OFF)
option(TINT_BUILD_REMOTE_COMPILE "Build the remote-compile tool for validating shaders on a remote machine" OFF)
@ -75,6 +76,7 @@ message(STATUS "Tint build WGSL writer: ${TINT_BUILD_WGSL_WRITER}")
# Print the effective value of each build switch during configuration.
message(STATUS "Tint build fuzzers: ${TINT_BUILD_FUZZERS}")
message(STATUS "Tint build SPIRV-Tools fuzzer: ${TINT_BUILD_SPIRV_TOOLS_FUZZER}")
message(STATUS "Tint build AST fuzzer: ${TINT_BUILD_AST_FUZZER}")
message(STATUS "Tint build regex fuzzer: ${TINT_BUILD_REGEX_FUZZER}")
message(STATUS "Tint build tests: ${TINT_BUILD_TESTS}")
message(STATUS "Tint build with ASAN: ${TINT_ENABLE_ASAN}")
message(STATUS "Tint build with MSAN: ${TINT_ENABLE_MSAN}")
@ -119,6 +121,22 @@ if (${TINT_BUILD_AST_FUZZER})
set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "Build HLSL writer" FORCE)
endif()
# The regex fuzzer reads WGSL and is run against every output backend, so
# enabling it force-enables the generic fuzzers, the WGSL reader and the WGSL,
# SPIR-V, MSL and HLSL writers in the cache.
# NOTE(review): FORCE overwrites any value the user supplied for these cache
# entries; this mirrors the handling of the other fuzzer options in this file.
if (${TINT_BUILD_REGEX_FUZZER})
# Tell the user which options are about to be overridden.
message(STATUS "TINT_BUILD_REGEX_FUZZER is ON - setting
TINT_BUILD_FUZZERS
TINT_BUILD_WGSL_READER
TINT_BUILD_WGSL_WRITER
TINT_BUILD_SPV_WRITER
TINT_BUILD_MSL_WRITER
TINT_BUILD_HLSL_WRITER to ON")
set(TINT_BUILD_FUZZERS ON CACHE BOOL "Build tint fuzzers" FORCE)
set(TINT_BUILD_WGSL_READER ON CACHE BOOL "Build WGSL reader" FORCE)
set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "Build WGSL writer" FORCE)
set(TINT_BUILD_SPV_WRITER ON CACHE BOOL "Build SPIR-V writer" FORCE)
set(TINT_BUILD_MSL_WRITER ON CACHE BOOL "Build MSL writer" FORCE)
set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "Build HLSL writer" FORCE)
endif()
set(TINT_ROOT_SOURCE_DIR ${PROJECT_SOURCE_DIR})
# CMake < 3.15 sets /W3 in CMAKE_CXX_FLAGS. Remove it if it's there.

View File

@ -81,3 +81,7 @@ endif()
# Each optional fuzzer lives in its own subdirectory and is only added to the
# build when its option is enabled.
if (${TINT_BUILD_AST_FUZZER})
add_subdirectory(tint_ast_fuzzer)
endif()
# Regex fuzzer: mutates WGSL source text.
if (${TINT_BUILD_REGEX_FUZZER})
add_subdirectory(tint_regex_fuzzer)
endif()

View File

@ -0,0 +1,57 @@
# Copyright 2021 The Tint Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Sources of the WGSL mutation logic. They are built into a separate static
# library so that both the fuzzer executable and the unit tests can link it.
set(LIBTINT_REGEX_FUZZER_SOURCES
wgsl_mutator.cc
wgsl_mutator.h)
# Add static library target.
add_library(libtint_regex_fuzzer STATIC ${LIBTINT_REGEX_FUZZER_SOURCES})
# tint_default_compile_options is defined outside this file; presumably it
# applies the project-wide compile settings to the target - TODO confirm.
tint_default_compile_options(libtint_regex_fuzzer)
# Sources of the fuzzer executable: the CLI parser, the libFuzzer entry points
# and the common fuzzer driver from the parent directory.
set(REGEX_FUZZER_SOURCES
cli.cc
cli.h
fuzzer.cc
../tint_common_fuzzer.cc
../tint_common_fuzzer.h)
# Add libfuzzer target.
add_executable(tint_regex_fuzzer ${REGEX_FUZZER_SOURCES})
# NOTE(review): presumably needed because the extern "C" libFuzzer entry points
# in fuzzer.cc are defined without a prior declaration - confirm.
target_compile_options(tint_regex_fuzzer PRIVATE
-Wno-missing-prototypes)
# libtint-fuzz is defined outside this directory; libtint_regex_fuzzer is the
# static library holding the mutation logic.
target_link_libraries(tint_regex_fuzzer libtint-fuzz libtint_regex_fuzzer)
tint_default_compile_options(tint_regex_fuzzer)
# NOTE(review): CUSTOM_MUTATOR is not referenced by the sources in this
# directory; presumably tint_common_fuzzer checks it - TODO confirm.
target_compile_definitions(tint_regex_fuzzer PUBLIC CUSTOM_MUTATOR)
# Add tests.
# The unit tests are only built when TINT_BUILD_TESTS is enabled. They link the
# mutation library directly and do not depend on the fuzzer executable.
if (${TINT_BUILD_TESTS})
set(TEST_SOURCES
regex_fuzzer_tests.cc)
add_executable(tint_regex_fuzzer_unittests ${TEST_SOURCES})
# Make the googlemock headers visible to the test sources.
target_include_directories(
tint_regex_fuzzer_unittests PRIVATE ${gmock_SOURCE_DIR}/include)
# gmock_main supplies the test runner's main().
target_link_libraries(tint_regex_fuzzer_unittests gmock_main libtint_regex_fuzzer)
tint_default_compile_options(tint_regex_fuzzer_unittests)
# NOTE(review): these warnings are presumably triggered by the googletest
# macros and headers rather than by the test code itself - confirm.
target_compile_options(tint_regex_fuzzer_unittests PRIVATE
-Wno-global-constructors
-Wno-weak-vtables
-Wno-covered-switch-default)
target_include_directories(tint_regex_fuzzer_unittests PRIVATE ${CMAKE_BINARY_DIR})
# Register the test binary with CTest.
add_test(NAME tint_regex_fuzzer_unittests COMMAND tint_regex_fuzzer_unittests)
endif ()

View File

@ -0,0 +1,126 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fuzzers/tint_regex_fuzzer/cli.h"
#include <cstring>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <utility>
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
namespace {
/// Usage text for the fuzzer-specific command line parameters. It is printed
/// when `-tint_help` is passed and after reporting an invalid parameter value.
const char* const kHelpMessage = R"(
This is a fuzzer for the Tint compiler that works by mutating a WGSL shader.
Below is a list of all supported parameters for this fuzzer. You may want to
run it with -help=1 to check out libfuzzer parameters.
-tint_fuzzing_target=
Specifies the shading language to target during fuzzing.
This must be one or a combination of `wgsl`, `spv`, `hlsl`,
`msl` (without `) separated by commas. By default it's
`wgsl,msl,hlsl,spv`.
-tint_help
Show this message. Note that there is also a -help=1
parameter that will display libfuzzer's help message.
)";
/// Tests whether a C string begins with a given prefix.
/// @param str - the NUL-terminated string to inspect.
/// @param prefix - the NUL-terminated prefix to look for.
/// @return true if the first `strlen(prefix)` characters of `str` equal
/// `prefix`. An empty prefix always matches.
bool HasPrefix(const char* str, const char* prefix) {
  const size_t prefix_length = strlen(prefix);
  const int difference = strncmp(str, prefix, prefix_length);
  return difference == 0;
}
/// Reports an invalid command line parameter, prints the help message and
/// terminates the process with exit code 1.
/// @param param - the offending parameter, echoed verbatim in the message.
[[noreturn]] void InvalidParam(const char* param) {
  std::cout << "Invalid value for " << param << std::endl
            << kHelpMessage << std::endl;
  exit(1);
}
bool ParseFuzzingTarget(const char* value, FuzzingTarget* out) {
if (!strcmp(value, "wgsl")) {
*out = FuzzingTarget::kWgsl;
} else if (!strcmp(value, "spv")) {
*out = FuzzingTarget::kSpv;
} else if (!strcmp(value, "msl")) {
*out = FuzzingTarget::kMsl;
} else if (!strcmp(value, "hlsl")) {
*out = FuzzingTarget::kHlsl;
} else {
return false;
}
return true;
}
} // namespace
/// Parses the fuzzer-specific parameters and strips them from `argv`.
///
/// `argv[0]` is never examined. An invalid `-tint_fuzzing_target=` value
/// terminates the process via `InvalidParam`; `-tint_help` prints the help
/// message and exits with code 0 once all parameters have been processed.
///
/// @param argc - number of entries in `argv`; decremented once for every
/// recognized parameter.
/// @param argv - the command line parameters; recognized ones are moved past
/// the new end of the list.
/// @return the parsed parameters.
CliParams ParseCliParams(int* argc, char** argv) {
  static const char kTargetFlag[] = "-tint_fuzzing_target=";

  CliParams parsed;
  bool show_help = false;

  // Walk the list back to front: every entry behind the current one has
  // already been examined, so swapping the current entry with the last one
  // never hides an unprocessed parameter.
  for (int index = *argc - 1; index > 0; --index) {
    char* argument = argv[index];

    if (HasPrefix(argument, kTargetFlag)) {
      // The value is a comma-separated list of backend names.
      FuzzingTarget selected = FuzzingTarget::kNone;
      std::stringstream names(argument + sizeof(kTargetFlag) - 1);
      std::string name;
      while (std::getline(names, name, ',')) {
        FuzzingTarget single = FuzzingTarget::kNone;
        if (!ParseFuzzingTarget(name.c_str(), &single)) {
          InvalidParam(argument);
        }
        selected = selected | single;
      }
      // An empty list selects nothing, which is rejected as well.
      if (selected == FuzzingTarget::kNone) {
        InvalidParam(argument);
      }
      parsed.fuzzing_target = selected;
    } else if (strcmp(argument, "-tint_help") == 0) {
      show_help = true;
    } else {
      // Not one of ours: leave it in place for libFuzzer.
      continue;
    }

    // Drop the recognized parameter by swapping it with the last one and
    // shrinking the list. This suppresses libFuzzer's warnings about
    // unrecognized parameters. By default, libFuzzer expects user-defined
    // parameters to start with two dashes, but a single dash is required to
    // stay compatible with ClusterFuzz.
    std::swap(argv[index], argv[*argc - 1]);
    --*argc;
  }

  if (show_help) {
    std::cout << kHelpMessage << std::endl;
    exit(0);
  }
  return parsed;
}
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint

View File

@ -0,0 +1,64 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUZZERS_TINT_REGEX_FUZZER_CLI_H_
#define FUZZERS_TINT_REGEX_FUZZER_CLI_H_
#include <cstdint>
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
/// The backends this fuzzer can test. Every backend owns one bit, so several
/// backends can be combined into a single value with `operator|`.
enum class FuzzingTarget {
  kNone = 0,
  kHlsl = 1 << 0,
  kMsl = 1 << 1,
  kSpv = 1 << 2,
  kWgsl = 1 << 3,
  kAll = kHlsl | kMsl | kSpv | kWgsl
};

/// @return the union of the backends selected in `a` and `b`.
inline FuzzingTarget operator|(FuzzingTarget a, FuzzingTarget b) {
  const int lhs_bits = static_cast<int>(a);
  const int rhs_bits = static_cast<int>(b);
  return static_cast<FuzzingTarget>(lhs_bits | rhs_bits);
}

/// @return the backends selected in both `a` and `b`.
inline FuzzingTarget operator&(FuzzingTarget a, FuzzingTarget b) {
  const int lhs_bits = static_cast<int>(a);
  const int rhs_bits = static_cast<int>(b);
  return static_cast<FuzzingTarget>(lhs_bits & rhs_bits);
}
/// CLI parameters accepted by the fuzzer. Pass -tint_help on the command line
/// to see the help message.
struct CliParams {
/// Compiler backends we want to fuzz. Defaults to all of them.
FuzzingTarget fuzzing_target = FuzzingTarget::kAll;
};
/// @brief Parses CLI parameters.
///
/// This function will exit the process with non-zero return code if some
/// parameters are invalid, and with a zero return code after printing the help
/// message if `-tint_help` is present. This function will remove recognized
/// parameters from `argv` and adjust `argc` accordingly. The first entry of
/// `argv` is never examined.
///
/// @param argc - the total number of parameters.
/// @param argv - array of all CLI parameters.
/// @return parsed parameters.
CliParams ParseCliParams(int* argc, char** argv);
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint
#endif // FUZZERS_TINT_REGEX_FUZZER_CLI_H_

View File

@ -0,0 +1,85 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstddef>
#include <cstdint>
#include "fuzzers/tint_common_fuzzer.h"
#include "fuzzers/tint_regex_fuzzer/cli.h"
#include "fuzzers/tint_regex_fuzzer/wgsl_mutator.h"
#include "src/reader/wgsl/parser.h"
#include "src/writer/wgsl/generator.h"
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
namespace {
/// Parameters parsed from the command line. Written by `LLVMFuzzerInitialize`
/// and read by `LLVMFuzzerTestOneInput`.
CliParams cli_params{};
/// libFuzzer initialization hook: parses the fuzzer-specific parameters and
/// removes them from the argument list.
/// @param argc - pointer to the number of command line parameters.
/// @param argv - pointer to the array of command line parameters.
/// @return always 0.
extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) {
// Parse CLI parameters. `ParseCliParams` will call `exit` if some parameter
// is invalid.
cli_params = ParseCliParams(argc, *argv);
return 0;
}
/// libFuzzer custom mutator hook: mutates the WGSL text in `data` in place by
/// swapping two regions enclosed by a randomly chosen delimiter.
/// @param data - buffer holding the input to mutate.
/// @param size - number of valid bytes in `data`.
/// @param max_size - maximal size the mutated input may have.
/// @param seed - seed for the random number generator.
/// @return whatever `FuzzEnclosedRegions` returns for the chosen delimiter.
extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
                                          size_t size,
                                          size_t max_size,
                                          unsigned seed) {
  // Candidate delimiters; currently only the semicolon.
  const std::vector<std::string> delimiters{";"};

  std::mt19937 generator(seed);
  std::uniform_int_distribution<size_t> pick_delimiter(0,
                                                       delimiters.size() - 1);
  const std::string& chosen = delimiters[pick_delimiter(generator)];

  return FuzzEnclosedRegions(size, max_size, chosen, data, &generator);
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size == 0) {
return 0;
}
struct Target {
FuzzingTarget fuzzing_target;
OutputFormat output_format;
const char* name;
};
Target targets[] = {{FuzzingTarget::kWgsl, OutputFormat::kWGSL, "WGSL"},
{FuzzingTarget::kHlsl, OutputFormat::kHLSL, "HLSL"},
{FuzzingTarget::kMsl, OutputFormat::kMSL, "MSL"},
{FuzzingTarget::kSpv, OutputFormat::kSpv, "SPV"}};
for (auto target : targets) {
if ((target.fuzzing_target & cli_params.fuzzing_target) !=
target.fuzzing_target) {
continue;
}
CommonFuzzer fuzzer(InputFormat::kWGSL, target.output_format);
fuzzer.EnableInspector();
fuzzer.Run(data, size);
}
return 0;
}
} // namespace
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint

View File

@ -0,0 +1,91 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "gtest/gtest.h"
#include "fuzzers/tint_regex_fuzzer/wgsl_mutator.h"
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
namespace {
// Swaps two non-adjacent regions located at the two ends of the string.
TEST(SwapRegionsTest, SwapIntervalsEdgeNonConsecutive) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  std::string all_regions = R1 + R2 + R3;

  // Inclusive bounds of R1 and R3 inside `all_regions`.
  const size_t r1_first = 0;
  const size_t r1_last = R1.length() - 1;
  const size_t r3_first = R1.length() + R2.length();
  const size_t r3_last = all_regions.length() - 1;

  // This call should swap R1 with R3.
  SwapIntervals(r1_first, r1_last, r3_first, r3_last, &all_regions);

  ASSERT_EQ(R3 + R2 + R1, all_regions);
}
// Swaps two non-adjacent regions that are both surrounded by other regions.
TEST(SwapRegionsTest, SwapIntervalsNonConsecutiveNonEdge) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Inclusive bounds of R2 and R4 inside `all_regions`.
  const size_t r2_first = R1.length();
  const size_t r2_last = R1.length() + R2.length() - 1;
  const size_t r4_first = R1.length() + R2.length() + R3.length();
  const size_t r4_last =
      R1.length() + R2.length() + R3.length() + R4.length() - 1;

  // This call should swap R2 with R4.
  SwapIntervals(r2_first, r2_last, r4_first, r4_last, &all_regions);

  ASSERT_EQ(R1 + R4 + R3 + R2 + R5, all_regions);
}
// Swaps two adjacent regions that are surrounded by other regions on both
// sides (neither of them touches an end of the string).
TEST(SwapRegionsTest, SwapIntervalsConsecutiveEdge) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "++++region3++++";
  const std::string R4 = "---------region4---------";
  std::string all_regions = R1 + R2 + R3 + R4;

  // Inclusive bounds of R2 and R3 inside `all_regions`.
  const size_t r2_first = R1.length();
  const size_t r2_last = R1.length() + R2.length() - 1;
  const size_t r3_first = R1.length() + R2.length();
  const size_t r3_last = R1.length() + R2.length() + R3.length() - 1;

  // This call should swap R2 with R3.
  SwapIntervals(r2_first, r2_last, r3_first, r3_last, &all_regions);

  ASSERT_EQ(R1 + R3 + R2 + R4, all_regions);
}
// Swaps two adjacent regions where the second one ends the string, so the
// pair is not followed by any other region.
TEST(SwapRegionsTest, SwapIntervalsConsecutiveNonEdge) {
  const std::string R1 = "|region1|";
  const std::string R2 = "; region2;";
  const std::string R3 = "---------region3---------";
  const std::string R4 = "++region4++";
  const std::string R5 = "***region5***";
  std::string all_regions = R1 + R2 + R3 + R4 + R5;

  // Inclusive bounds of R4 and R5 inside `all_regions`.
  const size_t r4_first = R1.length() + R2.length() + R3.length();
  const size_t r4_last =
      R1.length() + R2.length() + R3.length() + R4.length() - 1;
  const size_t r5_first =
      R1.length() + R2.length() + R3.length() + R4.length();
  const size_t r5_last = R1.length() + R2.length() + R3.length() +
                         R4.length() + R5.length() - 1;

  // This call should swap R4 with R5.
  SwapIntervals(r4_first, r4_last, r5_first, r5_last, &all_regions);

  ASSERT_EQ(R1 + R2 + R3 + R5 + R4, all_regions);
}
} // namespace
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint

View File

@ -0,0 +1,107 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fuzzers/tint_regex_fuzzer/wgsl_mutator.h"
#include <cassert>
#include <cstring>
#include <iostream>
#include <map>
#include <random>
#include <regex>
#include <string>
#include <utility>
#include <vector>
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
namespace {
/// Draws a uniformly distributed integer from a closed range.
/// @param lower_bound - smallest value that may be returned.
/// @param upper_bound - largest value that may be returned.
/// @param generator - the random number generator to draw from.
/// @return a value in `[lower_bound, upper_bound]`.
size_t GetRandomIntFromRange(size_t lower_bound,
                             size_t upper_bound,
                             std::mt19937* generator) {
  using Distribution = std::uniform_int_distribution<size_t>;
  return Distribution(lower_bound, upper_bound)(*generator);
}
} // namespace
/// Collects the starting position of every occurrence of `delimiter` in
/// `wgsl_code`, in ascending order. The search resumes one character after
/// each match, so overlapping occurrences are all reported.
std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
                                         const std::string& wgsl_code) {
  std::vector<size_t> positions;
  size_t match = wgsl_code.find(delimiter, 0);
  while (match != std::string::npos) {
    positions.push_back(match);
    match = wgsl_code.find(delimiter, match + 1);
  }
  return positions;
}
/// Exchanges the characters in the inclusive interval [idx1, idx2] with those
/// in the inclusive interval [idx3, idx4] of `wgsl_code`. The first interval
/// is expected to lie before the second one.
void SwapIntervals(size_t idx1,
                   size_t idx2,
                   size_t idx3,
                   size_t idx4,
                   std::string* wgsl_code) {
  // Both intervals are inclusive, hence the "+ 1".
  const size_t first_length = idx2 - idx1 + 1;
  const size_t second_length = idx4 - idx3 + 1;

  // Take copies of both regions before the string is modified.
  const std::string first_region = wgsl_code->substr(idx1, first_length);
  const std::string second_region = wgsl_code->substr(idx3, second_length);

  // Rewrite the later interval first: doing so leaves every position in front
  // of idx3, and therefore idx1, unchanged even when the two regions have
  // different lengths.
  wgsl_code->replace(idx3, second_region.size(), first_region);
  wgsl_code->replace(idx1, first_region.size(), second_region);
}
/// Mutates the WGSL text stored in `wgsl_code` in place by swapping two
/// randomly chosen regions, each running from one occurrence of `delimiter`
/// to a later one.
/// @return the size of the mutated text, or 0 if the text contains fewer than
/// three delimiters or the mutated text would exceed `max_size`.
size_t FuzzEnclosedRegions(size_t size,
                           size_t max_size,
                           const std::string& delimiter,
                           uint8_t* wgsl_code,
                           std::mt19937* generator) {
  std::string program(wgsl_code, wgsl_code + size);
  const std::vector<size_t> positions =
      FindDelimiterIndices(delimiter, program);
  const size_t count = positions.size();

  // Two regions need at least three delimiters (the middle one may be shared).
  if (count < 3) {
    return 0;
  }

  // Draw four ascending indices into `positions`. Each upper bound leaves
  // enough later delimiters for the draws that follow. The second region may
  // start at the very delimiter that closes the first one.
  const size_t first_begin = GetRandomIntFromRange(0, count - 3U, generator);
  const size_t first_end =
      GetRandomIntFromRange(first_begin + 1U, count - 2U, generator);
  const size_t second_begin =
      GetRandomIntFromRange(first_end, count - 2U, generator);
  const size_t second_end =
      GetRandomIntFromRange(second_begin + 1U, count - 1U, generator);

  SwapIntervals(positions[first_begin], positions[first_end],
                positions[second_begin], positions[second_end], &program);

  // Never write more than the caller's buffer can hold.
  if (program.size() > max_size) {
    return 0;
  }

  memcpy(wgsl_code, program.data(), program.size());
  return program.size();
}
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint

View File

@ -0,0 +1,68 @@
// Copyright 2021 The Tint Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
#define FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_
#include <random>
#include <string>
#include <utility>
#include <vector>
namespace tint {
namespace fuzzers {
namespace regex_fuzzer {
/// Given a delimiter, returns a vector that contains the starting positions
/// of all occurrences of the delimiter in the WGSL code, in ascending order.
/// @param delimiter - the delimiter of the enclosed region.
/// @param wgsl_code - the string (WGSL code) to search; it is not modified.
/// @return a vector with the positions of the delimiter in the WGSL code.
std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
const std::string& wgsl_code);
/// Given 4 indices, idx1, idx2, idx3 and idx4 it swaps the region
/// in the interval [idx1, idx2] with the region in the interval [idx3, idx4]
/// in wgsl_code. Both intervals are inclusive.
/// @param idx1 - starting index of the first region.
/// @param idx2 - terminating index of the first region.
/// @param idx3 - starting index of the second region.
/// @param idx4 - terminating index of the second region.
/// @param wgsl_code - the string where the swap will occur.
void SwapIntervals(size_t idx1,
size_t idx2,
size_t idx3,
size_t idx4,
std::string* wgsl_code);
/// A function that, given an initial string (valid WGSL code) and a delimiter,
/// mutates the string in place (the result may be valid or invalid WGSL code)
/// by picking two random regions enclosed by the delimiter and swapping them.
/// @param size - size of the string that will be mutated.
/// @param max_size - maximal allowed mutation size.
/// @param delimiter - the delimiter that will be used to find enclosed regions.
/// @param wgsl_code - buffer holding the initial string (WGSL code); it is
/// overwritten with the mutated string.
/// @param generator - the random number generator.
/// @return size of the mutated string, or 0 if the string contains fewer than
/// three delimiters or the mutated string would be larger than `max_size`.
size_t FuzzEnclosedRegions(size_t size,
size_t max_size,
const std::string& delimiter,
uint8_t* wgsl_code,
std::mt19937* generator);
} // namespace regex_fuzzer
} // namespace fuzzers
} // namespace tint
#endif // FUZZERS_TINT_REGEX_FUZZER_WGSL_MUTATOR_H_