Expand ASCII short circuit.

This CL expands the ASCII short circuit to add the number range and _
into IsXIDContinue.

IsXIDStart is updated to filter out anything less than the first
sequence after the (a-zA-Z) blocks, as those values won't be XIDStart but
do cover all of the common ASCII punctuation characters.

Change-Id: Ib839d9840f5a1ecc3d2e80774b11af2444e9f439
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/97071
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Dan Sinclair <dsinclair@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
dan sinclair 2022-07-25 13:41:21 +00:00 committed by Dawn LUCI CQ
parent b0499e446f
commit 08482ec7ed
1 changed files with 13 additions and 2 deletions

View File

@ -306,15 +306,26 @@ constexpr size_t kNumXIDContinueRanges = sizeof(kXIDContinueRanges) / sizeof(kXI
} // namespace
bool CodePoint::IsXIDStart() const {
    // ASCII fast path. Most of our current source is ASCII, and the binary
    // search would only reach these entries last, so answer without it.
    //
    // 0x000aa is the start of the next sequence after the [a-zA-Z] blocks,
    // so below that value only the ASCII letters can be XIDStart. Everything
    // else in that span (which covers most of the common ASCII symbols) is
    // rejected here.
    if (value < 0x000aa) {
        const bool is_lower = value >= 'a' && value <= 'z';
        const bool is_upper = value >= 'A' && value <= 'Z';
        return is_lower || is_upper;
    }

    // Anything at or above 0x000aa is looked up in the XIDStart range table.
    return std::binary_search(kXIDStartRanges, kXIDStartRanges + kNumXIDStartRanges, *this);
}
bool CodePoint::IsXIDContinue() const {
    // ASCII fast path: digits and underscore are accepted up front, since
    // most of our current source is ASCII and the binary search would only
    // reach these entries last.
    const bool is_digit = value >= '0' && value <= '9';
    if (is_digit || value == '_') {
        return true;
    }

    // Anything that is XIDStart is also accepted as XIDContinue.
    if (IsXIDStart()) {
        return true;
    }

    // Otherwise consult the XIDContinue range table.
    return std::binary_search(kXIDContinueRanges, kXIDContinueRanges + kNumXIDContinueRanges,
                              *this);
}