Add vendored dependencies & cleanup script

This commit is contained in:
2022-02-11 14:01:25 -05:00
parent ea5ad06289
commit f55d064a0d
4315 changed files with 1296565 additions and 18 deletions

View File

@@ -0,0 +1,156 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Character Map Class
//
// A fast, bit-vector map for 8-bit unsigned characters.
// This class is useful for non-character purposes as well.
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// A set of 8-bit character values stored as a 256-bit vector (four 64-bit
// words).  Character `c` is a member when bit (c % 64) of word (c / 64) is
// set.
//
// The constexpr members are written as single return statements so they stay
// usable under C++11 constexpr rules.
class Charmap {
 public:
  // Constructs an empty map.
  constexpr Charmap() : m_() {}

  // Initializes with the first `len` bytes at `str`.  Note that NUL is not
  // treated as a terminator, but rather as a char to be set like any other.
  // A zero or negative `len` yields an empty map.
  Charmap(const char* str, int len) : m_() {
    for (int i = 0; i < len; ++i) SetChar(str[i]);
  }

  // Initializes with a NUL-terminated string.  The terminating NUL is not
  // added to the charmap.
  explicit Charmap(const char* str) : m_() {
    for (; *str != '\0'; ++str) SetChar(*str);
  }

  // Returns true if `c` is a member of this map.
  constexpr bool contains(unsigned char c) const {
    return (m_[c / 64] >> (c % 64)) & 0x1;
  }

  // Returns true if and only if a character exists in both maps.
  bool IntersectsWith(const Charmap& c) const {
    for (size_t i = 0; i < sizeof(m_) / sizeof(m_[0]); ++i) {
      if ((m_[i] & c.m_[i]) != 0) return true;
    }
    return false;
  }

  // Returns true if no character is a member of this map.
  bool IsZero() const {
    for (uint64_t word : m_) {
      if (word != 0) return false;
    }
    return true;
  }

  // Containing only a single specified char.
  static constexpr Charmap Char(char x) {
    return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Containing all the chars in the C-string 's'.
  // Note that this is expensively recursive because of the C++11 constexpr
  // formulation. Use only in constexpr initializers.
  static constexpr Charmap FromString(const char* s) {
    return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
  }

  // Containing all the chars in the closed interval [lo,hi].
  static constexpr Charmap Range(char lo, char hi) {
    return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection.
  friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
                   a.m_[3] & b.m_[3]);
  }

  // Set union.
  friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
                   a.m_[3] | b.m_[3]);
  }

  // Set complement over all 256 character values.
  friend constexpr Charmap operator~(const Charmap& a) {
    return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

 private:
  // Builds a map directly from its four words; b0 holds chars 0-63, b1 holds
  // chars 64-127, and so on.
  constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // The bits of word `word` that fall in the closed interval [lo, hi],
  // computed as [0, hi + 1) with [0, lo) removed.
  static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
                                         uint64_t word) {
    return OpenRangeFromZeroForWord(hi + 1, word) &
           ~OpenRangeFromZeroForWord(lo, word);
  }

  // All the chars in the specified word of the range [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    // The last branch is only reached when `upper` lies strictly inside the
    // word, so `upper % 64` is in [1, 63] and the shift count is valid.
    return (upper <= 64 * word)
               ? 0
               : (upper >= 64 * (word + 1))
                     ? ~static_cast<uint64_t>(0)
                     : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
  }

  // The single bit for `x` if it lives in word `word`, otherwise 0.
  static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
    return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
  }

  // Adds `c` to the map.
  void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  uint64_t m_[4];
};
// Charmap counterparts of the character-classification predicates declared in
// <cctype>.  Each function returns the set of chars for one class.
constexpr Charmap UpperCharmap() {
  return Charmap::Range('A', 'Z');
}
constexpr Charmap LowerCharmap() {
  return Charmap::Range('a', 'z');
}
constexpr Charmap DigitCharmap() {
  return Charmap::Range('0', '9');
}
// Letters: the union of the upper- and lower-case sets.
constexpr Charmap AlphaCharmap() {
  return UpperCharmap() | LowerCharmap();
}
// Letters and decimal digits.
constexpr Charmap AlnumCharmap() {
  return AlphaCharmap() | DigitCharmap();
}
// Hexadecimal digits in either case.
constexpr Charmap XDigitCharmap() {
  return Charmap::Range('a', 'f') | Charmap::Range('A', 'F') | DigitCharmap();
}
// Every char from 0x20 through 0x7e inclusive.
constexpr Charmap PrintCharmap() {
  return Charmap::Range(0x20, 0x7e);
}
constexpr Charmap SpaceCharmap() {
  return Charmap::FromString(" \t\n\v\f\r");
}
// Everything in [0, 0x7f] that is not in PrintCharmap().
constexpr Charmap CntrlCharmap() {
  return ~PrintCharmap() & Charmap::Range(0, 0x7f);
}
constexpr Charmap BlankCharmap() {
  return Charmap::FromString(" \t");
}
// Printable chars that are not whitespace.
constexpr Charmap GraphCharmap() {
  return ~SpaceCharmap() & PrintCharmap();
}
// Graphical chars that are neither letters nor digits.
constexpr Charmap PunctCharmap() {
  return ~AlnumCharmap() & GraphCharmap();
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_

View File

@@ -0,0 +1,61 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cstdint>
#include "benchmark/benchmark.h"
namespace {
// Builds the fixed Charmap that the benchmark probes.
absl::strings_internal::Charmap MakeBenchmarkMap() {
  using absl::strings_internal::Charmap;

  // Eight 32-bit words give one membership bit for each of the 256 chars.
  // Multiplying by 0x11111111 replicates each seed nibble into all eight
  // nibbles of its word.
  uint32_t pattern[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
  for (uint32_t& word : pattern) word *= static_cast<uint32_t>(0x11111111UL);

  Charmap result;
  for (uint32_t ch = 0; ch < 256; ++ch) {
    const uint32_t bit = (pattern[ch / 32] >> (ch % 32)) & 1;
    if (bit) {
      result = result | Charmap::Char(ch);
    }
  }
  return result;
}
// Micro-benchmark for Charmap::contains.
void BM_Contains(benchmark::State& state) {
  // The lookup is repeated 10 times per iteration to increase the time spent
  // in each iteration.  The probed char changes on every call so the lookups
  // cannot be folded into a common subexpression.
  const absl::strings_internal::Charmap map_under_test = MakeBenchmarkMap();
  unsigned char probe = 0;
  int hits = 0;
  for (auto _ : state) {
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
    hits += map_under_test.contains(probe++);
  }
  // Keep the accumulated result observable so the lookups are not discarded.
  benchmark::DoNotOptimize(hits);
}
BENCHMARK(BM_Contains);
// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
// their running time is data-dependent and it is not worth characterizing
// "typical" data.
} // namespace

View File

@@ -0,0 +1,172 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cctype>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
constexpr absl::strings_internal::Charmap everything_map =
~absl::strings_internal::Charmap();
constexpr absl::strings_internal::Charmap nothing_map{};
// Exercises the runtime constructors, contains(), IntersectsWith() and
// IsZero().
TEST(Charmap, AllTests) {
  using absl::strings_internal::Charmap;

  // A zero-length buffer must produce an empty map.
  const Charmap also_nothing_map("", 0);
  ASSERT_TRUE(everything_map.contains('\0'));
  ASSERT_FALSE(nothing_map.contains('\0'));
  ASSERT_FALSE(also_nothing_map.contains('\0'));
  // Visits 1..255; the unsigned char then wraps to 0, which ends the loop.
  for (unsigned char ch = 1; ch != 0; ++ch) {
    ASSERT_TRUE(everything_map.contains(ch));
    ASSERT_FALSE(nothing_map.contains(ch));
    ASSERT_FALSE(also_nothing_map.contains(ch));
  }

  // Only the first five bytes ("&@#@^") are read.
  const Charmap symbols("&@#@^!@?", 5);
  ASSERT_TRUE(symbols.contains('&'));
  ASSERT_TRUE(symbols.contains('@'));
  ASSERT_TRUE(symbols.contains('#'));
  ASSERT_TRUE(symbols.contains('^'));
  ASSERT_FALSE(symbols.contains('!'));
  ASSERT_FALSE(symbols.contains('?'));
  int distinct = 0;
  for (unsigned char ch = 1; ch != 0; ++ch) {
    if (symbols.contains(ch)) ++distinct;
  }
  ASSERT_EQ(distinct, 4);

  // The length-taking constructor reads past an embedded NUL; the C-string
  // constructor stops at it.
  const Charmap lets("^abcde", 3);
  const Charmap lets2("fghij\0klmnop", 10);
  const Charmap lets3("fghij\0klmnop");
  ASSERT_TRUE(lets2.contains('k'));
  ASSERT_FALSE(lets3.contains('k'));

  ASSERT_TRUE(symbols.IntersectsWith(lets));
  ASSERT_FALSE(lets2.IntersectsWith(lets));
  ASSERT_TRUE(lets.IntersectsWith(symbols));
  ASSERT_FALSE(lets.IntersectsWith(lets2));

  ASSERT_TRUE(nothing_map.IsZero());
  ASSERT_FALSE(lets.IsZero());
}
namespace {
// Returns the members of `m` as a string, in increasing order of char value.
std::string Members(const absl::strings_internal::Charmap& m) {
  std::string members;
  for (size_t value = 0; value < 256; ++value) {
    if (!m.contains(static_cast<unsigned char>(value))) continue;
    members.push_back(static_cast<char>(value));
  }
  return members;
}
// Returns the chars lo, lo+1, ..., hi as a string.  Does not require lo <= hi:
// the value is incremented (wrapping past 255) until it equals hi.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string out(1, static_cast<char>(lo));
  for (unsigned char cur = lo; cur != hi;) {
    ++cur;
    out.push_back(static_cast<char>(cur));
  }
  return out;
}
} // namespace
// Checks that the factory functions and set operators can be evaluated in
// constant expressions and produce the expected members.
TEST(Charmap, Constexpr) {
  using absl::strings_internal::Charmap;

  constexpr Charmap kEmpty = nothing_map;
  EXPECT_THAT(Members(kEmpty), "");

  constexpr Charmap kA = Charmap::Char('A');
  EXPECT_THAT(Members(kA), "A");

  constexpr Charmap kAZ = Charmap::Range('A', 'Z');
  EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  constexpr Charmap kIdentifier = Charmap::Range('0', '9') |
                                  Charmap::Range('A', 'Z') |
                                  Charmap::Range('a', 'z') |
                                  Charmap::Char('_');
  EXPECT_THAT(Members(kIdentifier),
              "0123456789"
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
              "_"
              "abcdefghijklmnopqrstuvwxyz");

  constexpr Charmap kAll = everything_map;
  for (size_t value = 0; value < 256; ++value) {
    EXPECT_TRUE(kAll.contains(value)) << value;
  }

  // Repeated chars in the input appear only once in the set.
  constexpr Charmap kHello = Charmap::FromString("Hello, world!");
  EXPECT_THAT(Members(kHello), " !,Hdelorw");

  // test negation and intersection
  constexpr Charmap kABC = Charmap::Range('A', 'Z') & ~Charmap::Range('D', 'Z');
  EXPECT_THAT(Members(kABC), "ABC");
}
// Checks Charmap::Range against a reference string for every pair lo <= hi
// drawn from a list of boundary values.
TEST(Charmap, Range) {
  // Exhaustive testing takes too long, so test some of the boundaries that
  // are perhaps going to cause trouble.
  const std::vector<size_t> poi = {0,   1,   2,   3,   4,   7,   8,   9,  15,
                                   16,  17,  30,  31,  32,  33,  63,  64, 65,
                                   127, 128, 129, 223, 224, 225, 254, 255};
  for (size_t lo_index = 0; lo_index < poi.size(); ++lo_index) {
    const size_t lo = poi[lo_index];
    SCOPED_TRACE(lo);
    for (size_t hi_index = lo_index; hi_index < poi.size(); ++hi_index) {
      const size_t hi = poi[hi_index];
      SCOPED_TRACE(hi);
      EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(lo, hi)),
                  ClosedRangeString(lo, hi));
    }
  }
}
// Collapses a <cctype>-style int result (zero / non-zero) to a bool.
bool AsBool(int x) {
  return x != 0;
}
// Checks that each *Charmap() factory agrees with the matching <cctype>
// predicate for every value in [0, 255].
TEST(CharmapCtype, Match) {
  namespace si = absl::strings_internal;

  for (int code = 0; code < 256; ++code) {
    SCOPED_TRACE(code);
    SCOPED_TRACE(static_cast<char>(code));
    EXPECT_EQ(AsBool(std::isupper(code)), si::UpperCharmap().contains(code));
    EXPECT_EQ(AsBool(std::islower(code)), si::LowerCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isdigit(code)), si::DigitCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isalpha(code)), si::AlphaCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isalnum(code)), si::AlnumCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isxdigit(code)), si::XDigitCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isprint(code)), si::PrintCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isspace(code)), si::SpaceCharmap().contains(code));
    EXPECT_EQ(AsBool(std::iscntrl(code)), si::CntrlCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isblank(code)), si::BlankCharmap().contains(code));
    EXPECT_EQ(AsBool(std::isgraph(code)), si::GraphCharmap().contains(code));
    EXPECT_EQ(AsBool(std::ispunct(code)), si::PunctCharmap().contains(code));
  }
}
} // namespace

View File

@@ -0,0 +1,359 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <algorithm>
#include <cassert>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// Parameters of the kLargePowersOfFive table, which holds some large powers
// of 5 for fast computation.
//
// Constant step size for entries in the kLargePowersOfFive table. Each entry
// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep
// (or 5**27).
//
// In other words, the Nth entry in the table is 5**(27*N).
//
// 5**27 is the largest power of 5 that fits in 64 bits.
constexpr int kLargePowerOfFiveStep = 27;
// The largest legal index into the kLargePowersOfFive table.
//
// In other words, the largest precomputed power of 5 is 5**(27*20).
constexpr int kLargestPowerOfFiveIndex = 20;
// Table of powers of (5**27), up to (5**27)**20 == 5**540.
//
// Used to generate large powers of 5 while limiting the number of repeated
// multiplications required.
//
// Layout: the entries for i = 1..20 are stored back to back.  Entry i is
// (5**27)**i, occupies 2*i 32-bit words, and begins at word offset i*(i-1);
// these are the "start"/"end" values noted beside each entry.  Within an
// entry the least significant word comes first (5**27 is 0x6765c793fa10079d).
//
// clang-format off
const uint32_t kLargePowersOfFive[] = {
// 5**27 (i=1), start=0, end=2
0xfa10079dU, 0x6765c793U,
// 5**54 (i=2), start=2, end=6
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
// 5**81 (i=3), start=6, end=12
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
// 5**108 (i=4), start=12, end=20
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
0x20d3846fU, 0x06d00f73U,
// 5**135 (i=5), start=20, end=30
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
// 5**162 (i=6), start=30, end=42
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
// 5**189 (i=7), start=42, end=56
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
0x94151217U, 0x0072e9f7U,
// 5**216 (i=8), start=56, end=72
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
// 5**243 (i=9), start=72, end=90
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
// 5**270 (i=10), start=90, end=110
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
0x0d39e796U, 0x00079250U,
// 5**297 (i=11), start=110, end=132
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
// 5**324 (i=12), start=132, end=156
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
// 5**351 (i=13), start=156, end=182
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
0x859a4940U, 0x00007fb6U,
// 5**378 (i=14), start=182, end=210
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
// 5**405 (i=15), start=210, end=240
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
// 5**432 (i=16), start=240, end=272
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
0x3364ea62U, 0x0000086aU,
// 5**459 (i=17), start=272, end=306
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
// 5**486 (i=18), start=306, end=342
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
// 5**513 (i=19), start=342, end=380
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
0xf0046d27U, 0x0000008dU,
// 5**540 (i=20), start=380, end=420
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
};
// clang-format on
// Returns a pointer to the first word of the table entry for (5**27)**i.
// i must be between 1 and 20, inclusive.
const uint32_t* LargePowerOfFiveData(int i) {
  // Entries are packed back to back and entry k holds 2*k words, so entry i
  // begins after 2 + 4 + ... + 2*(i-1) == i*(i-1) words.
  const int first_word = (i - 1) * i;
  return &kLargePowersOfFive[first_word];
}
// Returns how many 32-bit words the table entry for (5**27)**i occupies.
// i must be between 1 and 20, inclusive.
int LargePowerOfFiveSize(int i) {
  return i * 2;
}
} // namespace
// kFiveToNth[n] == 5**n for n in [0, 13].
ABSL_DLL const uint32_t kFiveToNth[14] = {
1, 5, 25, 125, 625, 3125, 15625,
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
};
// kTenToNth[n] == 10**n for n in [0, 9].
ABSL_DLL const uint32_t kTenToNth[10] = {
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
};
// Loads the mantissa of `fp` into *this and returns the decimal exponent that
// goes with it.  `fp.type` must be FloatType::kNumber.
template <int max_words>
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
                                              int significant_digits) {
  SetToZero();
  assert(fp.type == FloatType::kNumber);

  if (fp.subrange_begin != nullptr) {
    // The mantissa was not parsed exactly; re-read its digits from the
    // recorded character range.
    const int exponent_adjust =
        ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
    return fp.literal_exponent + exponent_adjust;
  }

  // We already exactly parsed the mantissa, so split it into two 32-bit
  // words and record how many of them are in use.
  words_[0] = fp.mantissa & 0xffffffffu;
  words_[1] = fp.mantissa >> 32;
  if (words_[1] != 0) {
    size_ = 2;
  } else if (words_[0] != 0) {
    size_ = 1;
  }
  return fp.exponent;
}
// Reads the decimal number in [begin, end), which may contain a decimal
// point, into *this as an integer.
//
// At most `significant_digits` digits are consumed after leading and trailing
// zeroes are stripped; `significant_digits` must not exceed Digits10() + 1.
//
// Returns the decimal exponent adjustment: the count of integer digits that
// were dropped minus the count of fractional digits that were read.
template <int max_words>
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
int significant_digits) {
assert(significant_digits <= Digits10() + 1);
SetToZero();
bool after_decimal_point = false;
// Discard any leading zeroes before the decimal point
while (begin < end && *begin == '0') {
++begin;
}
int dropped_digits = 0;
// Discard any trailing zeroes. These may or may not be after the decimal
// point.
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
if (begin < end && *std::prev(end) == '.') {
// If the string ends in '.', either before or after dropping zeroes, then
// drop the decimal point and look for more digits to drop.
// Any zeroes counted so far were fractional, so the count restarts here.
dropped_digits = 0;
--end;
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
} else if (dropped_digits) {
// We dropped digits, and aren't sure if they're before or after the decimal
// point. Figure that out now.
const char* dp = std::find(begin, end, '.');
if (dp != end) {
// The dropped trailing digits were after the decimal point, so don't
// count them.
dropped_digits = 0;
}
}
// Any non-fraction digits we dropped need to be accounted for in our exponent
// adjustment.
int exponent_adjust = dropped_digits;
// Digits are accumulated in `queued` and flushed into *this in groups of up
// to kMaxSmallPowerOfTen digits.
uint32_t queued = 0;
int digits_queued = 0;
for (; begin != end && significant_digits > 0; ++begin) {
if (*begin == '.') {
after_decimal_point = true;
continue;
}
if (after_decimal_point) {
// For each fractional digit we emit in our parsed integer, adjust our
// decimal exponent to compensate.
--exponent_adjust;
}
int digit = (*begin - '0');
--significant_digits;
if (significant_digits == 0 && std::next(begin) != end &&
(digit == 0 || digit == 5)) {
// If this is the very last significant digit, but insignificant digits
// remain, we know that the last of those remaining insignificant digits
// is nonzero. (If it wasn't, we would have stripped it before we got
// here.) So if this final digit is a 0 or 5, adjust it upward by 1.
//
// This adjustment is what allows incredibly large mantissas ending in
// 500000...000000000001 to correctly round up, rather than to nearest.
++digit;
}
queued = 10 * queued + digit;
++digits_queued;
if (digits_queued == kMaxSmallPowerOfTen) {
// Flush a full group: shift the value left by that many decimal digits and
// add the queued digits.
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
AddWithCarry(0, queued);
queued = digits_queued = 0;
}
}
// Encode any remaining digits.
if (digits_queued) {
MultiplyBy(kTenToNth[digits_queued]);
AddWithCarry(0, queued);
}
// If any insignificant digits remain, we will drop them. But if we have not
// yet read the decimal point, then we have to adjust the exponent to account
// for the dropped digits.
if (begin < end && !after_decimal_point) {
// This call to std::find will result in a pointer either to the decimal
// point, or to the end of our buffer if there was none.
//
// Either way, [begin, decimal_point) will contain the set of dropped digits
// that require an exponent adjustment.
const char* decimal_point = std::find(begin, end, '.');
exponent_adjust += (decimal_point - begin);
}
return exponent_adjust;
}
// Returns 5**n for non-negative n, seeding the result from the
// kLargePowersOfFive table where possible.
//
// If the value needs more than max_words words, the result holds only its
// low max_words words, in line with MultiplyBy(), which drops carries that do
// not fit.
template <int max_words>
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
    int n) {
  BigUnsigned answer(1u);

  // Seed from the table of large powers, if possible.
  bool first_pass = true;
  while (n >= kLargePowerOfFiveStep) {
    const int big_power =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    const uint32_t* const table_words = LargePowerOfFiveData(big_power);
    const int table_size = LargePowerOfFiveSize(big_power);
    if (first_pass) {
      // Just copy, rather than multiplying by 1.  A table entry can be up to
      // 40 words long, so clamp the copy to the capacity of words_; copying
      // the whole entry would write past the end of the array when
      // max_words is small.
      const int copied_words = std::min(table_size, max_words);
      std::copy(table_words, table_words + copied_words, answer.words_);
      answer.size_ = copied_words;
      first_pass = false;
    } else {
      answer.MultiplyBy(table_size, table_words);
    }
    n -= kLargePowerOfFiveStep * big_power;
  }

  // Fewer than kLargePowerOfFiveStep factors of 5 remain.
  answer.MultiplyByFiveToTheNth(n);
  return answer;
}
// Computes word `step` of the product of *this (using its first
// `original_size` words) and the `other_size`-word number at `other_words`.
// The low 32 bits of the column sum are stored in words_[step]; the overflow
// is added in starting at word step + 1.
template <int max_words>
void BigUnsigned<max_words>::MultiplyStep(int original_size,
                                          const uint32_t* other_words,
                                          int other_size, int step) {
  // Visit every index pair (lhs, rhs) with lhs + rhs == step, starting from
  // the largest usable lhs index.
  int lhs_index = std::min(original_size - 1, step);
  int rhs_index = step - lhs_index;
  uint64_t column_low = 0;   // low 32 bits of the running column sum
  uint64_t column_high = 0;  // accumulated overflow out of those 32 bits
  while (lhs_index >= 0 && rhs_index < other_size) {
    const uint64_t product =
        static_cast<uint64_t>(words_[lhs_index]) * other_words[rhs_index];
    column_low += product;
    column_high += (column_low >> 32);
    column_low &= 0xffffffff;
    --lhs_index;
    ++rhs_index;
  }
  AddWithCarry(step + 1, column_high);
  words_[step] = column_low & 0xffffffff;
  if (column_low > 0 && size_ <= step) {
    size_ = step + 1;
  }
}
// Returns the value as a decimal string; zero is rendered as "0".
template <int max_words>
std::string BigUnsigned<max_words>::ToString() const {
  std::string digits;
  // Peel digits off a scratch copy, least significant first.
  BigUnsigned<max_words> remaining = *this;
  for (; remaining.size() > 0;) {
    int next_digit = remaining.DivMod<10>();
    digits.push_back('0' + next_digit);
  }
  if (digits.empty()) {
    digits.push_back('0');
  }
  // The digits were produced in reverse order.
  std::reverse(digits.begin(), digits.end());
  return digits;
}
// Explicit instantiations for the two sizes used: the member functions
// defined in this file are instantiated only for these values of max_words.
template class BigUnsigned<4>;
template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,423 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/config.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The largest power that 5 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfFive = 13;
// The largest power that 10 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfTen = 9;
ABSL_DLL extern const uint32_t
kFiveToNth[kMaxSmallPowerOfFive + 1];
ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
// Large, fixed-width unsigned integer.
//
// Exact rounding for decimal-to-binary floating point conversion requires very
// large integer math, but a design goal of absl::from_chars is to avoid
// allocating memory. The integer precision needed for decimal-to-binary
// conversions is large but bounded, so a huge fixed-width integer class
// suffices.
//
// This is an intentionally limited big integer class. Only needed operations
// are implemented. All storage lives in an array data member, and all
// arithmetic is done in-place, to avoid requiring separate storage for operand
// and result.
//
// This is an internal class. Some methods live in the .cc file, and are
// instantiated only for the values of max_words we need.
template <int max_words>
class BigUnsigned {
public:
static_assert(max_words == 4 || max_words == 84,
"unsupported max_words value");
// Constructs a BigUnsigned with the value zero.
BigUnsigned() : size_(0), words_{} {}
// Constructs a BigUnsigned holding `v`.  The low 32 bits go in words_[0] and
// the high 32 bits in words_[1]; size_ is 2 if the high word is nonzero, 1 if
// only the low word is, and 0 for v == 0.
explicit constexpr BigUnsigned(uint64_t v)
: size_((v >> 32) ? 2 : v ? 1 : 0),
words_{static_cast<uint32_t>(v & 0xffffffffu),
static_cast<uint32_t>(v >> 32)} {}
// Constructs a BigUnsigned from a string_view holding a decimal integer.  An
// empty string, or one containing anything other than ASCII digits, produces
// the value 0.
explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
  // Check for valid input, returning a 0 otherwise.  This is reasonable
  // behavior only because this constructor is for unit tests.
  const bool only_digits =
      std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) == sv.end();
  if (!only_digits || sv.empty()) {
    return;
  }
  const char* const first = sv.data();
  const int exponent_adjust =
      ReadDigits(first, first + sv.size(), Digits10() + 1);
  // Scale back up by the power of ten that ReadDigits reports.
  if (exponent_adjust > 0) {
    MultiplyByTenToTheNth(exponent_adjust);
  }
}
// Loads the mantissa value of a previously-parsed float.
//
// Returns the associated decimal exponent. The value of the parsed float is
// exactly *this * 10**exponent.
int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
// Returns the number of decimal digits of precision this type provides.  All
// numbers with this many decimal digits or fewer are representable by this
// type.
//
// Analogous to std::numeric_limits<BigUnsigned>::digits10.
static constexpr int Digits10() {
  // Each 32-bit word contributes log10(2**32) decimal digits; 9975007/1035508
  // is very slightly less than that value.
  return static_cast<int>(static_cast<uint64_t>(max_words) * 9975007 /
                          1035508);
}
// Shifts left by the given number of bits.  A non-positive `count` is a
// no-op.  Bits shifted beyond word max_words - 1 are discarded, and a shift
// of max_words * 32 bits or more sets the value to zero.
void ShiftLeft(int count) {
if (count > 0) {
// Split the shift into whole words plus a remainder of 0-31 bits.
const int word_shift = count / 32;
if (word_shift >= max_words) {
SetToZero();
return;
}
// NOTE(review): std::min is parenthesized here, presumably to keep a
// function-like min() macro from expanding -- confirm.
size_ = (std::min)(size_ + word_shift, max_words);
count %= 32;
if (count == 0) {
// Whole-word shift: move the words up by word_shift positions.
std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
} else {
// Work from the top word down so each source word is read before it is
// overwritten.  Each destination word combines the shifted source word
// with the bits carried up from the source word below it.
for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
words_[i] = (words_[i - word_shift] << count) |
(words_[i - word_shift - 1] >> (32 - count));
}
// The lowest destination word has no word below it to take bits from.
words_[word_shift] = words_[0] << count;
// Grow size_ if necessary.
if (size_ < max_words && words_[size_]) {
++size_;
}
}
// Zero the low words vacated by the whole-word part of the shift.
std::fill(words_, words_ + word_shift, 0u);
}
}
// Multiplies by v in-place.
void MultiplyBy(uint32_t v) {
if (size_ == 0 || v == 1) {
return;
}
if (v == 0) {
SetToZero();
return;
}
const uint64_t factor = v;
uint64_t window = 0;
for (int i = 0; i < size_; ++i) {
window += factor * words_[i];
words_[i] = window & 0xffffffff;
window >>= 32;
}
// If carry bits remain and there's space for them, grow size_.
if (window && size_ < max_words) {
words_[size_] = window & 0xffffffff;
++size_;
}
}
// Multiplies by the 64-bit value v in-place.  Uses the single-word overload
// when v fits in 32 bits and the multi-word overload otherwise.
void MultiplyBy(uint64_t v) {
  const uint32_t low = static_cast<uint32_t>(v);
  const uint32_t high = static_cast<uint32_t>(v >> 32);
  if (high == 0) {
    MultiplyBy(low);
    return;
  }
  uint32_t halves[2] = {low, high};
  MultiplyBy(2, halves);
}
// Multiplies in place by 5 to the power of n.  n must be non-negative.
void MultiplyByFiveToTheNth(int n) {
  // Take off the largest single-word power of five as often as possible.
  for (; n >= kMaxSmallPowerOfFive; n -= kMaxSmallPowerOfFive) {
    MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
  }
  if (n <= 0) {
    return;
  }
  // One table lookup covers whatever exponent is left.
  MultiplyBy(kFiveToNth[n]);
}
// Multiplies this value by 10**n, in place. n must be non-negative.
void MultiplyByTenToTheNth(int n) {
  if (n > kMaxSmallPowerOfTen) {
    // Large n: since 10**n == 5**n * 2**n, multiply by 5**n and then shift
    // left by n bits. This needs fewer multiplications overall.
    MultiplyByFiveToTheNth(n);
    ShiftLeft(n);
    return;
  }
  if (n > 0) {
    // Small n: a single multiplication by the tabulated power of ten.
    MultiplyBy(kTenToNth[n]);
  }
}
// Returns the value of 5**n, for non-negative n. This implementation uses
// a lookup table, and is faster than seeding a BigUnsigned with 1 and calling
// MultiplyByFiveToTheNth().
static BigUnsigned FiveToTheNth(int n);
// Multiplies by another BigUnsigned, in-place.
template <int M>
void MultiplyBy(const BigUnsigned<M>& other) {
MultiplyBy(other.size(), other.words());
}
// Resets this value to zero: clears every word currently in use and marks
// the number as empty.
void SetToZero() {
  std::fill_n(words_, size_, 0u);
  size_ = 0;
}
// Returns the word stored at position `index`. The access is range-checked:
// any index outside [0, size_) yields 0.
uint32_t GetWord(int index) const {
  const bool in_range = (0 <= index) && (index < size_);
  return in_range ? words_[index] : 0u;
}
// Returns this integer as a decimal string. This is not used in the decimal-
// to-binary conversion; it is intended to aid in testing.
std::string ToString() const;
// Returns size_, the number of leading words that may carry significant
// values (0 when this BigUnsigned stores the value 0).
int size() const { return size_; }
// Returns a read-only pointer to the first element of the word array. The
// pointer refers to storage inside this object.
const uint32_t* words() const { return words_; }
private:
// Reads the number between [begin, end), possibly containing a decimal point,
// into this BigUnsigned.
//
// Callers are required to ensure [begin, end) contains a valid number, with
// one or more decimal digits and at most one decimal point. This routine
// will behave unpredictably if these preconditions are not met.
//
// Only the first `significant_digits` digits are read. Digits beyond this
// limit are "sticky": If the final significant digit is 0 or 5, and if any
// dropped digit is nonzero, then that final significant digit is adjusted up
// to 1 or 6. This adjustment allows for precise rounding.
//
// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
// account for the decimal point and for dropped significant digits. After
// this function returns,
// actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
int ReadDigits(const char* begin, const char* end, int significant_digits);
// Performs a step of big integer multiplication. This computes the full
// (64-bit-wide) values that should be added at the given index (step), and
// adds to that location in-place.
//
// Because our math all occurs in place, we must multiply starting from the
// highest word working downward. (This is a bit more expensive due to the
// extra carries involved.)
//
// This must be called in steps, for each word to be calculated, starting from
// the high end and working down to 0. The first value of `step` should be
// `std::min(original_size + other.size_ - 2, max_words - 1)`.
// The reason for this expression is that multiplying the i'th word from one
// multiplicand and the j'th word of another multiplicand creates a
// two-word-wide value to be stored at the (i+j)'th element. The highest
// word indices we will access are `original_size - 1` from this object, and
// `other.size_ - 1` from our operand. Therefore,
// `original_size + other.size_ - 2` is the first step we should calculate,
// but limited on an upper bound by max_words.
// Working from high-to-low ensures that we do not overwrite the portions of
// the initial value of *this which are still needed for later steps.
//
// Once called with step == 0, *this contains the result of the
// multiplication.
//
// `original_size` is the size_ of *this before the first call to
// MultiplyStep(). `other_words` and `other_size` are the contents of our
// operand. `step` is the step to perform, as described above.
void MultiplyStep(int original_size, const uint32_t* other_words,
int other_size, int step);
// Multiplies this value, in place, by the `other_size`-word operand stored at
// `other_words`. Runs MultiplyStep() once per result word, from the highest
// word that can be produced (capped at the last word of the buffer) down to
// word 0.
void MultiplyBy(int other_size, const uint32_t* other_words) {
  const int size_before = size_;
  int step = (std::min)(size_before + other_size - 2, max_words - 1);
  while (step >= 0) {
    MultiplyStep(size_before, other_words, other_size, step);
    --step;
  }
}
// Adds the 32-bit `value` into word `index`, rippling any carry into the
// following words. A carry that runs past the last word is dropped. Adding
// zero leaves the object (including size_) untouched.
void AddWithCarry(int index, uint32_t value) {
  if (value == 0) return;
  while (index < max_words && value != 0) {
    const uint32_t sum = words_[index] + value;
    words_[index] = sum;
    // A wrapped sum is smaller than the addend; carry 1 into the next word.
    const bool wrapped = sum < value;
    if (wrapped) ++index;
    value = wrapped ? 1 : 0;
  }
  // Make sure size_ covers the highest word that was touched.
  size_ = (std::min)(max_words, (std::max)(index + 1, size_));
}
// Adds the 64-bit `value` into the words starting at `index` (low half into
// word `index`, high half into word `index + 1`), with carry. Does nothing
// when `value` is zero or `index` is not below max_words.
void AddWithCarry(int index, uint64_t value) {
if (value && index < max_words) {
// Split the addend into its two 32-bit halves.
uint32_t high = value >> 32;
uint32_t low = value & 0xffffffff;
words_[index] += low;
// A sum smaller than the addend means the 32-bit addition wrapped, so a
// carry of 1 has to be folded into the high half.
if (words_[index] < low) {
++high;
if (high == 0) {
// Carry from the low word caused our high word to overflow.
// Short circuit here to do the right thing.
AddWithCarry(index + 2, static_cast<uint32_t>(1));
return;
}
}
if (high > 0) {
// The 32-bit overload adds the high half and also updates size_.
AddWithCarry(index + 1, high);
} else {
// Normally 32-bit AddWithCarry() sets size_, but since we don't call
// it when `high` is 0, do it ourselves here.
size_ = (std::min)(max_words, (std::max)(index + 1, size_));
}
}
}
// Divides this value, in place, by the compile-time constant `divisor` and
// returns the remainder of that division.
template <uint32_t divisor>
uint32_t DivMod() {
  uint64_t remainder = 0;
  // Schoolbook long division, one 32-bit word at a time from the top down.
  for (int i = size_; i-- > 0;) {
    const uint64_t dividend = (remainder << 32) + words_[i];
    // `remainder` is always below `divisor` here, so the quotient fits in a
    // uint32_t.
    words_[i] = static_cast<uint32_t>(dividend / divisor);
    remainder = dividend % divisor;
  }
  // Drop high words that became zero.
  while (size_ > 0 && words_[size_ - 1] == 0) {
    --size_;
  }
  return static_cast<uint32_t>(remainder);
}
// The number of elements in words_ that may carry significant values.
// All elements beyond this point are 0.
//
// When size_ is 0, this BigUnsigned stores the value 0.
// When size_ is nonzero, it is *not* guaranteed that words_[size_ - 1] is
// nonzero. This can occur due to overflow truncation.
// In particular, x.size_ != y.size_ does *not* imply x != y.
int size_;
uint32_t words_[max_words];
};
// Three-way comparison of two big integers, which may have different word
// capacities.
//
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
template <int N, int M>
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  // Scan from the most significant word down; GetWord() yields 0 for words
  // beyond the shorter operand's size. The first difference decides.
  for (int i = (std::max)(lhs.size(), rhs.size()); i-- > 0;) {
    const uint32_t left = lhs.GetWord(i);
    const uint32_t right = rhs.GetWord(i);
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  return 0;
}
// Two big integers are equal when every word matches. Words past either
// operand's size read as 0 via GetWord(), so differing sizes alone do not
// make values unequal.
template <int N, int M>
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int word_count = (std::max)(lhs.size(), rhs.size());
  int i = 0;
  while (i < word_count && lhs.GetWord(i) == rhs.GetWord(i)) {
    ++i;
  }
  // Reaching the end without a mismatch means the values are equal.
  return i >= word_count;
}
// The remaining comparison operators are all derived from operator== and
// Compare(), and accept operands with different word capacities.

// Negation of operator==.
template <int N, int M>
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
return !(lhs == rhs);
}
// True when Compare() reports lhs < rhs (that is, returns -1).
template <int N, int M>
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
return Compare(lhs, rhs) == -1;
}
// lhs > rhs is rhs < lhs.
template <int N, int M>
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
return rhs < lhs;
}
// lhs <= rhs is the negation of rhs < lhs.
template <int N, int M>
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
return !(rhs < lhs);
}
// lhs >= rhs is the negation of lhs < rhs.
template <int N, int M>
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
return !(lhs < rhs);
}
// Streams the decimal representation produced by ToString(). For testing
// purposes only.
template <int N>
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
  os << num.ToString();
  return os;
}
// Explicit instantiation declarations for the sizes of BigUnsigned that we
// are using.
//
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
// still bigger than an int128, and 84 is a large value we will want to use
// in the from_chars implementation.
//
// Comments justifying the use of 84 belong in the from_chars implementation,
// and will be added in a follow-up CL.
extern template class BigUnsigned<4>;
extern template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_

View File

@@ -0,0 +1,260 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <string>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Exercises BigUnsigned::ShiftLeft(): exact results, truncation on overflow,
// agreement between incremental and one-shot shifts, and the no-op and
// shift-everything-out boundary cases.
TEST(BigUnsigned, ShiftLeft) {
  {
    // Check that 3 * 2**100 is calculated correctly
    BigUnsigned<4> num(3u);
    num.ShiftLeft(100);
    EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128"));
  }
  {
    // Test that overflow is truncated properly.
    // 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint.
    // Shifting left by 125 bits should truncate off the high bit, so that
    //   15 << 125 == 7 << 125
    // after truncation.
    BigUnsigned<4> a(15u);
    BigUnsigned<4> b(7u);
    BigUnsigned<4> c(3u);
    a.ShiftLeft(125);
    b.ShiftLeft(125);
    c.ShiftLeft(125);
    EXPECT_EQ(a, b);
    EXPECT_NE(a, c);
  }
  {
    // Same test, larger bigint:
    BigUnsigned<84> a(15u);
    BigUnsigned<84> b(7u);
    BigUnsigned<84> c(3u);
    a.ShiftLeft(84 * 32 - 3);
    b.ShiftLeft(84 * 32 - 3);
    c.ShiftLeft(84 * 32 - 3);
    EXPECT_EQ(a, b);
    EXPECT_NE(a, c);
  }
  {
    // Check that incrementally shifting has the same result as doing it all at
    // once (attempting to capture corner cases.)
    const std::string seed = "1234567890123456789012345678901234567890";
    BigUnsigned<84> a(seed);
    for (int i = 1; i <= 84 * 32; ++i) {
      a.ShiftLeft(1);
      BigUnsigned<84> b(seed);
      b.ShiftLeft(i);
      EXPECT_EQ(a, b);
    }
    // And we should have fully rotated all bits off by now:
    EXPECT_EQ(a, BigUnsigned<84>(0u));
  }
  {
    // Bit shifting large and small numbers by large and small offsets.
    // Intended to exercise bounds-checking corner cases in ShiftLeft()
    // (directly and under asan).

    // 2**(32*84)-1
    const BigUnsigned<84> all_bits_one(
        "1474444211396924248063325089479706787923460402125687709454567433186613"
        "6228083464060749874845919674257665016359189106695900028098437021384227"
        "3285029708032466536084583113729486015826557532750465299832071590813090"
        "2011853039837649252477307070509704043541368002938784757296893793903797"
        "8180292336310543540677175225040919704702800559606097685920595947397024"
        "8303316808753252115729411497720357971050627997031988036134171378490368"
        "6008000778741115399296162550786288457245180872759047016734959330367829"
        "5235612397427686310674725251378116268607113017720538636924549612987647"
        "5767411074510311386444547332882472126067840027882117834454260409440463"
        "9345147252664893456053258463203120637089916304618696601333953616715125"
        "2115882482473279040772264257431663818610405673876655957323083702713344"
        "4201105427930770976052393421467136557055");
    const BigUnsigned<84> zero(0u);
    const BigUnsigned<84> one(1u);
    // in bounds shifts
    for (int i = 1; i < 84*32; ++i) {
      // shifting all_bits_one to the left should result in a smaller number,
      // since the high bits rotate off and the low bits are replaced with
      // zeroes.
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(i);
      EXPECT_GT(all_bits_one, big_shifted);
      // Shifting 1 to the left should instead result in a larger number.
      BigUnsigned<84> small_shifted = one;
      small_shifted.ShiftLeft(i);
      EXPECT_LT(one, small_shifted);
    }
    // Shifting by zero or a negative number has no effect
    for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) {
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(no_op_shift);
      EXPECT_EQ(all_bits_one, big_shifted);
      BigUnsigned<84> small_shifted = one;
      // Shift the small value itself, so that the expectation below actually
      // checks the no-op behaviour for it.
      small_shifted.ShiftLeft(no_op_shift);
      EXPECT_EQ(one, small_shifted);
    }
    // Shifting by an amount greater than or equal to the number of bits
    // should result in zero.
    for (int out_of_bounds_shift :
         {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) {
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(out_of_bounds_shift);
      EXPECT_EQ(zero, big_shifted);
      BigUnsigned<84> small_shifted = one;
      small_shifted.ShiftLeft(out_of_bounds_shift);
      EXPECT_EQ(zero, small_shifted);
    }
  }
}
// Checks MultiplyBy(uint32_t) by accumulating the product 1 * 2 * ... * 100
// one factor at a time and comparing it with the decimal constant
// factorial_100.
TEST(BigUnsigned, MultiplyByUint32) {
const BigUnsigned<84> factorial_100(
"933262154439441526816992388562667004907159682643816214685929638952175999"
"932299156089414639761565182862536979208272237582511852109168640000000000"
"00000000000000");
BigUnsigned<84> a(1u);
for (uint32_t i = 1; i <= 100; ++i) {
a.MultiplyBy(i);
}
EXPECT_EQ(a, BigUnsigned<84>(factorial_100));
}
// Checks MultiplyBy(const BigUnsigned&) against results that are known
// independently: a split factorial product, and products of values parsed
// from decimal strings.
TEST(BigUnsigned, MultiplyByBigUnsigned) {
{
// Put the terms of factorial_200 into two bigints, and multiply them
// together.
const BigUnsigned<84> factorial_200(
"7886578673647905035523632139321850622951359776871732632947425332443594"
"4996340334292030428401198462390417721213891963883025764279024263710506"
"1926624952829931113462857270763317237396988943922445621451664240254033"
"2918641312274282948532775242424075739032403212574055795686602260319041"
"7032406235170085879617892222278962370389737472000000000000000000000000"
"0000000000000000000000000");
BigUnsigned<84> evens(1u);
BigUnsigned<84> odds(1u);
// `odds` collects 1, 3, ..., 199 and `evens` collects 2, 4, ..., 200.
for (uint32_t i = 1; i < 200; i += 2) {
odds.MultiplyBy(i);
evens.MultiplyBy(i + 1);
}
evens.MultiplyBy(odds);
EXPECT_EQ(evens, factorial_200);
}
{
// Multiply various powers of 10 together.
// The operands are 3 * 10**a and 2 * 10**b, so the product must parse the
// same as "6" followed by a + b zeros.
for (int a = 0 ; a < 700; a += 25) {
SCOPED_TRACE(a);
BigUnsigned<84> a_value("3" + std::string(a, '0'));
for (int b = 0; b < (700 - a); b += 25) {
SCOPED_TRACE(b);
BigUnsigned<84> b_value("2" + std::string(b, '0'));
BigUnsigned<84> expected_product("6" + std::string(a + b, '0'));
b_value.MultiplyBy(a_value);
EXPECT_EQ(b_value, expected_product);
}
}
}
}
// Checks that products too large for the destination are truncated to the
// low words of the result.
TEST(BigUnsigned, MultiplyByOverflow) {
{
// Check that multiplication overflow predictably truncates.
// A big int with all bits on.
BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455");
// Modulo 2**128, this is equal to -1. Therefore the square of this,
// modulo 2**128, should be 1.
// Note that the operand here is the very object being modified.
all_bits_on.MultiplyBy(all_bits_on);
EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u));
}
{
// Try multiplying a large bigint by 2**50, and compare the result to
// shifting.
BigUnsigned<4> value_1("12345678901234567890123456789012345678");
BigUnsigned<4> value_2("12345678901234567890123456789012345678");
BigUnsigned<4> two_to_fiftieth(1u);
two_to_fiftieth.ShiftLeft(50);
value_1.ShiftLeft(50);
value_2.MultiplyBy(two_to_fiftieth);
EXPECT_EQ(value_1, value_2);
}
}
// Cross-checks the three ways of obtaining a power of five: repeated
// MultiplyBy(5u), MultiplyByFiveToTheNth(), and the static FiveToTheNth().
TEST(BigUnsigned, FiveToTheNth) {
{
// Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to
// and including overflow.
for (int i = 0; i < 1160; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(123u);
BigUnsigned<84> value_2(123u);
value_1.MultiplyByFiveToTheNth(i);
// Reference result: multiply by 5 exactly i times.
for (int j = 0; j < i; j++) {
value_2.MultiplyBy(5u);
}
EXPECT_EQ(value_1, value_2);
}
}
{
// Check that the faster, table-lookup-based static method returns the same
// result that multiplying in-place would return, up to and including
// overflow.
for (int i = 0; i < 1160; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(1u);
value_1.MultiplyByFiveToTheNth(i);
BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i);
EXPECT_EQ(value_1, value_2);
}
}
}
// Cross-checks MultiplyByTenToTheNth() against repeated MultiplyBy(10u) and
// against values parsed from decimal strings.
TEST(BigUnsigned, TenToTheNth) {
{
// Sanity check MultiplyByTenToTheNth.
for (int i = 0; i < 800; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(123u);
BigUnsigned<84> value_2(123u);
value_1.MultiplyByTenToTheNth(i);
// Reference result: multiply by 10 exactly i times.
for (int j = 0; j < i; j++) {
value_2.MultiplyBy(10u);
}
EXPECT_EQ(value_1, value_2);
}
}
{
// Alternate testing approach, taking advantage of the decimal parser.
// 135 * 10**i must equal the number written as "135" followed by i zeros.
for (int i = 0; i < 200; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(135u);
value_1.MultiplyByTenToTheNth(i);
BigUnsigned<84> value_2("135" + std::string(i, '0'));
EXPECT_EQ(value_1, value_2);
}
}
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,504 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type. We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float. Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
// Maximum number of significant decimal mantissa digits accumulated into the
// uint64_t mantissa.
constexpr int kDecimalMantissaDigitsMax = 19;
static_assert(std::numeric_limits<uint64_t>::digits10 ==
kDecimalMantissaDigitsMax,
"(a) above");
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
// The lowest valued 19-digit decimal mantissa we can read still contains
// sufficient information to reconstruct a binary mantissa.
// (1000000000000000000 is 10**18, the smallest 19-digit value; 53 + 3 is the
// double mantissa width plus the 3 bits of headway.)
static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above");
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion. What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
// Maximum number of significant hexadecimal mantissa digits that are read.
constexpr int kHexadecimalMantissaDigitsMax = 15;
// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit.
// With 15 digits this is 4 * 15 - 3 = 57 bits.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
4 * kHexadecimalMantissaDigitsMax - 3;
// The guaranteed precision must exceed the double mantissa width plus the two
// extra bits needed for rounding.
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
std::numeric_limits<double>::digits + 2,
"kHexadecimalMantissaDigitsMax too small");
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow. We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
// Maximum number of significant exponent digits accumulated into an int.
constexpr int kDecimalExponentDigitsMax = 9;
// Every exponent with that many decimal digits must be representable in int.
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
"int type too small");
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept. The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
// Upper bound on the length of a single run of decimal mantissa digits; runs
// of this length or longer are refused by the parser.
constexpr int kDecimalDigitLimit = 50000000;
// Corresponding limit for hexadecimal digit inputs. This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point. (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert(999999999 + 2 * kDecimalDigitLimit <
std::numeric_limits<int>::max(),
"int type too small");
// Same bound for hexadecimal input, where every digit is worth 4 exponent
// units.
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
std::numeric_limits<int>::max(),
"int type too small");
// Reports whether `flags` permits an exponent part (e.g., "1.5e100") to be
// parsed. An exponent is allowed unless the flags select `fixed` without
// also selecting `scientific`.
bool AllowExponent(chars_format flags) {
  const bool wants_scientific =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (wants_scientific) {
    return true;
  }
  const bool wants_fixed =
      (flags & chars_format::fixed) == chars_format::fixed;
  return !wants_fixed;
}
// Reports whether `flags` demands that an exponent part be present, which is
// the case when `scientific` is selected and `fixed` is not.
bool RequireExponent(chars_format flags) {
  const bool wants_fixed =
      (flags & chars_format::fixed) == chars_format::fixed;
  if (wants_fixed) {
    return false;
  }
  return (flags & chars_format::scientific) == chars_format::scientific;
}
// Lookup table indexed by a character's unsigned char value. Entries for
// '0'-'9' hold 0-9, entries for 'A'-'F' and 'a'-'f' hold 10-15, and every
// other entry holds -1. Used by IsDigit<16>() and ToDigit<16>().
const int8_t kAsciiToInt[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1};
// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);
// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);
// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);
// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();
// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();
// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
// Decimal digits are exactly the characters '0' through '9'.
template <>
bool IsDigit<10>(char ch) {
  return '0' <= ch && ch <= '9';
}
// Hexadecimal digits are the characters with a non-negative entry in
// kAsciiToInt.
template <>
bool IsDigit<16>(char ch) {
  const unsigned char table_index = static_cast<unsigned char>(ch);
  return kAsciiToInt[table_index] >= 0;
}
// Value of a decimal digit character: its offset from '0'. `ch` must be a
// valid decimal digit.
template <>
unsigned ToDigit<10>(char ch) {
  return static_cast<unsigned>(ch - '0');
}
// Value of a hexadecimal digit character, looked up in kAsciiToInt. `ch`
// must be a valid hexadecimal digit.
template <>
unsigned ToDigit<16>(char ch) {
  const unsigned char table_index = static_cast<unsigned char>(ch);
  return static_cast<unsigned>(kAsciiToInt[table_index]);
}
// Decimal floats introduce their exponent with 'e' or 'E'.
template <>
bool IsExponentCharacter<10>(char ch) {
  switch (ch) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
// Hexadecimal floats introduce their exponent with 'p' or 'P'.
template <>
bool IsExponentCharacter<16>(char ch) {
  switch (ch) {
    case 'p':
    case 'P':
      return true;
    default:
      return false;
  }
}
// Decimal input keeps at most kDecimalMantissaDigitsMax significant digits.
template <>
constexpr int MantissaDigitsMax<10>() {
return kDecimalMantissaDigitsMax;
}
// Hexadecimal input keeps at most kHexadecimalMantissaDigitsMax significant
// digits.
template <>
constexpr int MantissaDigitsMax<16>() {
return kHexadecimalMantissaDigitsMax;
}
// Longest accepted run of decimal digits.
template <>
constexpr int DigitLimit<10>() {
return kDecimalDigitLimit;
}
// Longest accepted run of hexadecimal digits.
template <>
constexpr int DigitLimit<16>() {
return kHexadecimalDigitLimit;
}
// A decimal digit shifts the (base-10) exponent by 1.
template <>
constexpr int DigitMagnitude<10>() {
return 1;
}
// A hexadecimal digit shifts the (base-2) exponent by 4.
template <>
constexpr int DigitMagnitude<16>() {
return 4;
}
// Reads digits of the given base from [begin, end) and appends them to *out.
// Returns the number of characters consumed, which includes skipped leading
// zeros and dropped digits.
//
// At most max_digits significant digits are folded into *out. Any further
// digits are still consumed but no longer change *out; if one of those
// dropped digits is nonzero, *dropped_nonzero_digit is set to true (when the
// pointer is non-null), otherwise it is left unmodified.
//
// If no digits are matched, returns 0 and leaves *out unchanged.
//
// ConsumeDigits does not protect against overflow on *out; max_digits must
// be chosen with respect to type T to avoid the possibility of overflow.
template <int base, typename T>
int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
                  bool* dropped_nonzero_digit) {
  if (base == 10) {
    assert(max_digits <= std::numeric_limits<T>::digits10);
  } else if (base == 16) {
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
  }
  const char* cursor = begin;

  // Leading zeros are only skipped while *out is still zero. They cannot
  // cause an overflow, so they do not count against `max_digits`.
  if (!*out) {
    while (cursor != end && *cursor == '0') ++cursor;
  }

  T accumulator = *out;
  const char* const significant_end =
      (end - cursor > max_digits) ? cursor + max_digits : end;
  for (; cursor < significant_end && IsDigit<base>(*cursor); ++cursor) {
    // Overflow is not guarded against, because max_digits was chosen to rule
    // it out. The asserts catch violations in debug builds.
    auto digit = static_cast<T>(ToDigit<base>(*cursor));
    assert(accumulator * base >= accumulator);
    accumulator *= base;
    assert(accumulator + digit >= accumulator);
    accumulator += digit;
  }

  // Swallow the digits beyond the significant ones, remembering whether any
  // of them was nonzero.
  bool saw_nonzero = false;
  for (; cursor < end && IsDigit<base>(*cursor); ++cursor) {
    if (*cursor != '0') {
      saw_nonzero = true;
    }
  }
  if (dropped_nonzero_digit != nullptr && saw_nonzero) {
    *dropped_nonzero_digit = true;
  }

  *out = accumulator;
  return static_cast<int>(cursor - begin);
}
// Reports whether `v` may appear inside the parentheses that can follow a
// NaN: an underscore, a decimal digit, or an ASCII letter of either case.
bool IsNanChar(char v) {
  if (v == '_') return true;
  if ('0' <= v && v <= '9') return true;
  if ('a' <= v && v <= 'z') return true;
  return 'A' <= v && v <= 'Z';
}
// Looks for a strtod()-formatted infinity or NaN at the start of
// [begin, end). When one is found, fills in `out` (type, end, and for a NaN
// with a parenthesized payload also the subrange) and returns true;
// otherwise returns false.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) {
  // Both "inf" and "nan" need at least three characters.
  if (end - begin < 3) {
    return false;
  }
  const char first = *begin;

  if (first == 'i' || first == 'I') {
    // An infinity string consists of the characters "inf" or "infinity",
    // case insensitive.
    if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kInfinity;
    const bool long_form =
        end - begin >= 8 &&
        strings_internal::memcasecmp(begin + 3, "inity", 5) == 0;
    out->end = begin + (long_form ? 8 : 3);
    return true;
  }

  if (first == 'n' || first == 'N') {
    // A NaN consists of the characters "nan", case insensitive, optionally
    // followed by a parenthesized sequence of zero or more alphanumeric
    // characters and/or underscores.
    if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kNan;
    const char* const after_nan = begin + 3;
    out->end = after_nan;
    // Match the optional "(...)" payload, made only of [a-zA-Z0-9_]. It is
    // accepted only when the closing parenthesis is present as well.
    if (after_nan < end && *after_nan == '(') {
      const char* const payload_begin = after_nan + 1;
      const char* payload_end = payload_begin;
      while (payload_end < end && IsNanChar(*payload_end)) {
        ++payload_end;
      }
      if (payload_end < end && *payload_end == ')') {
        out->subrange_begin = payload_begin;
        out->subrange_end = payload_end;
        out->end = payload_end + 1;
      }
    }
    return true;
  }

  return false;
}
} // namespace
namespace strings_internal {
// Parses a floating-point literal in the given base from the start of
// [begin, end). No leading sign is consumed by this function.
//
// On success, result.type is set (kNumber here, or kInfinity/kNan via
// ParseInfinityOrNan) and result.end points just past the consumed text. On
// failure, `result` is returned without this function having set its type or
// end.
//
// `format_flags` controls whether an exponent may appear (AllowExponent) and
// whether one must appear (RequireExponent).
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
chars_format format_flags) {
strings_internal::ParsedFloat result;
// Exit early if we're given an empty range.
if (begin == end) return result;
// Handle the infinity and NaN cases.
if (ParseInfinityOrNan(begin, end, &result)) {
return result;
}
const char* const mantissa_begin = begin;
while (begin < end && *begin == '0') {
++begin; // skip leading zeros
}
uint64_t mantissa = 0;
int exponent_adjustment = 0;
bool mantissa_is_inexact = false;
// Read the digits in front of the decimal point.
int pre_decimal_digits = ConsumeDigits<base>(
begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
begin += pre_decimal_digits;
// Number of significant digits that may still be read after the decimal
// point.
int digits_left;
if (pre_decimal_digits >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
} else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
// We dropped some non-fraction digits on the floor. Adjust our exponent
// to compensate.
exponent_adjustment =
static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
digits_left = 0;
} else {
digits_left =
static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
}
if (begin < end && *begin == '.') {
++begin;
if (mantissa == 0) {
// If we haven't seen any nonzero digits yet, keep skipping zeros. We
// have to adjust the exponent to reflect the changed place value.
const char* begin_zeros = begin;
while (begin < end && *begin == '0') {
++begin;
}
int zeros_skipped = static_cast<int>(begin - begin_zeros);
if (zeros_skipped >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
}
exponent_adjustment -= static_cast<int>(zeros_skipped);
}
// Read the digits after the decimal point into the same mantissa.
int post_decimal_digits = ConsumeDigits<base>(
begin, end, digits_left, &mantissa, &mantissa_is_inexact);
begin += post_decimal_digits;
// Since `mantissa` is an integer, each significant digit we read after
// the decimal point requires an adjustment to the exponent. "1.23e0" will
// be stored as `mantissa` == 123 and `exponent` == -2 (that is,
// "123e-2").
if (post_decimal_digits >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
} else if (post_decimal_digits > digits_left) {
// Only the first digits_left fractional digits went into the mantissa.
exponent_adjustment -= digits_left;
} else {
exponent_adjustment -= post_decimal_digits;
}
}
// If we've found no mantissa whatsoever, this isn't a number.
if (mantissa_begin == begin) {
return result;
}
// A bare "." doesn't count as a mantissa either.
if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
return result;
}
if (mantissa_is_inexact) {
// We dropped significant digits on the floor. Handle this appropriately.
if (base == 10) {
// If we truncated significant decimal digits, store the full range of the
// mantissa for future big integer math for exact rounding.
result.subrange_begin = mantissa_begin;
result.subrange_end = begin;
} else if (base == 16) {
// If we truncated hex digits, reflect this fact by setting the low
// ("sticky") bit. This allows for correct rounding in all cases.
mantissa |= 1;
}
}
result.mantissa = mantissa;
// Remember where the exponent would start, so that an exponent marker that
// is not followed by digits can be un-consumed.
const char* const exponent_begin = begin;
result.literal_exponent = 0;
bool found_exponent = false;
if (AllowExponent(format_flags) && begin < end &&
IsExponentCharacter<base>(*begin)) {
bool negative_exponent = false;
++begin;
if (begin < end && *begin == '-') {
negative_exponent = true;
++begin;
} else if (begin < end && *begin == '+') {
++begin;
}
const char* const exponent_digits_begin = begin;
// Exponent is always expressed in decimal, even for hexadecimal floats.
begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
&result.literal_exponent, nullptr);
if (begin == exponent_digits_begin) {
// there were no digits where we expected an exponent. We failed to read
// an exponent and should not consume the 'e' after all. Rewind 'begin'.
found_exponent = false;
begin = exponent_begin;
} else {
found_exponent = true;
if (negative_exponent) {
result.literal_exponent = -result.literal_exponent;
}
}
}
if (!found_exponent && RequireExponent(format_flags)) {
// Provided flags required an exponent, but none was found. This results
// in a failure to scan.
return result;
}
// Success!
result.type = strings_internal::FloatType::kNumber;
if (result.mantissa > 0) {
// Combine the written exponent with the adjustment for the position of the
// decimal point and for dropped digits, scaled to exponent units.
result.exponent = result.literal_exponent +
(DigitMagnitude<base>() * exponent_adjustment);
} else {
// A zero mantissa is zero whatever the exponent says.
result.exponent = 0;
}
result.end = begin;
return result;
}
// Explicit instantiations of ParseFloat for the two supported bases, decimal
// (10) and hexadecimal (16).
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,99 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/charconv.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Enum indicating whether a parsed float is an ordinary number (kNumber) or
// one of the special values (kInfinity, kNan).
enum class FloatType { kNumber, kInfinity, kNan };
// The decomposed parts of a parsed `float` or `double`.
struct ParsedFloat {
// Representation of the parsed mantissa, with the decimal point adjusted to
// make it an integer.
//
// During decimal scanning, this contains 19 significant digits worth of
// mantissa value. If digits beyond this point are found, they
// are truncated, and if any of these dropped digits are nonzero, then
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
// subrange_end).
//
// During hexadecimal scanning, this contains 15 significant hex digits worth
// of mantissa value. Digits beyond this point are sticky -- they are
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
// will be set. (This allows for precise rounding, and avoids the need
// to store the full mantissa in [subrange_begin, subrange_end).)
uint64_t mantissa = 0;
// Floating point exponent. This reflects any decimal point adjustments and
// any truncated digits from the mantissa. The absolute value of the parsed
// number is represented by mantissa * (base ** exponent), where base==10 for
// decimal floats, and base==2 for hexadecimal floats.
int exponent = 0;
// The literal exponent value scanned from the input, or 0 if none was
// present. This does not reflect any adjustments applied to mantissa.
int literal_exponent = 0;
// The type of number scanned.
FloatType type = FloatType::kNumber;
// When non-null, [subrange_begin, subrange_end) marks a range of characters
// that require further processing. The meaning is dependent on float type.
// If type == kNumber and this is set, this is a "wide input": the input
// mantissa contained more than 19 digits. The range contains the full
// mantissa. It plus `literal_exponent` need to be examined to find the best
// floating point match.
// If type == kNan and this is set, the range marks the contents of a
// matched parenthesized character region after the NaN.
const char* subrange_begin = nullptr;
const char* subrange_end = nullptr;
// One-past-the-end of the successfully parsed region, or nullptr if no
// matching pattern was found.
const char* end = nullptr;
};
// Read the floating point number in the provided range, and populate
// ParsedFloat accordingly.
//
// format_flags is a bitmask value specifying what patterns this API will match.
// `scientific` and `fixed` are honored per std::from_chars rules
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
// exponent is required, or disallowed, respectively.
//
// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is
// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat.
//
// On failure to match, the returned ParsedFloat has `end == nullptr`.
template <int base>
ParsedFloat ParseFloat(const char* begin, const char* end,
absl::chars_format format_flags);
// Only the base-10 and base-16 instantiations are declared here.
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
absl::chars_format format_flags);
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
absl::chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_

View File

@@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
using absl::strings_internal::FloatType;
using absl::strings_internal::ParsedFloat;
using absl::strings_internal::ParseFloat;
namespace {
// Check that a given string input is parsed to the expected mantissa and
// exponent.
//
// Input string `s` must contain a '$' character.  It marks the end of the
// characters that should be consumed by the match.  It is stripped from the
// input to ParseFloat.
//
// If input string `s` contains '[' and ']' characters, these mark the region
// of characters that should be marked as the "subrange".  For NaNs, this is
// the location of the extended NaN string.  For numbers, this is the location
// of the full, over-large mantissa.
//
// `expected_mantissa` and `expected_exponent` are only checked when the parsed
// type is kNumber.  `expected_literal_exponent` is additionally checked unless
// it is left at its sentinel default of -999.
template <int base>
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
                       FloatType expected_type, uint64_t expected_mantissa,
                       int expected_exponent,
                       int expected_literal_exponent = -999) {
  SCOPED_TRACE(s);

  int begin_subrange = -1;
  int end_subrange = -1;
  // If s contains '[' and ']', then strip these characters and set the subrange
  // indices appropriately.
  std::string::size_type open_bracket_pos = s.find('[');
  if (open_bracket_pos != std::string::npos) {
    begin_subrange = static_cast<int>(open_bracket_pos);
    s.replace(open_bracket_pos, 1, "");
    // Searched after '[' was removed, so this index is already relative to the
    // bracket-free string.
    std::string::size_type close_bracket_pos = s.find(']');
    ABSL_RAW_CHECK(close_bracket_pos != std::string::npos,
                   "Test input contains [ without matching ]");
    end_subrange = static_cast<int>(close_bracket_pos);
    s.replace(close_bracket_pos, 1, "");
  }

  // Position of '$' == number of characters ParseFloat is expected to consume.
  const std::string::size_type expected_characters_matched = s.find('$');
  ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
                 "Input string must contain $");
  s.replace(expected_characters_matched, 1, "");

  ParsedFloat parsed =
      ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);

  EXPECT_NE(parsed.end, nullptr);
  if (parsed.end == nullptr) {
    return;  // The following tests are not useful if we fully failed to parse
  }
  EXPECT_EQ(parsed.type, expected_type);
  if (begin_subrange == -1) {
    EXPECT_EQ(parsed.subrange_begin, nullptr);
    EXPECT_EQ(parsed.subrange_end, nullptr);
  } else {
    EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
    EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
  }
  if (parsed.type == FloatType::kNumber) {
    EXPECT_EQ(parsed.mantissa, expected_mantissa);
    EXPECT_EQ(parsed.exponent, expected_exponent);
    if (expected_literal_exponent != -999) {
      EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
    }
  }
  // Compare as the same unsigned type as `expected_characters_matched` so the
  // expectation does not mix signed and unsigned operands.
  const auto characters_matched =
      static_cast<std::string::size_type>(parsed.end - s.data());
  EXPECT_EQ(characters_matched, expected_characters_matched);
}
// Check that a given string input is parsed as a number (FloatType::kNumber)
// with the expected mantissa and exponent.
//
// Input string `s` must contain a '$' character. It marks the end of the
// characters that should be consumed by the match. `s` may also carry the
// '[' / ']' subrange markers understood by ExpectParsedFloat.
//
// `expected_literal_exponent` is forwarded unchanged; its default of -999 is
// the sentinel ExpectParsedFloat treats as "do not check".
template <int base>
void ExpectNumber(std::string s, absl::chars_format format_flags,
uint64_t expected_mantissa, int expected_exponent,
int expected_literal_exponent = -999) {
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
expected_mantissa, expected_exponent,
expected_literal_exponent);
}
// Check that a given string input is parsed to the given special value.
//
// This tests against both number bases, since infinities and NaNs have
// identical representations in both modes.
//
// The mantissa and exponent arguments are passed as 0; ExpectParsedFloat only
// compares them when the parsed type is kNumber.
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
FloatType type) {
ExpectParsedFloat<10>(s, format_flags, type, 0, 0);
ExpectParsedFloat<16>(s, format_flags, type, 0, 0);
}
// Check that a given input string is not matched by ParseFloat, i.e. that the
// returned ParsedFloat has a null `end`.
//
// Unlike the helpers above, `s` is handed to ParseFloat verbatim: no '$' or
// '[' / ']' marker characters are stripped from it.
template <int base>
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
ParsedFloat parsed =
ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
EXPECT_EQ(parsed.end, nullptr);
}
TEST(ParseFloat, SimpleValue) {
// Test that various forms of floating point numbers all parse correctly.
// Every decimal spelling below denotes the same value and is expected to
// yield the same (mantissa, exponent) pair; likewise for the hex spellings.
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
// ParseFloat does not attempt to drop trailing zeroes: they stay in the
// mantissa and are reflected in the exponent.
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
-5);
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
0x1234abcdef000, -20);
// Ensure non-matching characters after a number are ignored, even when they
// look like potentially matching characters.
ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
-3);
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
// Ensure we can read a full resolution mantissa without overflow.
ExpectNumber<10>("9999999999999999999$", chars_format::general,
9999999999999999999u, 0);
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
0xfffffffffffffffu, 0);
// Check that zero is consistently read: both mantissa and exponent are
// expected to be 0, whatever the spelling or literal exponent.
ExpectNumber<10>("0$", chars_format::general, 0, 0);
ExpectNumber<16>("0$", chars_format::general, 0, 0);
ExpectNumber<10>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
chars_format::general, 0, 0);
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
chars_format::general, 0, 0);
}
TEST(ParseFloat, LargeDecimalMantissa) {
// After 19 significant decimal digits in the mantissa, ParsedFloat will
// truncate additional digits. We need to test that:
// 1) the truncation to 19 digits happens
// 2) the returned exponent reflects the dropped significant digits
// 3) a correct literal_exponent is set
//
// If and only if a significant (nonzero) digit is found after 19 digits, the
// entirety of the mantissa is marked as a subrange, in case the exact value
// is needed to make a rounding decision. The [ and ] characters below denote
// where such a subrange is expected to be marked by ParseFloat. They are not
// part of the input.
// Mark a capture group only if a dropped digit is significant (nonzero).
ExpectNumber<10>("100000000000000000000000000$", chars_format::general,
1000000000000000000,
/* adjusted exponent */ 8);
ExpectNumber<10>("123456789123456789100000000$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8);
ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
// Leading zeroes should not count towards the 19 significant digit limit
ExpectNumber<10>("[00000000123456789123456789123456789]$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("00000000123456789123456789100000000$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8);
// Truncated digits after the decimal point should not cause a further
// exponent adjustment.
ExpectNumber<10>("1.234567891234567891e123$", chars_format::general,
1234567891234567891, 105);
ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 105,
/* literal exponent */ 123);
// Ensure we truncate, and not round. (The from_chars algorithm we use
// depends on our guess missing low, if it misses, so we need the rounding
// error to be downward.)
ExpectNumber<10>("[1999999999999999999999]$", chars_format::general,
1999999999999999999,
/* adjusted exponent */ 3,
/* literal exponent */ 0);
}
TEST(ParseFloat, LargeHexadecimalMantissa) {
// After 15 significant hex digits in the mantissa, ParsedFloat will treat
// additional digits as sticky. We need to test that:
// 1) The truncation to 15 digits happens
// 2) The returned exponent reflects the dropped significant digits
// 3) If a nonzero digit is dropped, the low bit of mantissa is set.
ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general,
0x123456789abcdef, 60);
// Leading zeroes should not count towards the 15 significant digit limit
ExpectNumber<16>("000000123456789abcdef123456789abcdef$",
chars_format::general, 0x123456789abcdef, 60);
// Truncated digits after the radix point should not cause a further
// exponent adjustment.
ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general,
0x123456789abcdef, 44);
ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$",
chars_format::general, 0x123456789abcdef, 44);
// Test sticky digit behavior. The low bit should be set iff any dropped
// digit is nonzero. (The retained digits below end in 'e', so a set low bit
// shows up as a trailing 'f' in the expected mantissa.)
ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general,
0x123456789abcdee, 60);
}
TEST(ParseFloat, ScientificVsFixed) {
  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  // (ExpectFailedParse passes its input through verbatim, so the trailing '$'
  // in the failing cases is just an ordinary non-matching character.)
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  ExpectFailedParse<10>("-123456.789$", chars_format::scientific);
  // Also check an unsigned decimal input, mirroring the hex case below, so
  // that the failure can only be attributed to the missing exponent and not
  // to the leading '-'.
  ExpectFailedParse<10>("123456.789$", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
}
TEST(ParseFloat, Infinity) {
// Incomplete or misspelled prefixes of "inf" must not match.
ExpectFailedParse<10>("in", chars_format::general);
ExpectFailedParse<16>("in", chars_format::general);
ExpectFailedParse<10>("inx", chars_format::general);
ExpectFailedParse<16>("inx", chars_format::general);
// "inf" matches regardless of letter case.
ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity);
// An incomplete "infinity" only consumes the leading "inf"; the full word is
// consumed in any letter case, and trailing characters are left alone.
ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity);
ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity);
}
TEST(ParseFloat, NaN) {
// Incomplete or misspelled prefixes of "nan" must not match.
ExpectFailedParse<10>("na", chars_format::general);
ExpectFailedParse<16>("na", chars_format::general);
ExpectFailedParse<10>("nah", chars_format::general);
ExpectFailedParse<16>("nah", chars_format::general);
// "nan" matches regardless of letter case, and only its three characters
// are consumed when ordinary text follows.
ExpectSpecial("nan$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$", chars_format::general, FloatType::kNan);
ExpectSpecial("nAn$", chars_format::general, FloatType::kNan);
ExpectSpecial("NAN$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan);
// A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
// appear after a NaN. Check that this is allowed, and that the correct
// characters are grouped.
//
// (The characters [ and ] in the pattern below delimit the expected matched
// subgroup; they are not part of the input passed to ParseFloat.)
ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan);
// If the subgroup contains illegal characters, don't match it at all.
ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan);
// Also cope with a missing close paren.
ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan);
}
} // namespace

View File

@@ -0,0 +1,89 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_internal.h"
#include <atomic>
#include <cassert>
#include <memory>
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Definitions of the cord feature switches. Each of the first three starts
// out at its corresponding k...Default constant.
ABSL_CONST_INIT std::atomic<bool> cord_btree_enabled(kCordEnableBtreeDefault);
ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled(
kCordEnableRingBufferDefault);
ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled(
kCordShallowSubcordsDefault);
// Exhaustive btree validation starts out disabled.
ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
// Destroys `rep` and every node below it whose reference count drops to zero
// as a result.
//
// The walk is iterative: after a node is released, at most one child is
// followed directly (`next`), and a CONCAT node's right child is parked on the
// `pending` stack until the left side has been dealt with. A child is only
// visited when `Decrement()` on its refcount returns false, i.e. when no
// references to it remain.
//
// `rep` must be non-null, and no visited node may be immortal (both asserted).
void CordRep::Destroy(CordRep* rep) {
  assert(rep != nullptr);

  absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending;
  for (;;) {
    assert(!rep->refcount.IsImmortal());

    // Child to visit immediately after `rep` has been released, if any.
    CordRep* next = nullptr;
    switch (rep->tag) {
      case CONCAT: {
        CordRepConcat* concat_node = rep->concat();
        // Handle the right child first: if it is now unreferenced, remember
        // it for later.
        CordRep* right = concat_node->right;
        if (!right->refcount.Decrement()) {
          pending.push_back(right);
        }
        // Read the left pointer before the node itself goes away.
        CordRep* left = concat_node->left;
        delete concat_node;
        if (!left->refcount.Decrement()) {
          next = left;
        }
        break;
      }
      case BTREE:
        CordRepBtree::Destroy(rep->btree());
        break;
      case RING:
        CordRepRing::Destroy(rep->ring());
        break;
      case EXTERNAL:
        CordRepExternal::Delete(rep);
        break;
      case SUBSTRING: {
        CordRepSubstring* substring_node = rep->substring();
        // Read the child pointer before the node itself goes away.
        CordRep* child = substring_node->child;
        delete substring_node;
        if (!child->refcount.Decrement()) {
          next = child;
        }
        break;
      }
      default:
        // Every remaining tag value is handled as a flat node.
        CordRepFlat::Delete(rep);
        break;
    }

    if (next == nullptr) {
      // Nothing to follow directly; resume with a parked node, or finish.
      if (pending.empty()) {
        return;
      }
      next = pending.back();
      pending.pop_back();
    }
    rep = next;
  }
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,599 @@
// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordzInfo;
// Default enable states for the optional cord features: the btree
// representation, the ring buffer representation, and shallow subcords.
enum CordFeatureDefaults {
kCordEnableBtreeDefault = false,
kCordEnableRingBufferDefault = false,
kCordShallowSubcordsDefault = false
};
extern std::atomic<bool> cord_btree_enabled;
extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;
// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
// in debug assertions, and code that calls `IsValid()` explicitly. By default,
// assertions should be relatively cheap and AssertValid() can easily lead to
// O(n^2) complexity as recursive / full tree validation is O(n).
extern std::atomic<bool> cord_btree_exhaustive_validation;
// Sets the `cord_btree_enabled` switch to `enable`. The store uses relaxed
// memory ordering.
inline void enable_cord_btree(bool enable) {
  std::atomic_store_explicit(&cord_btree_enabled, enable,
                             std::memory_order_relaxed);
}
// Sets the `cord_ring_buffer_enabled` switch to `enable`. The store uses
// relaxed memory ordering.
inline void enable_cord_ring_buffer(bool enable) {
  std::atomic_store_explicit(&cord_ring_buffer_enabled, enable,
                             std::memory_order_relaxed);
}
// Sets the `shallow_subcords_enabled` switch to `enable`. The store uses
// relaxed memory ordering.
inline void enable_shallow_subcords(bool enable) {
  std::atomic_store_explicit(&shallow_subcords_enabled, enable,
                             std::memory_order_relaxed);
}
// Tuning constants shared by the cord implementation.
enum Constants {
// The inlined size to use with absl::InlinedVector.
//
// Note: The InlinedVectors in this file (and in cord.h) do not need to use
// the same value for their inlined size. The fact that they do is historical.
// It may be desirable for each to use a different inlined size optimized for
// that InlinedVector's usage.
//
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
kInlinedVectorSize = 47,
// Prefer copying blocks of at most this size, otherwise reference count.
kMaxBytesToCopy = 511
};
// Compact class for tracking the reference count and state flags for CordRep
// instances.  Data is stored in an atomic int32_t for compactness and speed.
//
// The low `kNumFlags` bits of the stored word hold flags and the remaining
// bits hold the reference count, so one reference is represented by the value
// `kRefIncrement`.
class RefcountAndFlags {
 public:
  // Creates a counter holding exactly one reference and no flags.
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  // Tag type selecting the immortal constructor below.
  struct Immortal {};
  // Creates a counter with only the immortal flag set.
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.  Always returns true when the immortal bit is set: the immortal bit
  // survives `kRefcountMask`, so the masked value can never compare equal to
  // `kRefIncrement`.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    // When this is the only reference, skip the atomic read-modify-write and
    // report "no references outstanding" directly.
    return refcount != kRefIncrement &&
           (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
            kRefcountMask) != kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1, i.e. the
  // decrement is always performed.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
        kRefcountMask;
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline int32_t Get() const {
    return count_.load(std::memory_order_acquire) >> kNumFlags;
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.  Always returns false when the immortal bit is set.
  //
  // This only loads the counter, so it is callable on const instances.
  inline bool IsOne() const {
    return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
           kRefIncrement;
  }

  // Returns whether the immortal flag is set (relaxed load).
  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bits for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  // kReservedFlag is reserved for future use.
  enum {
    kNumFlags = 2,

    kImmortalFlag = 0x1,
    kReservedFlag = 0x2,
    kRefIncrement = (1 << kNumFlags),

    // Bitmask to use when checking refcount by equality.  This masks out
    // all flags except kImmortalFlag, which is part of the refcount for
    // purposes of equality.  (A refcount of 0 or 1 does not count as 0 or 1
    // if the immortal bit is set.)
    kRefcountMask = ~kReservedFlag,
  };

  std::atomic<int32_t> count_;
};
// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
class CordRepRing;
class CordRepBtree;
// Various representations that we allow
enum CordRepKind {
  CONCAT = 0,
  SUBSTRING = 1,
  BTREE = 2,
  RING = 3,
  EXTERNAL = 4,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on
  // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed
  // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well
  // as the Tag <---> Size logic so that FLAT still represents the minimum flat
  // allocation size. (32 bytes as of now).
  FLAT = 5,
  MAX_FLAT_TAG = 225
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Likewise, we have some locations where we check for 'ring or external/flat',
// so likewise align RING to EXTERNAL.
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
// Common header shared by all cord node representations. The concrete node
// type is identified by `tag` rather than by a vtable.
struct CordRep {
CordRep() = default;
// Constructs an immortal node of length `l`, tagged EXTERNAL, with
// zero-initialized `storage`.
constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
: length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size.
size_t length;
RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
// `storage` provides two main purposes:
// - the starting point for CordRepFlat::Data() [flexible-array-member]
// - 3 bytes of additional storage for use by derived classes.
// The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
// a 'depth' value in storage[0], and the CordRepBtree class stores
// `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
// allocate room for these in the derived class, as not all compilers reuse
// padding space from the base class (clang and gcc do, MSVC does not, etc)
uint8_t storage[3];
// Returns true if this instance's tag matches the requested type.
constexpr bool IsRing() const { return tag == RING; }
constexpr bool IsConcat() const { return tag == CONCAT; }
constexpr bool IsSubstring() const { return tag == SUBSTRING; }
constexpr bool IsExternal() const { return tag == EXTERNAL; }
// Any tag value at or above FLAT denotes a flat node.
constexpr bool IsFlat() const { return tag >= FLAT; }
constexpr bool IsBtree() const { return tag == BTREE; }
// Downcasting helpers, one const / non-const pair per representation. They
// are only declared here; the definitions are outside this struct.
inline CordRepRing* ring();
inline const CordRepRing* ring() const;
inline CordRepConcat* concat();
inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
inline CordRepFlat* flat();
inline const CordRepFlat* flat() const;
inline CordRepBtree* btree();
inline const CordRepBtree* btree() const;
// --------------------------------------------------------------------
// Memory management
// Destroys the provided `rep`.
static void Destroy(CordRep* rep);
// Increments the reference count of `rep`.
// Requires `rep` to be a non-null pointer value.
static inline CordRep* Ref(CordRep* rep);
// Decrements the reference count of `rep`. Destroys rep if count reaches
// zero. Requires `rep` to be a non-null pointer value.
static inline void Unref(CordRep* rep);
};
// Concatenation node: a CordRep that additionally holds two child pointers,
// `left` and `right`. Its depth is kept in the inherited `storage[0]` byte
// instead of a dedicated member, so the struct adds nothing but the pointers.
struct CordRepConcat : public CordRep {
CordRep* left;
CordRep* right;
// Returns the depth value held in `storage[0]`.
uint8_t depth() const { return storage[0]; }
// Stores `depth` into `storage[0]`.
void set_depth(uint8_t depth) { storage[0] = depth; }
};
// Substring node: a CordRep that refers to a sub-range of another node.
// `length` (inherited) is the number of bytes referenced and `start` is where
// that range begins inside `child`.
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child; // Node whose data this substring refers to
};
// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
CordRepExternal() = default;
// Constexpr constructor for a rep that refers to `str` and has no releaser:
// the base is built through the `Immortal` CordRep constructor with length
// `str.size()`, `base` points at `str.data()`, and `releaser_invoker` is
// null. Note that Delete() asserts a non-null invoker, so reps built this way
// are not expected to reach Delete().
explicit constexpr CordRepExternal(absl::string_view str)
: CordRep(RefcountAndFlags::Immortal{}, str.size()),
base(str.data()),
releaser_invoker(nullptr) {}
// Start of the externally owned character data (`length` bytes long).
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
// Deletes (releases) the external rep.
// Requires rep != nullptr and rep->IsExternal()
static void Delete(CordRep* rep);
};
// Overload-ranking tags for InvokeReleaser(). Rank0 derives from Rank1, so a
// call made with `Rank0{}` matches the Rank0 overload exactly and the Rank1
// overload only through a derived-to-base conversion. When both overloads are
// viable the Rank0 one is therefore preferred.
struct Rank1 {};
struct Rank0 : Rank1 {};
// Preferred overload: participates only when `Releaser` is invocable with an
// absl::string_view (the defaulted template argument is ill-formed otherwise)
// and passes `data` to the releaser.
template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}
// Fallback overload: participates only when `Releaser` is invocable with no
// arguments. The string_view argument is accepted but ignored.
template <typename Releaser,
typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}
// Concrete external rep that stores the releaser next to the CordRepExternal
// header. Destroying an instance runs the releaser (see the destructor).
// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
: public CordRepExternal,
public ::absl::container_internal::CompressedTuple<Releaser> {
// The extra int arg is so that we can avoid interfering with copy/move
// constructors while still benefitting from perfect forwarding.
// Stores the forwarded releaser in the CompressedTuple base and registers
// `Release` as this rep's `releaser_invoker`.
template <typename T>
CordRepExternalImpl(T&& releaser, int)
: CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
this->releaser_invoker = &Release;
}
// Invokes the stored releaser, moved out of the tuple. The Rank0 tag makes
// InvokeReleaser prefer the overload that passes the string_view of
// `base`/`length` when the releaser accepts one.
~CordRepExternalImpl() {
InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
absl::string_view(base, length));
}
// Type-erased deleter stored in `releaser_invoker`: casts `rep` back to this
// concrete type and deletes it, which runs the destructor above.
static void Release(CordRepExternal* rep) {
delete static_cast<CordRepExternalImpl*>(rep);
}
};
// Releases an external rep by calling its registered `releaser_invoker`.
// `rep` must be non-null and tagged EXTERNAL, and the invoker must have been
// set; all three conditions are checked with assert only.
inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  CordRepExternal* const rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  // Read the function pointer first, then hand the rep over to it.
  const ExternalReleaserInvoker invoke = rep_external->releaser_invoker;
  invoke(rep_external);
}
// Provides one static CordRepExternal per `Str` type. The object is declared
// ABSL_CONST_INIT and is defined below by passing `Str::value` to the
// CordRepExternal constructor (assumes `Str::value` is usable as an
// absl::string_view in a constant expression -- confirm against callers).
template <typename Str>
struct ConstInitExternalStorage {
ABSL_CONST_INIT static CordRepExternal value;
};
// Out-of-class definition of the static member declared above.
template <typename Str>
CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);
// Constant used throughout the inline representation; the static_asserts in
// this file tie the inline buffer size to `kMaxInline + 1`.
enum { kMaxInline = 15 };

// Returns the character of `data` at index `pos`, or NUL when `pos` lies at or
// beyond the end of `data`. Kept as a single return statement so it remains a
// valid constexpr function under C++11 rules.
constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos >= data.size() ? '\0' : data[pos];
}
// cordz_info is kept as a 64-bit value in big endian byte order. With that
// ordering the least significant byte of the value is the last byte in memory,
// which is also the last byte of the inline character buffer `as_chars_`: the
// byte that holds the inlined size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Two cordz_info_t values must exactly fill the inline buffer, so that
// `cordz_info` overlays its second half, and a cordz_info_t must be wide
// enough to carry a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// Returns a cordz_info_t whose in-memory (host) representation has `value` in
// its last byte and zero in every other byte, i.e. the big endian encoding of
// `value`. On big endian hosts that is `value` itself; elsewhere `value` is
// shifted up into the most significant byte.
static constexpr cordz_info_t BigEndianByte(unsigned char value) {
#if !defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << (8 * (sizeof(cordz_info_t) - 1));
#else
  return value;
#endif
}
// InlineData is a `kMaxInline + 1` byte value (enforced by the static_assert
// following the class) that holds either up to kMaxInline characters of
// inline data, or a tree pointer plus a `cordz_info` word. Both
// representations live in one anonymous union; the last byte (the 'tag')
// tells them apart: its lowest bit is set for a tree value, and for inline
// data it holds the size shifted left by one.
class InlineData {
public:
// DefaultInitType forces the use of the default initialization constructor.
enum DefaultInitType { kDefaultInit };
// kNullCordzInfo holds the big endian representation of intptr_t(1)
// This is the 'null' / initial value of 'cordz_info'. The null value
// is specifically big endian 1 as with 64-bit pointers, the last
// byte of cordz_info overlaps with the last byte holding the tag.
static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
// Creates an empty inline value: all bytes, including the tag, are zero.
constexpr InlineData() : as_chars_{0} {}
// Leaves the contents uninitialized; the caller is expected to write them.
explicit InlineData(DefaultInitType) {}
// Creates a tree value holding `rep` with a null (not profiled) cordz_info.
explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
// Creates an inline value from `chars`: the first kMaxInline bytes come from
// `chars` (NUL padded via GetOrNull) and the tag byte is `chars.size() << 1`.
// NOTE(review): no size check is visible here; presumably callers guarantee
// `chars.size() <= kMaxInline` -- confirm.
explicit constexpr InlineData(absl::string_view chars)
: as_chars_{
GetOrNull(chars, 0), GetOrNull(chars, 1),
GetOrNull(chars, 2), GetOrNull(chars, 3),
GetOrNull(chars, 4), GetOrNull(chars, 5),
GetOrNull(chars, 6), GetOrNull(chars, 7),
GetOrNull(chars, 8), GetOrNull(chars, 9),
GetOrNull(chars, 10), GetOrNull(chars, 11),
GetOrNull(chars, 12), GetOrNull(chars, 13),
GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}
// Returns true if the current instance is empty.
// The 'empty value' is an inlined data value of zero length.
bool is_empty() const { return tag() == 0; }
// Returns true if the current instance holds a tree value.
bool is_tree() const { return (tag() & 1) != 0; }
// Returns true if the current instance holds a cordz_info value.
// Requires the current instance to hold a tree value.
bool is_profiled() const {
assert(is_tree());
return as_tree_.cordz_info != kNullCordzInfo;
}
// Returns true if either of the provided instances hold a cordz_info value.
// This method is more efficient than the equivalent `data1.is_profiled() ||
// data2.is_profiled()`. Requires both arguments to hold a tree.
static bool is_either_profiled(const InlineData& data1,
const InlineData& data2) {
assert(data1.is_tree() && data2.is_tree());
// OR-ing two values that both equal kNullCordzInfo yields kNullCordzInfo
// again; any other bit set in either operand makes the result differ.
return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) !=
kNullCordzInfo;
}
// Returns the cordz_info sampling instance for this instance, or nullptr
// if the current instance is not sampled and does not have CordzInfo data.
// Requires the current instance to hold a tree value.
CordzInfo* cordz_info() const {
assert(is_tree());
intptr_t info =
static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info));
// The stored value always has its lowest bit set (see set_cordz_info and
// kNullCordzInfo); subtracting 1 recovers the pointer, or nullptr for the
// null value.
assert(info & 1);
return reinterpret_cast<CordzInfo*>(info - 1);
}
// Sets the current cordz_info sampling instance for this instance, or nullptr
// if the current instance is not sampled and does not have CordzInfo data.
// Requires the current instance to hold a tree value.
void set_cordz_info(CordzInfo* cordz_info) {
assert(is_tree());
// Tag the pointer with the low 'is_tree' bit before storing it big endian,
// so the tag byte keeps reporting a tree value.
intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1;
as_tree_.cordz_info = absl::big_endian::FromHost64(info);
}
// Resets the current cordz_info to null / empty.
void clear_cordz_info() {
assert(is_tree());
as_tree_.cordz_info = kNullCordzInfo;
}
// Returns a read only pointer to the character data inside this instance.
// Requires the current instance to hold inline data.
const char* as_chars() const {
assert(!is_tree());
return as_chars_;
}
// Returns a mutable pointer to the character data inside this instance.
// Should be used for 'write only' operations setting an inlined value.
// Applications can set the value of inlined data either before or after
// setting the inlined size, i.e., both of the below are valid:
//
// // Set inlined data and inline size
// memcpy(data_.as_chars(), data, size);
// data_.set_inline_size(size);
//
// // Set inlined size and inline data
// data_.set_inline_size(size);
// memcpy(data_.as_chars(), data, size);
//
// It's an error to read from the returned pointer without a preceding write
// if the current instance does not hold inline data, i.e.: is_tree() == true.
char* as_chars() { return as_chars_; }
// Returns the tree value of this value.
// Requires the current instance to hold a tree value.
CordRep* as_tree() const {
assert(is_tree());
return as_tree_.rep;
}
// Initialize this instance to holding the tree value `rep`,
// initializing the cordz_info to null, i.e.: 'not profiled'.
void make_tree(CordRep* rep) {
as_tree_.rep = rep;
as_tree_.cordz_info = kNullCordzInfo;
}
// Set the tree value of this instance to 'rep`.
// Requires the current instance to already hold a tree value.
// Does not affect the value of cordz_info.
void set_tree(CordRep* rep) {
assert(is_tree());
as_tree_.rep = rep;
}
// Returns the size of the inlined character data inside this instance.
// Requires the current instance to hold inline data.
size_t inline_size() const {
assert(!is_tree());
return tag() >> 1;
}
// Sets the size of the inlined character data inside this instance.
// Requires `size` to be <= kMaxInline.
// See the documentation on 'as_chars()' for more information and examples.
void set_inline_size(size_t size) {
ABSL_ASSERT(size <= kMaxInline);
tag() = static_cast<char>(size << 1);
}
private:
// See cordz_info_t for forced alignment and size of `cordz_info` details.
struct AsTree {
explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
: rep(tree), cordz_info(kNullCordzInfo) {}
// This union uses up extra space so that whether rep is 32 or 64 bits,
// cordz_info will still start at the eighth byte, and the last
// byte of cordz_info will still be the last byte of InlineData.
union {
absl::cord_internal::CordRep* rep;
cordz_info_t unused_aligner;
};
cordz_info_t cordz_info;
};
// Accessors for the tag byte, i.e. the byte at offset kMaxInline of this
// object, which is the last byte of both union members.
char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }
// If the data has length <= kMaxInline, we store it in `as_chars_`, and
// store the size, shifted left by one bit, in the last char of `as_chars_`
// (leaving the lowest bit clear).
// Else we store it in a tree and store a pointer to that tree in
// `as_tree_.rep`; the lowest bit of the last byte of `as_tree_.cordz_info`
// is then set and marks the value as a tree.
union {
char as_chars_[kMaxInline + 1];
AsTree as_tree_;
};
};
// InlineData must be exactly as large as the inline character buffer.
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
// Checked downcasts from CordRep to the concrete node types defined in this
// header. Each accessor asserts that the tag matches the requested type and
// then converts `this` with a static_cast; no runtime check remains when
// assertions are compiled out.

// Returns this node as a CordRepConcat. Requires IsConcat().
inline CordRepConcat* CordRep::concat() {
  assert(IsConcat());
  CordRep* const self = this;
  return static_cast<CordRepConcat*>(self);
}

// Const overload of concat().
inline const CordRepConcat* CordRep::concat() const {
  assert(IsConcat());
  const CordRep* const self = this;
  return static_cast<const CordRepConcat*>(self);
}

// Returns this node as a CordRepSubstring. Requires IsSubstring().
inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  CordRep* const self = this;
  return static_cast<CordRepSubstring*>(self);
}

// Const overload of substring().
inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  const CordRep* const self = this;
  return static_cast<const CordRepSubstring*>(self);
}

// Returns this node as a CordRepExternal. Requires IsExternal().
inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  CordRep* const self = this;
  return static_cast<CordRepExternal*>(self);
}

// Const overload of external().
inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  const CordRep* const self = this;
  return static_cast<const CordRepExternal*>(self);
}
// Adds one reference to `rep` and returns `rep` so calls can be chained.
// `rep` must not be null (checked by assert only).
inline CordRep* CordRep::Ref(CordRep* rep) {
  assert(rep != nullptr);
  return rep->refcount.Increment(), rep;
}
// Drops one reference to `rep` and destroys it when
// DecrementExpectHighRefcount() reports (by returning false) that no
// references remain. `rep` must not be null (checked by assert only).
inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // The destroy path is annotated as the unlikely one. The decrement variant
  // used here is named for the case where other references remain; see its
  // definition for the exact fast-path behavior.
  const bool still_referenced = rep->refcount.DecrementExpectHighRefcount();
  if (ABSL_PREDICT_FALSE(!still_referenced)) {
    Destroy(rep);
  }
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

View File

@@ -0,0 +1,954 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17
namespace {
// File-local shorthands for CordRepBtree types and constants.
// NodeStack is a fixed-size array of `CordRepBtree::kMaxDepth` node pointers;
// StackOperations uses it to record the path from the root towards a leaf.
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
using EdgeType = CordRepBtree::EdgeType;
using OpResult = CordRepBtree::OpResult;
using CopyResult = CordRepBtree::CopyResult;
// The two EdgeType values, used as template arguments throughout this file to
// select front (prepend side) or back (append side) behavior.
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
// Reads the `cord_btree_exhaustive_validation` flag with relaxed memory
// ordering and returns its current value.
inline bool exhaustive_validation() {
  const bool enabled =
      cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
  return enabled;
}
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
// not specify 'depth' and leave it to its default (0) value.
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
int depth = 0) {
// Allow for full height trees + substring -> flat / external nodes.
assert(depth <= CordRepBtree::kMaxDepth + 2);
std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
? std::string("Private")
: absl::StrCat("Shared(", rep->refcount.Get(), ")");
std::string sptr = absl::StrCat("0x", absl::Hex(rep));
// Dumps the data contents of `rep` if `include_contents` is true.
// Always emits a new line character.
auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
if (include_contents) {
// Allow for up to 60 wide display of content data, which with some
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
<< CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
};
// For each level, we print the 'shared/private' state and the rep pointer,
// indented by two spaces per recursive depth.
stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
if (rep->IsBtree()) {
const CordRepBtree* node = rep->btree();
std::string label =
node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
stream << label << ", len = " << node->length
<< ", begin = " << node->begin() << ", end = " << node->end()
<< "\n";
for (CordRep* edge : node->Edges()) {
DumpAll(edge, include_contents, stream, depth + 1);
}
} else if (rep->tag == SUBSTRING) {
const CordRepSubstring* substring = rep->substring();
stream << "Substring, len = " << rep->length
<< ", start = " << substring->start;
maybe_dump_data(rep);
DumpAll(substring->child, include_contents, stream, depth + 1);
} else if (rep->tag >= FLAT) {
stream << "Flat, len = " << rep->length;
maybe_dump_data(rep);
} else if (rep->tag == EXTERNAL) {
stream << "Extn, len = " << rep->length;
maybe_dump_data(rep);
}
}
// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
// small data out of large reps, and general efficiency of 'always copy small
// data'. Consider making this a cord rep internal library function.
//
// Allocates a SUBSTRING node covering `n` bytes of `rep` starting at `offset`.
// Requires a non-empty range that lies inside `rep` and is a strict sub-range
// of it (asserted). The reference held on `rep` is consumed: when `rep` is
// itself a substring, the new node points straight at that substring's child
// (with the offsets added up) and `rep` is unreferenced.
CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
  assert(n != 0);
  assert(offset + n <= rep->length);
  assert(offset != 0 || n != rep->length);

  if (rep->IsSubstring()) {
    // Avoid substring-of-substring chains: retarget to the underlying child.
    CordRepSubstring* const outer = rep->substring();
    offset += outer->start;
    rep = CordRep::Ref(outer->child);
    CordRep::Unref(outer);
  }

  CordRepSubstring* const result = new CordRepSubstring();
  result->child = rep;
  result->start = offset;
  result->tag = SUBSTRING;
  result->length = n;
  return result;
}
// TODO(b/192061034): consider making this a cord rep library function.
//
// Returns a rep for `n` bytes of `rep` starting at `offset`:
//  - `rep` itself when `n` equals the full length,
//  - nullptr (after dropping the reference on `rep`) when `n` is zero,
//  - a new substring node otherwise.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
  if (n == rep->length) return rep;
  if (n != 0) return CreateSubstring(rep, offset, n);
  CordRep::Unref(rep);
  return nullptr;
}
// TODO(b/192061034): consider making this a cord rep library function.
//
// Returns a rep for the data of `rep` from `offset` to its end: `rep` itself
// for a zero offset, else a new substring node for the remaining bytes.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
  if (offset != 0) {
    return CreateSubstring(rep, offset, rep->length - offset);
  }
  return rep;
}
// Returns what is left of `s` after taking `n` bytes from it. For kBack the
// bytes are taken from the start of `s`; otherwise from its end.
template <EdgeType edge_type>
inline absl::string_view Consume(absl::string_view s, size_t n) {
  if (edge_type == kBack) {
    return s.substr(n);
  }
  return s.substr(0, s.size() - n);
}
// Copies `n` bytes of `s` into `dst` and returns the part of `s` that was not
// copied. For kBack the first `n` bytes are copied; otherwise the last `n`.
template <EdgeType edge_type>
inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
  const bool take_prefix = (edge_type == kBack);
  // Offset of the copied range inside `s`; only the suffix case needs the
  // subtraction.
  const size_t src_offset = take_prefix ? 0 : s.size() - n;
  memcpy(dst, s.data() + src_offset, n);
  return take_prefix ? s.substr(n) : s.substr(0, src_offset);
}
// Drops one reference on `r` and calls `fn(r)` when that was the last one.
//
// Known issue / optimization weirdness: the store associated with the
// decrement introduces traffic between cpus (even if the result of that
// traffic does nothing), making this faster than a single call to
// refcount.Decrement() checking the zero refcount condition. So the refcount
// is first tested with IsOne(), and the decrement is skipped entirely when
// this caller is the sole owner.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
  // Short-circuit evaluation keeps the decrement off the sole-owner path.
  const bool last_reference =
      r->refcount.IsOne() || !r->refcount.DecrementExpectHighRefcount();
  if (last_reference) {
    fn(r);
  }
}
// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
// node, or a SUBSTRING of an EXTERNAL or FLAT node.
//
// A substring is deleted first and one reference on its child is then
// dropped; only when that was the child's last reference does the loop come
// around again to delete the child itself.
void DeleteLeafEdge(CordRep* rep) {
  while (true) {
    if (rep->tag >= FLAT) {
      CordRepFlat::Delete(rep->flat());
      break;
    }
    if (rep->tag == EXTERNAL) {
      CordRepExternal::Delete(rep->external());
      break;
    }
    assert(rep->tag == SUBSTRING);
    CordRepSubstring* const substring = rep->substring();
    // Continue with the child before the substring node goes away.
    rep = substring->child;
    assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
    delete substring;
    // A true result means the child still has other owners: done.
    if (rep->refcount.Decrement()) break;
  }
}
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
// propagate node changes up the stack.
// `edge_type` selects which leg is followed: every descent step goes through
// `tree->Edge(edge_type)`.
template <EdgeType edge_type>
struct StackOperations {
// Returns true if the node at 'depth' is not shared, i.e. has a refcount
// of one and all of its parent nodes have a refcount of one.
inline bool owned(int depth) const { return depth < share_depth; }
// Returns the node at 'depth'.
inline CordRepBtree* node(int depth) const { return stack[depth]; }
// Builds a `depth` levels deep stack starting at `tree` recording which nodes
// are private in the form of the 'share depth' where nodes are shared.
// Returns the node reached after `depth` descents; that node itself is not
// stored in `stack`.
inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
assert(depth <= tree->height());
int current_depth = 0;
// First phase: descend while every node seen so far is privately owned.
while (current_depth < depth && tree->refcount.IsOne()) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
// `tree` is now the first node not yet classified: it counts as owned (one
// extra level) only if its own refcount is one.
share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
// Second phase: record the remaining (shared) nodes without further
// refcount checks.
while (current_depth < depth) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
return tree;
}
// Builds a stack with the invariant that all nodes are private owned / not
// shared. This is used in iterative updates where a previous propagation
// guaranteed all nodes are owned / private.
inline void BuildOwnedStack(CordRepBtree* tree, int height) {
assert(height <= CordRepBtree::kMaxHeight);
int depth = 0;
while (depth < height) {
assert(tree->refcount.IsOne());
stack[depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
assert(tree->refcount.IsOne());
// One past the deepest level: owned() is true for every depth in the stack
// and for the leaf level below it.
share_depth = depth + 1;
}
// Processes the final 'top level' result action for the tree.
// See the 'Action' enum for the various action implications.
static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
switch (result.action) {
case CordRepBtree::kPopped:
// The result did not fit into `tree`: a new top level node is created
// holding both, ordered according to `edge_type`.
if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
ABSL_RAW_LOG(FATAL, "Max height exceeded");
}
return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
: CordRepBtree::New(result.tree, tree);
case CordRepBtree::kCopied:
// `result.tree` replaces `tree`: drop the reference on the original.
CordRep::Unref(tree);
ABSL_FALLTHROUGH_INTENDED;
case CordRepBtree::kSelf:
return result.tree;
}
ABSL_INTERNAL_UNREACHABLE;
return result.tree;
}
// Propagate the action result in 'result' up into all nodes of the stack
// starting at depth 'depth'. 'length' contains the extra length of data that
// was added at the lowest level, and is updated into all nodes of the stack.
// See the 'Action' enum for the various action implications.
// If 'propagate' is true, then any copied node values are updated into the
// stack, which is used for iterative processing on the same stack.
template <bool propagate = false>
inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
// TODO(mvels): revisit the below code to check if 3 loops with 3
// (incremental) conditions is faster than 1 loop with a switch.
// Benchmarking and perf recordings indicate the loop with switch is
// fastest, likely because of indirect jumps on the tight case values and
// dense branches. But it's worth considering 3 loops, as the `action`
// transitions are mono directional. E.g.:
// while (action == kPopped) {
// ...
// }
// while (action == kCopied) {
// ...
// }
// ...
// We also found that an "if () do {}" loop here seems faster, possibly
// because it allows the branch predictor more granular heuristics on
// 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
// which appear to be the most common use cases.
if (depth != 0) {
do {
CordRepBtree* node = stack[--depth];
const bool owned = depth < share_depth;
switch (result.action) {
case CordRepBtree::kPopped:
// The child produced a new sibling: try to add it as an edge here.
assert(!propagate);
result = node->AddEdge<edge_type>(owned, result.tree, length);
break;
case CordRepBtree::kCopied:
// The child was replaced by a copy: swap the edge for the copy.
result = node->SetEdge<edge_type>(owned, result.tree, length);
if (propagate) stack[depth] = result.tree;
break;
case CordRepBtree::kSelf:
// The child was updated in place: only lengths remain to be adjusted
// in this node and every node above it. Returns the top most node of
// the stack (`stack[0]`) without going through Finalize().
node->length += length;
while (depth > 0) {
node = stack[--depth];
node->length += length;
}
return node;
}
} while (depth > 0);
}
return Finalize(tree, result);
}
// Invokes `Unwind` with `propagate=true` to update the stack node values.
inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
return Unwind</*propagate=*/true>(tree, depth, length, result);
}
// `share_depth` contains the depth at which the nodes in the stack become
// shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
// then `share_depth` is 0. If the 2nd node is shared (and implicitly all
// nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
// than the depth of the stack indicates that none of the nodes in the stack
// are shared.
int share_depth;
// Nodes on the followed leg, indexed by depth (0 is the top most node).
NodeStack stack;
};
} // namespace
// Writes a human readable dump of `rep` to `stream`: a separator line, then
// `label` and a second separator when the label is non-empty, then either the
// node structure of `rep` or the text "NULL" for a null `rep`. Node data is
// included when `include_contents` is true.
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
                        bool include_contents, std::ostream& stream) {
  stream << "===================================\n";
  if (!label.empty()) {
    stream << label << '\n' << "-----------------------------------\n";
  }
  if (rep == nullptr) {
    stream << "NULL\n";
    return;
  }
  DumpAll(rep, include_contents, stream);
}
// Convenience overload: dumps `rep` under `label` without node contents.
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
                        std::ostream& stream) {
  constexpr bool kIncludeContents = false;
  Dump(rep, label, kIncludeContents, stream);
}
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
Dump(rep, absl::string_view(), false, stream);
}
// Destroys the leaf node `tree`: drops one reference on each data edge in
// `[begin, end)`, deleting the edges whose last reference this was, and then
// deletes the node itself.
void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
  auto&& data_edges = tree->Edges(begin, end);
  for (CordRep* data_edge : data_edges) {
    FastUnref(data_edge, DeleteLeafEdge);
  }
  Delete(tree);
}
// Destroys the non-leaf node `tree`: drops one reference on each child btree
// in `[begin, end)`, passing the children whose last reference this was to
// Destroy, and then deletes the node itself.
void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
                                  size_t end) {
  auto&& children = tree->Edges(begin, end);
  for (CordRep* child : children) {
    FastUnref(child->btree(), Destroy);
  }
  Delete(tree);
}
// Checks the structural invariants of `tree` and returns true when all hold.
// On the first violated invariant an ERROR line naming the failed expression
// is logged and false is returned. Child nodes are validated recursively only
// when `shallow` is false or exhaustive validation is enabled.
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
// Function-local helper macros (undefined again at the end of the function).
// Both log the stringified expression(s) and make IsValid() return false.
// They expand to a bare `if` statement, so they must only be used as complete
// statements, as done below.
#define NODE_CHECK_VALID(x) \
if (!(x)) { \
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
return false; \
}
// Note: `x` and `y` are evaluated a second time to format the log message
// when the comparison fails.
#define NODE_CHECK_EQ(x, y) \
if ((x) != (y)) { \
ABSL_RAW_LOG(ERROR, \
"CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
#y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
return false; \
}
// Node level checks: non-null, tagged as btree, height within bounds, and a
// consistent [begin, end) edge range inside the node's capacity.
NODE_CHECK_VALID(tree != nullptr);
NODE_CHECK_VALID(tree->IsBtree());
NODE_CHECK_VALID(tree->height() <= kMaxHeight);
NODE_CHECK_VALID(tree->begin() < tree->capacity());
NODE_CHECK_VALID(tree->end() <= tree->capacity());
NODE_CHECK_VALID(tree->begin() <= tree->end());
// Edge level checks: above the leaf level every edge must be a btree exactly
// one level lower; at the leaf level every edge must be a data edge.
size_t child_length = 0;
for (CordRep* edge : tree->Edges()) {
NODE_CHECK_VALID(edge != nullptr);
if (tree->height() > 0) {
NODE_CHECK_VALID(edge->IsBtree());
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
} else {
NODE_CHECK_VALID(IsDataEdge(edge));
}
child_length += edge->length;
}
// The node's length must equal the sum of its edges' lengths.
NODE_CHECK_EQ(child_length, tree->length);
// Recurse into child nodes unless a shallow check was requested; the
// exhaustive validation flag overrides `shallow`.
if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree(), shallow)) return false;
}
}
return true;
#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
#ifndef NDEBUG
// Returns `tree` when IsValid(tree, shallow) holds. Otherwise dumps the tree
// (without contents) to std::cout and logs a FATAL message.
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
  if (IsValid(tree, shallow)) {
    return tree;
  }
  Dump(tree, "CordRepBtree validation failed:", false, std::cout);
  ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  return tree;
}
// Const overload: returns `tree` when IsValid(tree, shallow) holds. Otherwise
// dumps the tree (without contents) to std::cout and logs a FATAL message.
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                              bool shallow) {
  if (IsValid(tree, shallow)) {
    return tree;
  }
  Dump(tree, "CordRepBtree validation failed:", false, std::cout);
  ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  return tree;
}
#endif // NDEBUG
// Adds `edge` to this node on the side selected by `edge_type` and increases
// the node length by `delta`. The edge goes into the node returned by
// ToOpResult(owned). When this node is already at kMaxCapacity, nothing is
// added here; a new node holding only `edge` is returned with action kPopped
// so that the caller can attach it one level higher.
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
  if (size() < kMaxCapacity) {
    OpResult result = ToOpResult(owned);
    result.tree->Add<edge_type>(edge);
    result.tree->length += delta;
    return result;
  }
  return {New(edge), kPopped};
}
// Replaces the front or back edge (per `edge_type`) with `edge` and increases
// the node length by `delta`.
//  - owned: this node is updated in place (action kSelf) and the reference on
//    the replaced edge is dropped.
//  - not owned: a raw copy of this node is updated instead (action kCopied),
//    and every edge the copy keeps unchanged gains a reference.
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
  const size_t idx = index(edge_type);
  OpResult result;
  if (!owned) {
    result = {CopyRaw(), kCopied};
    // The unchanged edges form the half-open range [begin, back) when the
    // back edge is replaced, or [begin + 1, end) when the front edge is
    // replaced. As `end == back + 1`, a shift of 0 or 1 covers both cases.
    constexpr int shift = edge_type == kFront ? 1 : 0;
    for (CordRep* kept : Edges(begin() + shift, back() + shift)) {
      CordRep::Ref(kept);
    }
  } else {
    result = {this, kSelf};
    CordRep::Unref(edges_[idx]);
  }
  result.tree->edges_[idx] = edge;
  result.tree->length += delta;
  return result;
}
// Adds `rep` as a new data edge at the front or back (per `edge_type`) of
// `tree` and returns the resulting tree. The edge is added to the leaf at the
// end of the corresponding leg, and the outcome is then unwound through the
// stack up to the top level.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
  StackOperations<edge_type> stack_ops;
  const int height = tree->height();
  // Capture the length up front; it is needed again after the edge is added.
  const size_t added = rep->length;
  CordRepBtree* const leaf = stack_ops.BuildStack(tree, height);
  const OpResult leaf_result =
      leaf->AddEdge<edge_type>(stack_ops.owned(height), rep, added);
  return stack_ops.Unwind(tree, height, added, leaf_result);
}
// Creates a new leaf and fills it from the start of `data`, one flat per
// edge, until `data` is exhausted or the leaf is at capacity. Each flat is
// requested with `extra` bytes on top of the remaining data size. The leaf's
// length is the number of bytes actually stored, which is less than
// `data.size()` when not everything fit.
template <>
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
                                           size_t extra) {
  CordRepBtree* const leaf = CordRepBtree::New(0);
  const size_t capacity = leaf->capacity();
  size_t stored = 0;
  size_t end = 0;
  for (; end != capacity && !data.empty(); ++end) {
    CordRepFlat* const flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    stored += n;
    leaf->edges_[end] = flat;
    data = Consume<kBack>(flat->Data(), data, n);
  }
  leaf->length = stored;
  leaf->set_end(end);
  return leaf;
}
// Creates a new leaf and fills it from the end of `data`, one flat per edge,
// placing edges from the last slot towards the first until `data` is
// exhausted or no slot is left. Each flat is requested with `extra` bytes on
// top of the remaining data size. The leaf's length is the number of bytes
// actually stored, which is less than `data.size()` when not everything fit.
template <>
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
                                            size_t extra) {
  CordRepBtree* const leaf = CordRepBtree::New(0);
  size_t begin = leaf->capacity();
  leaf->set_end(leaf->capacity());
  size_t stored = 0;
  while (begin != 0 && !data.empty()) {
    CordRepFlat* const flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    stored += n;
    --begin;
    leaf->edges_[begin] = flat;
    data = Consume<kFront>(flat->Data(), data, n);
  }
  leaf->length = stored;
  leaf->set_begin(begin);
  return leaf;
}
// Appends flats holding bytes from the start of `data` as new back edges of
// this leaf until `data` is exhausted or the edge range reaches capacity, and
// returns the part of `data` that was not stored. Requires non-empty `data`
// and at least one free edge (asserted). The node's `length` is not updated
// here.
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
                                               size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  AlignBegin();
  const size_t limit = capacity();
  for (;;) {
    CordRepFlat* const flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[fetch_add_end(1)] = flat;
    data = Consume<kBack>(flat->Data(), data, n);
    if (data.empty() || end() == limit) break;
  }
  return data;
}
// Prepends flats holding bytes from the end of `data` as new front edges of
// this leaf until `data` is exhausted or the edge range reaches slot 0, and
// returns the part of `data` that was not stored. Requires non-empty `data`
// and at least one free edge (asserted). The node's `length` is not updated
// here.
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
                                                size_t extra) {
  assert(!data.empty());
  assert(size() < capacity());
  AlignEnd();
  for (;;) {
    CordRepFlat* const flat = CordRepFlat::New(data.length() + extra);
    const size_t n = (std::min)(data.length(), flat->Capacity());
    flat->length = n;
    edges_[sub_fetch_begin(1)] = flat;
    data = Consume<kFront>(flat->Data(), data, n);
    if (data.empty() || begin() == 0) break;
  }
  return data;
}
// Adds all of `data` to the front or back (per `edge_type`) of `tree` and
// returns the resulting tree. `extra` is passed on to the flat allocations
// made for the data. Returns `tree` unchanged for empty `data`.
// The work happens in two stages: first any free edges in the existing outer
// leaf are filled, then the remaining data is added as newly built leaves,
// one per loop iteration.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
size_t extra) {
if (ABSL_PREDICT_FALSE(data.empty())) return tree;
const size_t original_data_size = data.size();
int depth = tree->height();
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
// If there is capacity in the last edge, append as much data
// as possible into this last edge.
if (leaf->size() < leaf->capacity()) {
OpResult result = leaf->ToOpResult(ops.owned(depth));
data = result.tree->AddData<edge_type>(data, extra);
if (data.empty()) {
// Everything fit into the existing leaf: account for the full size and
// unwind once.
result.tree->length += original_data_size;
return ops.Unwind(tree, depth, original_data_size, result);
}
// We added some data into this leaf, but not all. Propagate the added
// length to the top most node, and rebuild the stack with any newly copied
// or updated nodes. From this point on, the path (leg) from the top most
// node to the right-most node towards the leaf node is privately owned.
size_t delta = original_data_size - data.size();
assert(delta > 0);
result.tree->length += delta;
tree = ops.Propagate(tree, depth, delta, result);
// Mark every level of the stack, and the leaf below it, as owned.
ops.share_depth = depth + 1;
}
// We were unable to append all data into the existing right-most leaf node.
// This means all remaining data must be put into (a) new leaf node(s) which
// we append to the tree. To make this efficient, we iteratively build full
// leaf nodes from `data` until the created leaf contains all remaining data.
// We utilize the `Unwind` method to merge the created leaf into the first
// level towards root that has capacity. On each iteration with remaining
// data, we rebuild the stack in the knowledge that right-most nodes are
// privately owned after the first `Unwind` completes.
for (;;) {
OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
// The new leaf holds all remaining data: this is the final unwind.
if (result.tree->length == data.size()) {
return ops.Unwind(tree, depth, result.tree->length, result);
}
// Otherwise drop the stored bytes from `data`, attach the leaf, and rebuild
// the stack, as the tree height may have changed.
data = Consume<edge_type>(data, result.tree->length);
tree = ops.Unwind(tree, depth, result.tree->length, result);
depth = tree->height();
ops.BuildOwnedStack(tree, depth);
}
}
// Merges `src` into `dst` on the side selected by `edge_type` and returns the
// resulting tree. Requires `dst` to be at least as high as `src`.
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
  assert(dst->height() >= src->height());

  // `src` may be deleted or unreffed further below, so its length is captured
  // up front for the final unwind.
  const size_t src_length = src->length;

  // `src` is merged into the node of `dst` that sits at the height of `src`.
  const int merge_depth = dst->height() - src->height();
  StackOperations<edge_type> ops;
  CordRepBtree* const target = ops.BuildStack(dst, merge_depth);

  // When `target` has room for every edge of `src`, the edges are added to
  // `target` (or to a copy of it if `target` is not privately owned).
  // Otherwise `src` is handed as a 'popped' node to `Unwind` / `Finalize`,
  // which place it at the first level towards the root that has capacity for
  // one more edge, or create a new top level node.
  OpResult result;
  const bool fits = target->size() + src->size() <= kMaxCapacity;
  if (!fits) {
    result = {src, kPopped};
  } else {
    result = target->ToOpResult(ops.owned(merge_depth));
    result.tree->Add<edge_type>(src->Edges());
    result.tree->length += src->length;
    if (!src->refcount.IsOne()) {
      // `src` is shared: the merged node needs its own reference on each edge
      // before our reference on `src` is released.
      for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
      CordRepBtree::Unref(src);
    } else {
      // `src` is private: its edge references now belong to the merged node,
      // so only the node itself is deleted.
      Delete(src);
    }
  }

  // A merge at the top level (equal heights) only needs to be finalized; any
  // deeper merge is unwound towards the top level first.
  if (merge_depth == 0) {
    return ops.Finalize(dst, result);
  }
  return ops.Unwind(dst, merge_depth, src_length, result);
}
// Returns a copy of the trailing part of this tree: the data starting at
// `offset` up to the end of the tree. Requires `offset < length`.
// The result is a node, or at a returned height of -1 a plain data edge:
// levels that only have a single edge on the path to the requested data are
// dropped from the result.
CopyResult CordRepBtree::CopySuffix(size_t offset) {
assert(offset < this->length);
// As long as `offset` starts inside the last edge, we can 'drop' the current
// depth. For the most extreme example: if offset references the last data
// edge in the tree, there is only a single edge / path from the top of the
// tree to that last edge, so we can drop all the nodes except that edge.
// The fast path check for this is `back->length >= length - offset`.
int height = this->height();
CordRepBtree* node = this;
// `len` is the number of bytes in the requested suffix.
size_t len = node->length - offset;
CordRep* back = node->Edge(kBack);
while (back->length >= len) {
// Translate `offset` into an offset relative to the start of `back`.
offset = back->length - len;
if (--height < 0) {
// `back` is a data edge: the suffix is a substring of that single edge.
return {MakeSubstring(CordRep::Ref(back), offset), height};
}
node = back->btree();
back = node->Edge(kBack);
}
// The suffix is exactly the data of `node`: share the whole node.
if (offset == 0) return {CordRep::Ref(node), height};
// Offset does not point into the last edge, so we span at least two edges.
// Find the index of offset with `IndexBeyond` which provides us the edge
// 'beyond' the offset if offset is not a clean starting point of an edge.
Position pos = node->IndexBeyond(offset);
CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
// `result` refers to the top node of the copy; `sub` moves down the left
// most leg of the copy in the loop below.
const CopyResult result = {sub, height};
// `pos.n` contains a non zero value if the offset is not an exact starting
// point of an edge. In this case, `pos.n` contains the 'trailing' amount of
// bytes of the edge preceding that in `pos.index`. We need to iteratively
// adjust the preceding edge with the 'broken' offset until we have a perfect
// start of the edge.
while (pos.n != 0) {
assert(pos.index >= 1);
// Extend `sub` by one slot at the front for the partially copied edge; the
// slot itself is assigned further below.
const size_t begin = pos.index - 1;
sub->set_begin(begin);
CordRep* const edge = node->Edge(begin);
// The part to copy is the last `pos.n` bytes of `edge`.
len = pos.n;
offset = edge->length - len;
if (--height < 0) {
// `edge` is a data edge: store the partial data as a substring.
sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
return result;
}
node = edge->btree();
pos = node->IndexBeyond(offset);
CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
sub->edges_[begin] = nsub;
sub = nsub;
}
sub->set_begin(pos.index);
return result;
}
// Returns a copy of the first `n` bytes of this tree.
// Requires `0 < n <= length`. The result is a node, or at a returned height
// of -1 a plain data edge: levels that only have a single edge on the path to
// the requested data are dropped from the result.
CopyResult CordRepBtree::CopyPrefix(size_t n) {
assert(n > 0);
assert(n <= this->length);
// As long as `n` does not exceed the length of the first edge, we can 'drop'
// the current depth. For the most extreme example: if we'd copy a 1 byte
// prefix from a tree, there is only a single edge / path from the top of the
// tree to the single data edge containing this byte, so we can drop all the
// nodes except the data node.
int height = this->height();
CordRepBtree* node = this;
CordRep* front = node->Edge(kFront);
while (front->length >= n) {
// `front` is a data edge: the prefix is a substring of that single edge.
if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
node = front->btree();
front = node->Edge(kFront);
}
// The prefix is exactly the data of `node`: share the whole node.
if (node->length == n) return {CordRep::Ref(node), height};
// `n` spans at least two nodes, find the end point of the span.
Position pos = node->IndexOf(n);
// Create a partial copy of the node up to `pos.index`, with a defined length
// of `n`. Any 'partial last edge' is added further below as needed.
CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
// `result` refers to the top node of the copy; `sub` moves down the right
// most leg of the copy in the loop below.
const CopyResult result = {sub, height};
// `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
// this is not zero, we don't have a 'clean cut', so we need to make a
// (partial) copy of that last edge, and repeat this until pos.n is zero.
while (pos.n != 0) {
size_t end = pos.index;
// The part to copy is the first `pos.n` bytes of the edge at `pos.index`.
n = pos.n;
CordRep* edge = node->Edge(pos.index);
if (--height < 0) {
// `edge` is a data edge: store the partial data as a substring.
sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
sub->set_end(end);
AssertValid(result.edge->btree());
return result;
}
node = edge->btree();
pos = node->IndexOf(n);
CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
sub->edges_[end++] = nsub;
sub->set_end(end);
sub = nsub;
}
sub->set_end(pos.index);
AssertValid(result.edge->btree());
return result;
}
// Returns a tree or data edge holding the `n` bytes of this tree that start at
// `offset`. Requires `offset + n <= length`. Returns nullptr if `n` is zero.
// Whole edges inside the requested range are shared with this tree through
// an added reference; partial edges at either end are copied or substringed.
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
CordRepBtree* node = this;
int height = node->height();
Position front = node->IndexOf(offset);
CordRep* left = node->edges_[front.index];
// Descend for as long as the requested range lies entirely inside one edge.
while (front.n + n <= left->length) {
// `left` is a data edge holding the entire range: return a substring of it.
if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
node = left->btree();
front = node->IndexOf(front.n);
left = node->edges_[front.index];
}
// The range spans more than one edge of `node`: `front` identifies the first
// edge and the offset inside it, `back` the last edge and the number of
// bytes needed from it.
const Position back = node->IndexBefore(front, n);
CordRep* const right = node->edges_[back.index];
assert(back.index > front.index);
// Get partial suffix and prefix entries.
CopyResult prefix;
CopyResult suffix;
if (height > 0) {
// Copy prefix and suffix of the boundary nodes.
prefix = left->btree()->CopySuffix(front.n);
suffix = right->btree()->CopyPrefix(back.n);
// If there is an edge between the prefix and suffix edges, then the tree
// must remain at its previous (full) height. If we have no edges between
// prefix and suffix edges, then the tree must be as high as either the
// suffix or prefix edges (which are collapsed to their minimum heights).
if (front.index + 1 == back.index) {
height = (std::max)(prefix.height, suffix.height) + 1;
}
// Raise prefix and suffixes to the new tree height.
for (int h = prefix.height + 1; h < height; ++h) {
prefix.edge = CordRepBtree::New(prefix.edge);
}
for (int h = suffix.height + 1; h < height; ++h) {
suffix.edge = CordRepBtree::New(suffix.edge);
}
} else {
// Leaf node, simply take substrings for prefix and suffix.
prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
}
// Compose resulting tree.
CordRepBtree* sub = CordRepBtree::New(height);
size_t end = 0;
sub->edges_[end++] = prefix.edge;
// Edges strictly between the first and last edge are shared as a whole.
for (CordRep* r : node->Edges(front.index + 1, back.index)) {
sub->edges_[end++] = CordRep::Ref(r);
}
sub->edges_[end++] = suffix.edge;
sub->set_end(end);
sub->length = n;
return AssertValid(sub);
}
// Returns the tree holding the data of `left` followed by the data of `right`.
// The lower tree is always merged into the higher (or equally high) tree:
// `right` at the back of `left`, or `left` at the front of `right`.
CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
                                       CordRepBtree* right) {
  if (left->height() < right->height()) {
    return Merge<kFront>(right, left);
  }
  return Merge<kBack>(left, right);
}
// Returns true if this node is a leaf holding exactly one data edge. In that
// case `fragment`, if not null, is set to reference the data of that edge.
bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
  const bool single_data_edge = (height() == 0) && (size() == 1);
  if (!single_data_edge) {
    return false;
  }
  if (fragment != nullptr) {
    *fragment = Data(begin());
  }
  return true;
}
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
absl::string_view* fragment) const {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return false;
int height = this->height();
const CordRepBtree* node = this;
for (;;) {
const Position front = node->IndexOf(offset);
const CordRep* edge = node->Edge(front.index);
if (edge->length < front.n + n) return false;
if (--height < 0) {
if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
return true;
}
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
char CordRepBtree::GetCharacter(size_t offset) const {
assert(offset < length);
const CordRepBtree* node = this;
int height = node->height();
for (;;) {
Position front = node->IndexOf(offset);
if (--height < 0) return node->Data(front.index)[front.n];
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
// Out of line implementation of `GetAppendBuffer()` for trees with a height of
// 4 or more. Returns a span of up to `size` bytes of unused capacity in the
// last data edge if every node on the path to that edge, and the edge itself,
// is privately owned and the edge is a FLAT with spare capacity. The length of
// the flat and of all nodes on the path is increased by the size of the
// returned span. Returns an empty span otherwise.
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
  // All heights <= 3 are handled by the inlined `GetAppendBuffer()`.
  assert(height() >= 4);
  assert(refcount.IsOne());

  // Collect the nodes below `this` on the right-most path: their lengths must
  // be updated if a usable flat is found at the leaf level.
  const int levels = height();
  CordRepBtree* path[kMaxDepth];
  CordRepBtree* cursor = this;
  int recorded = 0;
  while (recorded < levels) {
    cursor = cursor->Edge(kBack)->btree();
    if (!cursor->refcount.IsOne()) return {};
    path[recorded++] = cursor;
  }

  // The last data edge must be a privately owned flat.
  CordRep* const last = cursor->Edge(kBack);
  if (!last->refcount.IsOne() || last->tag < FLAT) return {};

  // The flat must have unused capacity.
  auto* const flat = last->flat();
  const size_t room = flat->Capacity() - last->length;
  if (room == 0) return {};

  // Hand out the unused capacity, capped at the requested size, and add the
  // handed out size to the flat, this node and every node on the path.
  const size_t grow = (std::min)(size, room);
  Span<char> buffer = {flat->Data() + last->length, grow};
  last->length += grow;
  this->length += grow;
  for (CordRepBtree** it = path; it != path + levels; ++it) {
    (*it)->length += grow;
  }
  return buffer;
}
// Fallback for `Create()`: returns `rep` itself if it is a btree, else builds
// a new btree from the pieces that `Consume` reports for `rep`.
CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
  if (rep->IsBtree()) return rep->btree();

  CordRepBtree* result = nullptr;
  // The first piece creates the tree, each following piece is appended to it.
  auto add_piece = [&result](CordRep* piece, size_t offset, size_t length) {
    CordRep* const edge = MakeSubstring(piece, offset, length);
    result = (result == nullptr)
                 ? New(edge)
                 : CordRepBtree::AddCordRep<kBack>(result, edge);
  };
  Consume(rep, add_piece);
  return result;
}
// Fallback for `Append()`: merges `rep` into `tree` if `rep` is a btree, else
// appends each piece that `Consume` reports for `rep` to the back of `tree`.
CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
  const bool is_btree = rep->IsBtree();
  if (ABSL_PREDICT_TRUE(is_btree)) {
    return MergeTrees(tree, rep->btree());
  }
  auto append_piece = [&tree](CordRep* piece, size_t offset, size_t length) {
    CordRep* const edge = MakeSubstring(piece, offset, length);
    tree = CordRepBtree::AddCordRep<kBack>(tree, edge);
  };
  Consume(rep, append_piece);
  return tree;
}
// Fallback for `Prepend()`: merges `rep` in front of `tree` if `rep` is a
// btree, else adds each piece that `ReverseConsume` reports for `rep` to the
// front of `tree`.
CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
  const bool is_btree = rep->IsBtree();
  if (ABSL_PREDICT_TRUE(is_btree)) {
    return MergeTrees(rep->btree(), tree);
  }
  auto prepend_piece = [&tree](CordRep* piece, size_t offset, size_t length) {
    CordRep* const edge = MakeSubstring(piece, offset, length);
    tree = CordRepBtree::AddCordRep<kFront>(tree, edge);
  };
  ReverseConsume(rep, prepend_piece);
  return tree;
}
// Appends the bytes in `data` to the back of `tree` and returns the resulting
// tree. `extra` is forwarded unchanged to the templated `AddData`.
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
                                   size_t extra) {
  CordRepBtree* const appended =
      CordRepBtree::AddData<kBack>(tree, data, extra);
  return appended;
}
// Prepends the bytes in `data` to the front of `tree` and returns the
// resulting tree. `extra` is forwarded unchanged to the templated `AddData`.
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  CordRepBtree* const prepended =
      CordRepBtree::AddData<kFront>(tree, data, extra);
  return prepended;
}
// Explicit instantiation definitions of the static `AddCordRep` and `AddData`
// member templates, one for each `EdgeType` value (kFront and kBack).
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,871 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#include <cassert>
#include <cstdint>
#include <iosfwd>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
// Data is stored at the leaf level only, non leaf nodes contain down pointers
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
// or EXTERNAL nodes. The implementation allows for data to be added to either
// end of the tree only, it does not provide any 'insert' logic. This has the
// benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50% as in a
// similar fashion, one side of the merged tree will typically have a 100% fill
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
// better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call affecting
// `tree` reference count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and trust that all methods DTRT at all times.
class CordRepBtree : public CordRep {
public:
// EdgeType identifies `front` and `back` enum values.
// Various implementations in CordRepBtree such as `Add` and `Edge` are
// generic and templated on operating on either of the boundary edges.
// For more information on the possible edges contained in a CordRepBtree
// instance see the documentation for `edges_`.
// `kFront` selects the first edge of a node (the edge at index `begin()`),
// `kBack` selects the last edge of a node (the edge at index `back()`).
enum class EdgeType { kFront, kBack };
// Convenience constants into `EdgeType`
static constexpr EdgeType kFront = EdgeType::kFront;
static constexpr EdgeType kBack = EdgeType::kBack;
// Maximum number of edges: based on experiments and performance data, we can
// pick suitable values resulting in optimum cacheline aligned values. The
// preferred values are based on 64-bit systems where we aim to align this
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
// TODO(b/192061034): experiment with alternative sizes.
static constexpr size_t kMaxCapacity = 6;
// Reasonable maximum height of the btree. We can expect a fill ratio of at
// least 50%: trees are always expanded at the front or back. Concatenating
// trees will then typically fold at the top most node, where the lower nodes
// are at least at capacity on one side of joined inputs. At a lower fill
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
// We will fail / abort if an application ever exceeds this height, which
// should be extremely rare (near impossible) and be an indication of an
// application error: we do not assume it reasonable for any application to
// operate correctly with such monster trees.
// Another compelling reason for the number `12` is that any contextual stack
// required for navigation or insertion requires 12 words and 12 bytes, which
// fits inside 2 cache lines with some room to spare, and is reasonable as a
// local stack variable compared to Cord's current near 400 bytes stack use.
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
// values start with a value of 0 for leaf nodes.
static constexpr int kMaxDepth = 12;
static constexpr int kMaxHeight = kMaxDepth - 1;
// `Action` defines the action for unwinding changes done at the btree's leaf
// level that need to be propagated up to the parent node(s). Each operation
// on a node has an effect / action defined as follows:
// - kSelf
// The operation (add / update, etc) was performed directly on the node as
// the node is private to the current thread (i.e.: not shared directly or
// indirectly through a refcount > 1). Changes can be propagated directly to
// all parent nodes as all parent nodes are also then private to the current
// thread.
// - kCopied
// The operation (add / update, etc) was performed on a copy of the original
// node, as the node is (potentially) directly or indirectly shared with
// other threads. Changes need to be propagated into the parent nodes where
// the old down pointer must be unreffed and replaced with this new copy.
// Such changes to parent nodes may themselves require a copy if the parent
// node is also shared. A kCopied action can propagate all the way to the
// top node where we then must unref the `tree` input provided by the
// caller, and return the new copy.
// - kPopped
// The operation (typically add) could not be satisfied due to insufficient
// capacity in the targeted node, and a new 'leg' was created that needs to
// be added into the parent node. For example, adding a FLAT inside a leaf
// node that is at capacity will create a new leaf node containing that
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
// action propagating all the way to the top of the tree will result in
// the tree becoming one level higher than the current tree through a final
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
// referencing the old tree and the new (fully popped upwards) 'leg'.
// In short: kSelf = the node itself was changed, kCopied = a copy of the node
// was changed, kPopped = a new leg was created that must be added to the
// parent node.
enum Action { kSelf, kCopied, kPopped };
// Result of an operation on a node. See the `Action` enum for details.
struct OpResult {
// The node resulting from the operation: the node operated on (kSelf), a copy
// of that node (kCopied), or a newly created leg (kPopped).
CordRepBtree* tree;
// How `tree` must be propagated into the parent node(s).
Action action;
};
// Return value of the CopyPrefix and CopySuffix methods which can
// return a node or data edge at any height inside the tree.
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
struct CopyResult {
// The copied node, or a plain data edge if `height` is -1.
CordRep* edge;
// Height of `edge`: 0 for a leaf node, -1 for a plain data edge.
int height;
};
// Logical position inside a node:
// - index: index of the edge.
// - n: size or offset value depending on context.
struct Position {
// Index of the edge inside the node.
size_t index;
// Size or offset value relative to that edge; the exact meaning depends on
// the function returning or accepting this position.
size_t n;
};
// Creates a btree from the given input. Adopts a ref of `rep`.
// If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
// function immediately returns `rep->btree()`. If the input is a valid data
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
// tree, and the input is consumed and transformed into a btree().
static CordRepBtree* Create(CordRep* rep);
// Destroys the provided tree. Should only be called by cord internal API's,
// typically after a ref_count.Decrement() on the last reference count.
static void Destroy(CordRepBtree* tree);
// Appends / Prepends an existing CordRep instance to this tree.
// The below methods accept three types of input:
// 1) `rep` is a data node (See `IsDataEdge` for valid data edges).
// `rep` is appended or prepended to this tree 'as is'.
// 2) `rep` is a BTREE.
// `rep` is merged into `tree` respecting the Append/Prepend order.
// 3) `rep` is some other (legacy) type.
// `rep` is converted in place and added to `tree`
// Requires `tree` and `rep` to be not null.
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
// Append/Prepend the data in `data` to this tree.
// The `extra` parameter defines how much extra capacity should be allocated
// for any additional FLAT being allocated. This is an optimization hint from
// the caller. For example, a caller may need to add 2 string_views of data
// "abc" and "defghi" which are not consecutive. The caller can in this case
// invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
// where possible with at least 6 bytes of extra capacity beyond `length`.
// This helps avoiding data getting fragmented over multiple flats.
// There is no limit on the size of `data`. If `data` can not be stored inside
// a single flat, then the function will iteratively add flats until all data
// has been consumed and appended or prepended to the tree.
static CordRepBtree* Append(CordRepBtree* tree, string_view data,
size_t extra = 0);
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
size_t extra = 0);
// Returns a new tree, containing `n` bytes of data from this instance
// starting at offset `offset`. Where possible, the returned tree shares
// (re-uses) data edges and nodes with this instance to minimize the
// combined memory footprint of both trees.
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
CordRep* SubTree(size_t offset, size_t n);
// Returns the character at the given offset.
char GetCharacter(size_t offset) const;
// Returns true if this node holds a single data edge, and if so, sets
// `fragment` to reference the contained data. `fragment` is an optional
// output parameter and allowed to be null.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data of `n` bytes starting at offset `offset`
// is contained in a single data edge, and if so, sets fragment to reference
// the contained data. `fragment` is an optional output parameter and allowed
// to be null.
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
// Returns a span (mutable range of bytes) of up to `size` bytes into the
// last FLAT data edge inside this tree under the following conditions:
// - none of the nodes down into the FLAT node are shared.
// - the last data edge in this tree is a non-shared FLAT.
// - the referenced FLAT has additional capacity available.
// If all these conditions are met, a non-empty span is returned, and the
// length of the flat node and involved tree nodes have been increased by
// `span.length()`. The caller is responsible for immediately assigning values
// to all uninitialized data referenced by the returned span.
// Requires `this->refcount.IsOne()`: this function forces the caller to do
// this fast path check on the top level node, as this is the most commonly
// shared node of a cord tree.
Span<char> GetAppendBuffer(size_t size);
// Returns the `height` of the tree. The height of a tree is limited to
// kMaxHeight. `height` is implemented as an `int` as in some places we
// use negative (-1) values for 'data edges'.
// The height is kept in `storage[0]`; a leaf node has a height of 0.
int height() const {
  const int node_height = static_cast<int>(storage[0]);
  return node_height;
}
// Properties: begin, back, end, front/back boundary indexes.
// Index of the first edge, kept in `storage[1]`.
size_t begin() const {
  const size_t first_index = static_cast<size_t>(storage[1]);
  return first_index;
}
// Index of the last edge: one before the end index kept in `storage[2]`.
size_t back() const {
  const size_t end_index = static_cast<size_t>(storage[2]);
  return end_index - 1;
}
// Index one past the last edge, kept in `storage[2]`.
size_t end() const {
  const size_t end_index = static_cast<size_t>(storage[2]);
  return end_index;
}
// Returns the index of the boundary edge selected by `edge`: `begin()` for
// kFront, `back()` for any other value.
size_t index(EdgeType edge) const {
  if (edge == kFront) {
    return begin();
  }
  return back();
}
// Properties: size and capacity.
// `capacity` contains the current capacity of this instance, where
// `kMaxCapacity` contains the maximum capacity of a btree node.
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
// change in the future if we see benefit in dynamically sizing 'small' nodes
// to 'large' nodes for large data trees.
// Number of edges currently stored in this node.
size_t size() const {
  const size_t last = end();
  const size_t first = begin();
  return last - first;
}
// Maximum number of edges this node can hold; currently always kMaxCapacity.
size_t capacity() const {
  const size_t max_edges = kMaxCapacity;
  return max_edges;
}
// Edge access
inline CordRep* Edge(size_t index) const;
inline CordRep* Edge(EdgeType edge_type) const;
inline absl::Span<CordRep* const> Edges() const;
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
// Returns reference to the data edge at `index`.
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
static const char* EdgeDataPtr(const CordRep* r);
static absl::string_view EdgeData(const CordRep* r);
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep.
static bool IsDataEdge(const CordRep* rep);
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
// node in `tree` and the cumulative length, type and height of the direct
// child nodes of `tree` are checked. The value of `shallow` is ignored if the
// internal `cord_btree_exhaustive_validation` diagnostics variable is true,
// in which case the performed validations works as if `shallow` were false.
// This function is intended for debugging and testing purposes only.
static bool IsValid(const CordRepBtree* tree, bool shallow = false);
// Diagnostics: asserts that the provided tree is valid.
// `AssertValid()` performs a shallow validation by default. `shallow` can be
// set to false in which case an exhaustive validation is performed. This
// function is implemented in terms of calling `IsValid()` and asserting the
// return value to be true. See `IsValid()` for more information.
// This function is intended for debugging and testing purposes only.
static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
static const CordRepBtree* AssertValid(const CordRepBtree* tree,
bool shallow = true);
// Diagnostics: dump the contents of this tree to `stream`.
// This function is intended for debugging and testing purposes only.
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream);
// Adds the edge `edge` to this node if possible. `owned` indicates if the
// current node is potentially shared or not with other threads. Returns:
// - {kSelf, <this>}
// The edge was directly added to this node.
// - {kCopied, <node>}
// The edge was added to a copy of this node.
// - {kPopped, New(edge, height())}
// A new leg with the edge was created as this node has no extra capacity.
template <EdgeType edge_type>
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
// Replaces the front or back edge with the provided new edge. Returns:
// - {kSelf, <this>}
// The edge was directly set in this node. The old edge is unreffed.
// - {kCopied, <node>}
// A copy of this node was created with the new edge value.
// In both cases, the function adopts a reference on `edge`.
template <EdgeType edge_type>
OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
// Creates a new empty node at the specified height.
static CordRepBtree* New(int height = 0);
// Creates a new node containing `rep`, with the height being computed
// automatically based on the type of `rep`.
static CordRepBtree* New(CordRep* rep);
// Creates a new node containing both `front` and `back` at height
// `front.height() + 1`. Requires `back.height() == front.height()`.
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
private:
CordRepBtree() = default;
~CordRepBtree() = default;
// Initializes the main properties `tag`, `begin`, `end`, `height`.
inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
// Direct property access begin / end
// Stores `begin`, narrowed to 8 bits, as the index of the first edge.
void set_begin(size_t begin) {
  const uint8_t narrowed = static_cast<uint8_t>(begin);
  storage[1] = narrowed;
}
// Stores `end`, narrowed to 8 bits, as the index one past the last edge.
void set_end(size_t end) {
  const uint8_t narrowed = static_cast<uint8_t>(end);
  storage[2] = narrowed;
}
// Decreases the value of `begin` by `n`, and returns the new value. Notice
// how this returns the new value unlike atomic::fetch_add which returns the
// old value. This is because this is used to prepend edges at 'begin - 1'.
// Lowers the begin index in `storage[1]` by `n` and returns the lowered value.
size_t sub_fetch_begin(size_t n) {
  storage[1] -= static_cast<uint8_t>(n);
  const size_t new_begin = storage[1];
  return new_begin;
}
// Increases the value of `end` by `n`, and returns the previous value. This
// function is typically used to append edges at 'end'.
// Raises the end index in `storage[2]` by `n` and returns the value it had
// before the increase.
size_t fetch_add_end(size_t n) {
  const uint8_t previous_end = storage[2];
  const size_t raised_end = previous_end + n;
  storage[2] = static_cast<uint8_t>(raised_end);
  return previous_end;
}
// Returns the index of the last edge starting on, or before `offset`, with
// `n` containing the relative offset of `offset` inside that edge.
// Requires `offset` < length.
Position IndexOf(size_t offset) const;
// Returns the index of the last edge starting before `offset`, with `n`
// containing the relative offset of `offset` inside that edge.
// This function is useful to find the edges for some span of bytes ending at
// `offset` (i.e., `n` bytes). For example:
//
// Position pos = IndexBefore(n)
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
//
// Requires 0 < `offset` <= length.
Position IndexBefore(size_t offset) const;
// Identical to the above function except starting from the position `front`.
// This function is equivalent to `IndexBefore(front.n + offset)`, with
// the difference that this function is optimized to start at `front.index`.
Position IndexBefore(Position front, size_t offset) const;
// Returns the index of the edge directly beyond the edge containing offset
// `offset`, with `n` containing the distance of that edge from `offset`.
// This function is useful for iteratively finding suffix nodes and remaining
// partial bytes in left-most suffix nodes as for example in CopySuffix.
// Requires `offset` < length.
Position IndexBeyond(size_t offset) const;
// Destruction
static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
// Deletes the node object itself; the edges it refers to are not touched here.
static void Delete(CordRepBtree* tree) {
  delete tree;
}
// Creates a new leaf node containing as much data as possible from `data`.
// The data is added either forwards or reversed depending on `edge_type`.
// Callers must check the length of the returned node to determine if all data
// was copied or not.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
// Creates a raw copy of this Btree node, copying all properties, but
// without adding any references to existing edges.
CordRepBtree* CopyRaw() const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
// Creates a partial copy of this Btree node, copying all edges up to `end`,
// adding a reference on each copied edge, and sets the length of the newly
// created copy to `new_length`.
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
// Creates a partial copy of this Btree node, copying all edges starting at
// `begin`, adding a reference on each copied edge, and sets the length of
// the newly created copy to `new_length`.
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
// Returns a tree containing the result of appending `right` to `left`.
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
// deal with legacy / non conforming input, i.e.: CONCAT trees.
static CordRepBtree* CreateSlow(CordRep* rep);
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
// Aligns existing edges to start at index 0, to allow for a new edge to be
// added to the back of the current edges.
inline void AlignBegin();
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
// added in front of the current edges.
inline void AlignEnd();
// Adds the provided edge to this node.
// Requires this node to have capacity for the edge. Realigns / moves
// existing edges as needed to prepend or append the new edge.
template <EdgeType edge_type>
inline void Add(CordRep* rep);
// Adds the provided edges to this node.
// Requires this node to have capacity for the edges. Realigns / moves
// existing edges as needed to prepend or append the new edges.
template <EdgeType edge_type>
inline void Add(absl::Span<CordRep* const>);
// Adds data from `data` to this node until either all data has been consumed,
// or there is no more capacity for additional flat nodes inside this node.
// Requires the current node to be a leaf node, data to be non empty, and the
// current node to have capacity for at least one more data edge.
// Returns any remaining data from `data` that was not added, which is,
// depending on the edge type (front / back), either the remaining prefix or
// suffix of the input.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
absl::string_view AddData(absl::string_view data, size_t extra);
// Replace the front or back edge with the provided value.
// Adopts a reference on `edge` and unrefs the old edge.
template <EdgeType edge_type>
inline void SetEdge(CordRep* edge);
// Returns a partial copy of the current tree containing the first `n` bytes
// of data. `CopyResult` contains both the resulting edge and its height. The
// resulting tree may be less high than the current tree, or even be a single
// matching data edge. For example, if `n == 1`, then the result will be the
// single data edge, and height will be set to -1 (one below the owning leaf
// node). If n == 0, this function returns null.
// Requires `n <= length`
CopyResult CopyPrefix(size_t n);
// Returns a partial copy of the current tree containing all data starting
// after `offset`. `CopyResult` contains both the resulting edge and its
// height. The resulting tree may be less high than the current tree, or even
// be a single matching data edge. For example, if `offset == length - 1`,
// then the result will be a single data edge, and height will be set to -1
// (one below the owning leaf node).
// Requires `offset < length`
CopyResult CopySuffix(size_t offset);
// Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
// depending on the value of `owned`.
inline OpResult ToOpResult(bool owned);
// Adds `rep` to the specified tree, returning the modified tree.
template <EdgeType edge_type>
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
// Adds `data` to the specified tree, returning the modified tree.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
size_t extra = 0);
// Merges `src` into `dst` with `src` being added either before (kFront) or
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
// equal to the height of `src`.
template <EdgeType edge_type>
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
// implements an inlined version for trees of limited height (3 levels),
// GetAppendBufferSlow implements the logic for large trees.
Span<char> GetAppendBufferSlow(size_t size);
// `edges_` contains all edges starting from this instance.
// These are explicitly `child` edges only, a cord btree (or any cord tree in
// that respect) does not store `parent` pointers anywhere: multiple trees /
// parents can reference the same shared child edge. The type of these edges
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
// edges` (external or flat nodes, or sub-strings thereof). All other nodes
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
CordRep* edges_[kMaxCapacity];
// `CordRepBtreeNavigator` needs private access: its `Read()` implementation
// assembles result trees by writing `edges_` directly.
// NOTE(review): `CordRepBtreeTestPeer` is presumably a test-only accessor; it
// is not defined in this header -- confirm.
friend class CordRepBtreeTestPeer;
friend class CordRepBtreeNavigator;
};
// Downcasts this node to `CordRepBtree*`.
// Requires `IsBtree()`, which is verified by an assert in debug builds.
inline CordRepBtree* CordRep::btree() {
  assert(IsBtree());
  CordRepBtree* const self = static_cast<CordRepBtree*>(this);
  return self;
}
// Const overload: downcasts this node to `const CordRepBtree*`.
// Requires `IsBtree()`, which is verified by an assert in debug builds.
inline const CordRepBtree* CordRep::btree() const {
  assert(IsBtree());
  const CordRepBtree* const self = static_cast<const CordRepBtree*>(this);
  return self;
}
// Tags this node as BTREE and records `height`, `begin` and `end` (each
// narrowed to uint8_t) in `storage[0]`, `storage[1]` and `storage[2]`.
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
  const uint8_t packed_height = static_cast<uint8_t>(height);
  const uint8_t packed_begin = static_cast<uint8_t>(begin);
  const uint8_t packed_end = static_cast<uint8_t>(end);
  tag = BTREE;
  storage[0] = packed_height;
  storage[1] = packed_begin;
  storage[2] = packed_end;
}
// Returns the edge stored at `index`.
// Requires `begin() <= index < end()` (asserted in debug builds).
inline CordRep* CordRepBtree::Edge(size_t index) const {
  assert(index >= begin());
  assert(index < end());
  CordRep* const edge = edges_[index];
  return edge;
}
// Returns the first edge for `kFront`, and the last edge for any other
// `edge_type` value.
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
  if (edge_type == kFront) {
    return edges_[begin()];
  }
  return edges_[back()];
}
// Returns a span over all edges of this node: `size()` entries starting at
// `edges_[begin()]`.
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
  CordRep* const* const first = edges_ + begin();
  return absl::Span<CordRep* const>(first, size());
}
// Returns a span over the edges in the index range [begin, end).
// Requires `this->begin() <= begin <= end <= this->end()` (asserted in debug
// builds).
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
                                                      size_t end) const {
  assert(begin <= end);
  assert(begin >= this->begin());
  assert(end <= this->end());
  const size_t count = static_cast<size_t>(end - begin);
  CordRep* const* const first = edges_ + begin;
  return absl::Span<CordRep* const>(first, count);
}
// Returns a pointer to the first byte of data referenced by data edge `r`.
// For a SUBSTRING edge, this is the child's data advanced by the substring's
// `start`. Requires `IsDataEdge(r)` (asserted in debug builds).
inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
  assert(IsDataEdge(r));
  size_t start = 0;
  if (r->tag == SUBSTRING) {
    const auto* const sub = r->substring();
    start = sub->start;
    r = sub->child;
  }
  // Tags >= FLAT are flat nodes; anything else is read as an external node.
  const char* base;
  if (r->tag >= FLAT) {
    base = r->flat()->Data();
  } else {
    base = r->external()->base;
  }
  return base + start;
}
// Returns the data referenced by data edge `r` as a string_view of
// `r->length` bytes starting at `EdgeDataPtr(r)`.
inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
  const char* const data = EdgeDataPtr(r);
  return absl::string_view(data, r->length);
}
// Returns the data of the data edge at `index`.
// Requires this node to be a leaf (`height() == 0`, asserted in debug builds).
inline absl::string_view CordRepBtree::Data(size_t index) const {
  assert(height() == 0);
  const CordRep* const edge = Edge(index);
  return EdgeData(edge);
}
inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path the SUBSTRING check to optimize for the hot path.
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
if (rep->tag == SUBSTRING) rep = rep->substring()->child;
return rep->tag == EXTERNAL || rep->tag >= FLAT;
}
// Allocates a new node of the given `height` with `length` set to 0, using
// the default `begin` / `end` values of `InitInstance`.
inline CordRepBtree* CordRepBtree::New(int height) {
  auto* const node = new CordRepBtree;
  node->length = 0;
  node->InitInstance(height);
  return node;
}
// Allocates a new node holding `rep` as its single edge (at index 0).
// The node's height is one above `rep` when `rep` is a btree, else 0, and its
// length equals `rep->length`. `rep` is stored without adding a reference.
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
  auto* const node = new CordRepBtree;
  int height = 0;
  if (rep->IsBtree()) {
    height = rep->btree()->height() + 1;
  }
  node->length = rep->length;
  node->InitInstance(height, /*begin=*/0, /*end=*/1);
  node->edges_[0] = rep;
  return node;
}
// Allocates a new node one level above `front` and `back`, holding them as
// edges 0 and 1. The new length is the sum of both lengths. Both inputs must
// have the same height (asserted in debug builds); they are stored without
// adding references.
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
                                       CordRepBtree* back) {
  assert(front->height() == back->height());
  auto* const parent = new CordRepBtree;
  parent->length = front->length + back->length;
  parent->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
  parent->edges_[0] = front;
  parent->edges_[1] = back;
  return parent;
}
// Dispatches destruction of `tree` on its height: non-leaf nodes go to
// `DestroyNonLeaf`, leaf nodes (height 0) to `DestroyLeaf`.
inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
                                      size_t end) {
  if (tree->height() != 0) {
    DestroyNonLeaf(tree, begin, end);
    return;
  }
  DestroyLeaf(tree, begin, end);
}
// Destroys `tree`, passing its full `[begin(), end())` range to
// `DestroyTree`.
inline void CordRepBtree::Destroy(CordRepBtree* tree) {
  const size_t first = tree->begin();
  const size_t last = tree->end();
  DestroyTree(tree, first, last);
}
// Returns a newly allocated, byte-wise copy of this node. The copy's
// `refcount` is re-created in place as a default-constructed
// `RefcountAndFlags`; no references are added to the copied edges.
inline CordRepBtree* CordRepBtree::CopyRaw() const {
  void* const raw = ::operator new(sizeof(CordRepBtree));
  memcpy(raw, this, sizeof(CordRepBtree));
  auto* const copy = static_cast<CordRepBtree*>(raw);
  new (&copy->refcount) RefcountAndFlags;
  return copy;
}
// Returns a copy of this node created by `CopyRaw()`, after adding one
// reference to every edge of this node.
inline CordRepBtree* CordRepBtree::Copy() const {
  CordRepBtree* const copy = CopyRaw();
  const absl::Span<CordRep* const> edges = Edges();
  for (size_t i = 0; i < edges.size(); ++i) {
    CordRep::Ref(edges[i]);
  }
  return copy;
}
// Returns a copy of this node that starts at edge index `begin` and has its
// length set to `new_length`. One reference is added to each edge kept in
// the copy. Requires `this->begin() <= begin <= this->end()` (asserted in
// debug builds).
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
                                                 size_t new_length) const {
  assert(begin >= this->begin());
  assert(begin <= this->end());
  CordRepBtree* const copy = CopyRaw();
  copy->length = new_length;
  copy->set_begin(begin);
  const absl::Span<CordRep* const> kept = copy->Edges();
  for (size_t i = 0; i < kept.size(); ++i) {
    CordRep::Ref(kept[i]);
  }
  return copy;
}
// Returns a copy of this node that ends at edge index `end` (exclusive) and
// has its length set to `new_length`. One reference is added to each edge
// kept in the copy. Requires `this->begin() <= end <= capacity()` (asserted
// in debug builds).
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
                                               size_t new_length) const {
  assert(end <= capacity());
  assert(end >= this->begin());
  CordRepBtree* const copy = CopyRaw();
  copy->length = new_length;
  copy->set_end(end);
  const absl::Span<CordRep* const> kept = copy->Edges();
  for (size_t i = 0; i < kept.size(); ++i) {
    CordRep::Ref(kept[i]);
  }
  return copy;
}
// Shifts all edges down so that the first edge sits at index 0, leaving any
// free slots at the back of `edges_`. Does nothing if `begin()` is already 0.
inline void CordRepBtree::AlignBegin() {
// The below code itself does not need to be fast as typically we have
// mono-directional append/prepend calls, and `begin` / `end` are typically
// adjusted no more than once. But we want to avoid potential register clobber
// effects, making the compiler emit register save/store/spills, and minimize
// the size of code.
const size_t delta = begin();
if (ABSL_PREDICT_FALSE(delta != 0)) {
const size_t new_end = end() - delta;
set_begin(0);
set_end(new_end);
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
// size, and then do overlapping load/store of up to 4 pointers (inlined as
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
// compact and b) not clobbering any registers.
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
// Copy in ascending order: every source slot `i + delta` lies above its
// destination `i`, so no slot is overwritten before it has been read.
for (size_t i = 0; i < new_end; ++i) {
edges_[i] = edges_[i + delta];
}
}
}
// Shifts all edges up so that the last edge sits at index `capacity() - 1`,
// leaving any free slots at the front of `edges_`. Does nothing if `end()`
// already equals `capacity()`.
inline void CordRepBtree::AlignEnd() {
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
const size_t delta = capacity() - end();
if (delta != 0) {
const size_t new_begin = begin() + delta;
const size_t new_end = end() + delta;
set_begin(new_begin);
set_end(new_end);
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
// Copy in descending order: every source slot `i - delta` lies below its
// destination `i`, so no slot is overwritten before it has been read.
// The unsigned `i >= new_begin` test terminates because `new_begin` is at
// least `delta`, which is non-zero in this branch.
for (size_t i = new_end - 1; i >= new_begin; --i) {
edges_[i] = edges_[i - delta];
}
}
}
// Appends `rep` behind the current edges. Edges are first aligned to start
// at index 0; the slot written is the index returned by `fetch_add_end(1)`.
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
  AlignBegin();
  const size_t slot = fetch_add_end(1);
  edges_[slot] = rep;
}
// Appends all `edges`, in order, behind the current edges. Edges are first
// aligned to start at index 0, then `end` is advanced by `edges.size()`.
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
    absl::Span<CordRep* const> edges) {
  AlignBegin();
  const size_t first_slot = end();
  const size_t count = edges.size();
  for (size_t i = 0; i < count; ++i) {
    edges_[first_slot + i] = edges[i];
  }
  set_end(first_slot + count);
}
// Prepends `rep` in front of the current edges. Edges are first aligned to
// end at `capacity`; the slot written is the index returned by
// `sub_fetch_begin(1)`.
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
  AlignEnd();
  const size_t slot = sub_fetch_begin(1);
  edges_[slot] = rep;
}
// Prepends all `edges`, in order, in front of the current edges. Edges are
// first aligned to end at `capacity`, then `begin` is lowered by
// `edges.size()` and the freed slots are filled front to back.
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
    absl::Span<CordRep* const> edges) {
  AlignEnd();
  const size_t count = edges.size();
  const size_t first_slot = begin() - count;
  set_begin(first_slot);
  for (size_t i = 0; i < count; ++i) {
    edges_[first_slot + i] = edges[i];
  }
}
// Replaces the front (`kFront`) or back (any other `edge_type`) edge with
// `edge`. The previous edge at that position is unreffed; `edge` is stored
// as-is, i.e. the caller's reference is taken over.
template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
  // Keep the index in its native unsigned type: `begin()` / `back()` are used
  // as `size_t` indexes throughout this file, and narrowing to `int` only to
  // index an array again adds a needless signed / unsigned round trip.
  const size_t idx = edge_type == kFront ? begin() : back();
  CordRep::Unref(edges_[idx]);
  edges_[idx] = edge;
}
// Returns `{this, kSelf}` when `owned` is true, otherwise `{Copy(), kCopied}`.
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
  if (owned) {
    return OpResult{this, kSelf};
  }
  return OpResult{Copy(), kCopied};
}
// Returns the index of the edge containing byte `offset`, together with the
// offset of that byte relative to the start of that edge.
// Requires `offset < length` (asserted in debug builds).
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
  assert(offset < length);
  size_t index = begin();
  size_t remaining = offset;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (remaining < edge_length) break;
    remaining -= edge_length;
    ++index;
  }
  return {index, remaining};
}
// Like `IndexOf`, but an `offset` that falls exactly on the end of an edge
// resolves to that edge (with `n` equal to the edge length) instead of the
// start of the next one.
// Requires `0 < offset <= length` (asserted in debug builds).
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
  assert(offset > 0);
  assert(offset <= length);
  size_t index = begin();
  size_t remaining = offset;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (remaining <= edge_length) break;
    remaining -= edge_length;
    ++index;
  }
  return {index, remaining};
}
// Variant of `IndexBefore(offset)` that starts scanning at edge
// `front.index`, with `offset` interpreted relative to position `front.n`
// inside that edge.
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
                                                        size_t offset) const {
  size_t index = front.index;
  size_t remaining = offset + front.n;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (remaining <= edge_length) break;
    remaining -= edge_length;
    ++index;
  }
  return {index, remaining};
}
// Returns the index of the first edge whose starting offset is at or beyond
// `offset`, with `n` holding the distance from `offset` to that edge's start.
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
    const size_t offset) const {
  // The `offset -= length` approach of `IndexOf` does not work here. Instead,
  // `edge_start` tracks the starting offset of the edge at `index` and is
  // advanced edge by edge until it reaches or passes `offset`.
  size_t edge_start = 0;
  size_t index = begin();
  while (edge_start < offset) {
    edge_start += edges_[index]->length;
    ++index;
  }
  return {index, edge_start - offset};
}
// Creates a tree for `rep`: data edges are wrapped directly in a new node,
// anything else is handed to `CreateSlow`.
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
  return IsDataEdge(rep) ? New(rep) : CreateSlow(rep);
}
// Returns a writable span of up to `size` bytes located directly behind the
// data of the last data edge, and grows the length of that edge and of every
// node on the path to it by the size of the returned span.
// Returns an empty span if any node on the path or the last data edge has a
// reference count other than one, if the last data edge is not a flat
// (`tag < FLAT`), or if the flat has no spare capacity.
// Trees with a height above 3 are handled by `GetAppendBufferSlow`.
// Requires this node's reference count to be one (asserted in debug builds).
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
assert(refcount.IsOne());
CordRepBtree* tree = this;
const int height = this->height();
// `n1`..`n3` remember the nodes above the leaf whose lengths must be updated;
// entries not reached for the current height are never used.
CordRepBtree* n1 = tree;
CordRepBtree* n2 = tree;
CordRepBtree* n3 = tree;
// Walk down the back edges, one level per case, falling through to the leaf.
switch (height) {
case 3:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n2 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 2:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n1 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 1:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
ABSL_FALLTHROUGH_INTENDED;
case 0:
// `tree` is now the right-most leaf; `edge` is its last data edge.
CordRep* edge = tree->Edge(kBack);
if (!edge->refcount.IsOne()) return {};
if (edge->tag < FLAT) return {};
size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
// Propagate the added length to every node on the path, top down.
switch (height) {
case 3:
n3->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 2:
n2->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 1:
n1->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 0:
tree->length += delta;
return span;
}
break;
}
return GetAppendBufferSlow(size);
}
// Explicit instantiation declarations for both edge types of `AddCordRep`,
// as used by `Append()` and `Prepend()` below. These suppress implicit
// instantiation in translation units including this header.
// NOTE(review): the matching explicit instantiation definitions are
// presumably in the implementation file -- confirm.
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
CordRepBtree* tree, CordRep* rep);
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
CordRepBtree* tree, CordRep* rep);
// Appends `rep` to `tree` and returns the resulting tree. Data edges (the
// expected case) are added directly at the back; any other input is handed
// to `AppendSlow`.
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_FALSE(!IsDataEdge(rep))) {
    return AppendSlow(tree, rep);
  }
  return CordRepBtree::AddCordRep<kBack>(tree, rep);
}
// Prepends `rep` to `tree` and returns the resulting tree. Data edges (the
// expected case) are added directly at the front; any other input is handed
// to `PrependSlow`.
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_FALSE(!IsDataEdge(rep))) {
    return PrependSlow(tree, rep);
  }
  return CordRepBtree::AddCordRep<kFront>(tree, rep);
}
// With NDEBUG defined, `AssertValid` performs no checks: both overloads
// ignore `shallow` and return `tree` unchanged.
// NOTE(review): the checking versions for builds without NDEBUG are not
// defined in this header -- presumably they live in the implementation file.
#ifdef NDEBUG
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
bool /* shallow */) {
return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
bool /* shallow */) {
return tree;
}
#endif
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_

View File

@@ -0,0 +1,185 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include <cassert>
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
using ReadResult = CordRepBtreeNavigator::ReadResult;
namespace {
// Returns a data edge covering `n` bytes of `rep` starting at `offset`.
//  - `n == 0` returns nullptr.
//  - `n == rep->length` returns `rep` itself with one reference added.
//  - Otherwise a new `CordRepSubstring` is allocated. If `rep` is itself a
//    substring, the new node points at the underlying child with `offset`
//    adjusted by the old substring's start, so substrings are never nested.
// This function does not consume the caller's reference on `rep`: it adds a
// new reference to whichever node the result refers to.
// Requires `rep` to be a valid data edge, `offset < rep->length` and
// `offset + n <= rep->length` (all asserted in debug builds).
// TODO(192061034): move to utility library in internal and optimize for small
// substrings of larger reps.
inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
  assert(n <= rep->length);
  assert(offset < rep->length);
  assert(offset <= rep->length - n);
  assert(CordRepBtree::IsDataEdge(rep));

  if (n == 0) return nullptr;
  if (n == rep->length) return CordRep::Ref(rep);

  CordRep* target = rep;
  size_t start = offset;
  if (rep->tag == SUBSTRING) {
    auto* const outer = rep->substring();
    start += outer->start;
    target = outer->child;
  }

  CordRepSubstring* const result = new CordRepSubstring();
  result->length = n;
  result->tag = SUBSTRING;
  result->start = start;
  result->child = CordRep::Ref(target);
  return result;
}
// Returns the suffix of `rep` starting at `offset`, i.e. the three-argument
// `Substring` applied to the remaining `rep->length - offset` bytes.
inline CordRep* Substring(CordRep* rep, size_t offset) {
  const size_t remaining = rep->length - offset;
  return Substring(rep, offset, remaining);
}
} // namespace
// Skips `n` bytes forward, counted from the start of the current data edge.
// Returns the data edge containing the target position together with the
// offset of that position inside the edge. If the skip runs past the end of
// the tree, returns `{nullptr, remaining}` where `remaining` is the number of
// bytes that could not be skipped; the stored navigation path is not written
// in that case.
CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) {
  int height = 0;
  size_t index = index_[0];
  CordRepBtree* node = node_[0];
  CordRep* edge = node->Edge(index);

  // Overall logic: Find an edge of at least the length we need to skip.
  // We consume all edges which are smaller (i.e., must be 100% skipped).
  // If we exhausted all edges on the current level, we move one level
  // up the tree, and repeat until we either find the edge, or until we hit
  // the top of the tree meaning the skip exceeds tree->length.
  while (n >= edge->length) {
    n -= edge->length;
    while (++index == node->end()) {
      if (++height > height_) return {nullptr, n};
      node = node_[height];
      index = index_[height];
    }
    edge = node->Edge(index);
  }

  // If we moved up the tree, descend down to the leaf level, consuming all
  // edges that must be skipped.
  while (height > 0) {
    node = edge->btree();
    // `index_` holds uint8_t values: narrow explicitly, as every other writer
    // of `index_` in the navigator header does.
    index_[height] = static_cast<uint8_t>(index);
    node_[--height] = node;
    index = node->begin();
    edge = node->Edge(index);
    while (n >= edge->length) {
      n -= edge->length;
      ++index;
      assert(index != node->end());
      edge = node->Edge(index);
    }
  }
  index_[0] = static_cast<uint8_t>(index);
  return {edge, n};
}
// Reads `n` bytes starting at `edge_offset` inside the current data edge.
//
// Results:
//  - The read ends inside the current edge: returns a substring of that edge
//    and `edge_offset + n` as the consumed byte count; the navigator does not
//    move.
//  - The read ends exactly at the end of the tree: returns the assembled tree
//    and 0, leaving the navigator positioned on the last data edge.
//  - The read exceeds the tree: returns `{nullptr, remaining}` where
//    `remaining` is the number of bytes that could not be read.
//  - Otherwise: returns the assembled tree and the number of bytes consumed
//    from the data edge the navigator now points at.
//
// Every edge placed in the returned tree carries its own reference.
// Requires `edge_offset` to be smaller than the length of the current edge
// (asserted in debug builds).
ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) {
  int height = 0;
  size_t length = edge_offset + n;
  size_t index = index_[0];
  CordRepBtree* node = node_[0];
  CordRep* edge = node->Edge(index);
  assert(edge_offset < edge->length);

  if (length < edge->length) {
    return {Substring(edge, edge_offset, n), length};
  }

  // Similar to 'Skip', we consume all edges that are inside the 'length' of
  // data that needs to be read. If we exhaust the current level, we move one
  // level up the tree and repeat until we hit the final edge that must be
  // (partially) read. We consume all edges into `subtree`.
  CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset));
  size_t subtree_end = 1;
  do {
    length -= edge->length;
    while (++index == node->end()) {
      // `index` is now one past the last edge of `node`. Record the last
      // valid edge instead: if the read ends at the end of the tree, this is
      // the position the navigator is left at, and an index equal to `end()`
      // would make a later `Current()` / `Next()` access `edges_` out of
      // range.
      index_[height] = static_cast<uint8_t>(index - 1);
      if (++height > height_) {
        subtree->set_end(subtree_end);
        if (length == 0) return {subtree, 0};
        CordRep::Unref(subtree);
        return {nullptr, length};
      }
      if (length != 0) {
        // More data follows on a higher level: wrap what we have so far in a
        // new parent so the result grows in step with the source tree.
        subtree->set_end(subtree_end);
        subtree = CordRepBtree::New(subtree);
        subtree_end = 1;
      }
      node = node_[height];
      index = index_[height];
    }
    edge = node->Edge(index);
    if (length >= edge->length) {
      subtree->length += edge->length;
      subtree->edges_[subtree_end++] = CordRep::Ref(edge);
    }
  } while (length >= edge->length);
  CordRepBtree* tree = subtree;
  subtree->length += length;

  // If we moved up the tree, descend down to the leaf level, consuming all
  // edges that must be read, adding 'down' nodes to `subtree`.
  while (height > 0) {
    node = edge->btree();
    index_[height] = static_cast<uint8_t>(index);
    node_[--height] = node;
    index = node->begin();
    edge = node->Edge(index);

    if (length != 0) {
      CordRepBtree* right = CordRepBtree::New(height);
      right->length = length;
      subtree->edges_[subtree_end++] = right;
      subtree->set_end(subtree_end);
      subtree = right;
      subtree_end = 0;
      while (length >= edge->length) {
        subtree->edges_[subtree_end++] = CordRep::Ref(edge);
        length -= edge->length;
        edge = node->Edge(++index);
      }
    }
  }

  // Add any (partial) edge still remaining at the leaf level.
  if (length != 0) {
    subtree->edges_[subtree_end++] = Substring(edge, 0, length);
  }
  subtree->set_end(subtree_end);
  index_[0] = static_cast<uint8_t>(index);
  return {tree, length};
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,265 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_
#include <cassert>
#include <iostream>
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepBtreeNavigator is a bi-directional navigator allowing callers to
// navigate all the (leaf) data edges in a CordRepBtree instance.
//
// A CordRepBtreeNavigator instance is by default empty. Callers initialize a
// navigator instance by calling one of `InitFirst()`, `InitLast()` or
// `InitOffset()`, which establishes a current position. Callers can then
// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods.
//
// The navigator instance does not take or adopt a reference on the provided
// `tree` on any of the initialization calls. Callers are responsible for
// guaranteeing the lifecycle of the provided tree. A navigator instance can
// be reset to the empty state by calling `Reset`.
//
// A navigator only keeps positional state on the 'current data edge', it does
// explicitly not keep any 'offset' state. The class does accept and return
// offsets in the `Read()`, `Skip()` and `Seek()` methods as these would
// otherwise put a big burden on callers. Callers are expected to maintain
// (returned) offset info if they require such granular state.
class CordRepBtreeNavigator {
public:
// The logical position as returned by the Seek() and Skip() functions.
// Returns the current leaf edge for the desired seek or skip position and
// the offset of that position inside that edge.
struct Position {
CordRep* edge;
size_t offset;
};
// The read result as returned by the Read() function.
// `tree` contains the resulting tree which is identical to the result
// of calling CordRepBtree::SubTree(...) on the tree being navigated.
// `n` contains the number of bytes used from the last navigated to
// edge of the tree.
struct ReadResult {
CordRep* tree;
size_t n;
};
// Returns true if this instance is not empty.
explicit operator bool() const;
// Returns the tree for this instance or nullptr if empty.
CordRepBtree* btree() const;
// Returns the data edge of the current position.
// Requires this instance to not be empty.
CordRep* Current() const;
// Resets this navigator to `tree`, returning the first data edge in the tree.
CordRep* InitFirst(CordRepBtree* tree);
// Resets this navigator to `tree`, returning the last data edge in the tree.
CordRep* InitLast(CordRepBtree* tree);
// Resets this navigator to `tree` returning the data edge at position
// `offset` and the relative offset of `offset` into that data edge.
// Returns `Position.edge = nullptr` if the provided offset is greater
// than or equal to the length of the tree, in which case the state of
// the navigator instance remains unchanged.
Position InitOffset(CordRepBtree* tree, size_t offset);
// Navigates to the next data edge.
// Returns the next data edge or nullptr if there is no next data edge, in
// which case the current position remains unchanged.
CordRep* Next();
// Navigates to the previous data edge.
// Returns the previous data edge or nullptr if there is no previous data
// edge, in which case the current position remains unchanged.
CordRep* Previous();
// Navigates to the data edge at position `offset`. Returns the navigated to
// data edge in `Position.edge` and the relative offset of `offset` into that
// data edge in `Position.offset`. Returns `Position.edge = nullptr` if the
// provided offset is greater than or equal to the tree's length.
Position Seek(size_t offset);
// Reads `n` bytes of data starting at offset `edge_offset` of the current
// data edge, and returns the result in `ReadResult.tree`. `ReadResult.n`
// contains the 'bytes used' from the last / current data edge in the tree.
// This allows users that mix regular navigation (using string views) and
// 'read into cord' navigation to keep track of the current state, and which
// bytes have been consumed from a navigator.
// This function returns `ReadResult.tree = nullptr` if the requested length
// exceeds the length of the tree starting at the current data edge.
ReadResult Read(size_t edge_offset, size_t n);
// Skips `n` bytes forward from the current data edge, returning the navigated
// to data edge in `Position.edge` and `Position.offset` containing the offset
// inside that data edge. Note that the state of the navigator is left
// unchanged if `n` is smaller than the length of the current data edge.
Position Skip(size_t n);
// Resets this instance to the default / empty state.
void Reset();
private:
// Slow path for Next() if Next() reached the end of a leaf node. Backtracks
// up the stack until it finds a node that has a 'next' position available,
// and then does a 'front dive' towards the next leaf node.
CordRep* NextUp();
// Slow path for Previous() if Previous() reached the beginning of a leaf
// node. Backtracks up the stack until it finds a node that has a 'previous'
// position available, and then does a 'back dive' towards the previous leaf
// node.
CordRep* PreviousUp();
// Generic implementation of InitFirst() and InitLast().
template <CordRepBtree::EdgeType edge_type>
CordRep* Init(CordRepBtree* tree);
// `height_` contains the height of the current tree, or -1 if empty.
int height_ = -1;
// `index_` and `node_` contain the navigation state as the 'path' to the
// current data edge which is at `node_[0]->Edge(index_[0])`. The contents
// of these are undefined until the instance is initialized (`height_ >= 0`).
// Entry `h` of both arrays describes the node at height `h` on that path;
// the root of the tree is at `node_[height_]`.
uint8_t index_[CordRepBtree::kMaxHeight];
CordRepBtree* node_[CordRepBtree::kMaxHeight];
};
// Returns true if this instance has been initialized, i.e. is not in the
// empty state marked by a negative `height_`.
inline CordRepBtreeNavigator::operator bool() const {
  return !(height_ < 0);
}
// Returns the root of the tree being navigated (`node_[height_]`), or nullptr
// if this instance is empty.
inline CordRepBtree* CordRepBtreeNavigator::btree() const {
  if (height_ < 0) {
    return nullptr;
  }
  return node_[height_];
}
// Returns the data edge at the current position.
// Requires this instance to be non-empty (asserted in debug builds).
inline CordRep* CordRepBtreeNavigator::Current() const {
  assert(height_ >= 0);
  CordRepBtree* const leaf = node_[0];
  return leaf->Edge(index_[0]);
}
// Puts this instance back into the empty state by setting `height_` to -1.
// The `node_` / `index_` arrays are left as they are.
inline void CordRepBtreeNavigator::Reset() {
  height_ = -1;
}
// Positions this navigator on the first data edge of `tree` and returns it.
inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) {
  CordRep* const first = Init<CordRepBtree::kFront>(tree);
  return first;
}
// Positions this navigator on the last data edge of `tree` and returns it.
inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) {
  CordRep* const last = Init<CordRepBtree::kBack>(tree);
  return last;
}
// Shared implementation of `InitFirst()` and `InitLast()`: records the path
// from the root of `tree` down to a leaf, following the `edge_type` edge at
// every level, and returns the data edge reached.
// Requires `tree` to be non-null and non-empty (asserted in debug builds).
template <CordRepBtree::EdgeType edge_type>
inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) {
  assert(tree != nullptr);
  assert(tree->size() > 0);
  height_ = tree->height();
  CordRepBtree* node = tree;
  size_t index = node->index(edge_type);
  node_[height_] = node;
  index_[height_] = static_cast<uint8_t>(index);
  for (int level = height_ - 1; level >= 0; --level) {
    node = node->Edge(index)->btree();
    node_[level] = node;
    index = node->index(edge_type);
    index_[level] = static_cast<uint8_t>(index);
  }
  return node_[0]->Edge(index);
}
// Moves this navigator to the data edge containing byte `offset` of the tree
// and returns that edge with the offset relative to the edge's start.
// Returns `{nullptr, 0}`, without touching the navigation state, if `offset`
// is at or beyond the length of the tree.
// Requires this instance to be non-empty (asserted in debug builds).
inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek(
    size_t offset) {
  assert(btree() != nullptr);
  CordRepBtree* node = node_[height_];
  if (ABSL_PREDICT_FALSE(offset >= node->length)) return {nullptr, 0};
  CordRepBtree::Position pos = node->IndexOf(offset);
  index_[height_] = static_cast<uint8_t>(pos.index);
  // Descend level by level; `pos.n` is the remaining offset inside the edge
  // chosen on the level above.
  for (int level = height_ - 1; level >= 0; --level) {
    node = node->Edge(pos.index)->btree();
    node_[level] = node;
    pos = node->IndexOf(pos.n);
    index_[level] = static_cast<uint8_t>(pos.index);
  }
  return {node->Edge(pos.index), pos.n};
}
// Resets this navigator to `tree` and seeks to byte `offset`.
// Returns `{nullptr, 0}`, leaving this instance untouched, if `offset` is at
// or beyond the length of `tree`.
// Requires `tree` to be non-null (asserted in debug builds).
inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset(
    CordRepBtree* tree, size_t offset) {
  assert(tree != nullptr);
  if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0};
  const int root_height = tree->height();
  height_ = root_height;
  node_[root_height] = tree;
  return Seek(offset);
}
// Advances to the next data edge and returns it. Within a leaf this is a
// plain index increment; at the last edge of a leaf the work is delegated to
// `NextUp()`.
inline CordRep* CordRepBtreeNavigator::Next() {
  CordRepBtree* const leaf = node_[0];
  if (index_[0] == leaf->back()) {
    return NextUp();
  }
  return leaf->Edge(++index_[0]);
}
// Moves back to the previous data edge and returns it. Within a leaf this is
// a plain index decrement; at the first edge of a leaf the work is delegated
// to `PreviousUp()`.
inline CordRep* CordRepBtreeNavigator::Previous() {
  CordRepBtree* const leaf = node_[0];
  if (index_[0] == leaf->begin()) {
    return PreviousUp();
  }
  return leaf->Edge(--index_[0]);
}
// Slow path of `Next()`, used when the current position is the last edge of
// its leaf. Climbs until a level with a following edge is found, steps to
// that edge, then descends along first edges down to the leaf level.
// Returns nullptr, without modifying the stored path, if no level has a
// following edge.
inline CordRep* CordRepBtreeNavigator::NextUp() {
  assert(index_[0] == node_[0]->back());
  CordRepBtree* node = nullptr;
  size_t index = 0;
  int height = 0;

  // Climb: stop at the first level that is not on its last edge.
  for (;;) {
    ++height;
    if (height > height_) return nullptr;
    node = node_[height];
    index = index_[height] + 1;
    if (index != node->end()) break;
  }
  index_[height] = static_cast<uint8_t>(index);

  // Dive: `height` is at least 1 here, so at least one level is descended.
  while (height > 0) {
    node = node->Edge(index)->btree();
    --height;
    node_[height] = node;
    index = node->begin();
    index_[height] = static_cast<uint8_t>(index);
  }
  return node->Edge(index);
}
// Slow path of `Previous()`, used when the current position is the first
// edge of its leaf. Climbs until a level with a preceding edge is found,
// steps to that edge, then descends along last edges down to the leaf level.
// Returns nullptr, without modifying the stored path, if no level has a
// preceding edge.
inline CordRep* CordRepBtreeNavigator::PreviousUp() {
  assert(index_[0] == node_[0]->begin());
  CordRepBtree* node = nullptr;
  size_t index = 0;
  int height = 0;

  // Climb: stop at the first level that is not on its first edge.
  for (;;) {
    ++height;
    if (height > height_) return nullptr;
    node = node_[height];
    index = index_[height];
    if (index != node->begin()) break;
  }
  --index;
  index_[height] = static_cast<uint8_t>(index);

  // Dive: `height` is at least 1 here, so at least one level is descended.
  while (height > 0) {
    node = node->Edge(index)->btree();
    --height;
    node_[height] = node;
    index = node->back();
    index_[height] = static_cast<uint8_t>(index);
  }
  return node->Edge(index);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_

View File

@@ -0,0 +1,325 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::Ne;
using ::absl::cordrep_testing::CordRepBtreeFromFlats;
using ::absl::cordrep_testing::CordToString;
using ::absl::cordrep_testing::CreateFlatsFromString;
using ::absl::cordrep_testing::CreateRandomString;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::MakeSubstring;
using ReadResult = CordRepBtreeNavigator::ReadResult;
using Position = CordRepBtreeNavigator::Position;
// Parameterized fixture that builds the tree the navigation tests run on.
// The test parameter (`count()`) is the number of data edges in the tree;
// each edge holds `kCharsPerFlat` characters of a random string. One of the
// edges is replaced by a substring node so partial reads on substrings are
// covered as well. The fixture owns the tree and unrefs it on destruction.
class CordRepBtreeNavigatorTest : public testing::TestWithParam<int> {
 public:
  using Flats = std::vector<CordRep*>;
  static constexpr size_t kCharsPerFlat = 3;

  CordRepBtreeNavigatorTest() {
    data_ = CreateRandomString(count() * kCharsPerFlat);
    flats_ = CreateFlatsFromString(data_, kCharsPerFlat);

    // Swap edge 1 (or edge 0 if there is only one edge) for a substring of a
    // flat holding the full data, covering the same characters as the edge it
    // replaces.
    const size_t replaced = count() > 1 ? 1 : 0;
    CordRep::Unref(flats_[replaced]);
    flats_[replaced] = MakeSubstring(replaced * kCharsPerFlat, kCharsPerFlat,
                                     MakeFlat(data_));

    tree_ = CordRepBtreeFromFlats(flats_);
  }

  ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); }

  // Number of data edges in the test tree.
  int count() const { return GetParam(); }

  // The tree under test.
  CordRepBtree* tree() { return tree_; }

  // The full string stored in the tree.
  const std::string& data() const { return data_; }

  // The data edges the tree was built from, in order.
  const std::vector<CordRep*>& flats() const { return flats_; }

  // Produces the test name suffix for a parameter value.
  static std::string ToString(testing::TestParamInfo<int> param) {
    return absl::StrCat(param.param, "_Flats");
  }

 private:
  std::string data_;
  Flats flats_;
  CordRepBtree* tree_;
};
// Instantiates the parameterized navigator tests with an edge count of 1, with
// counts at and around `kMaxCapacity` and `kMaxCapacity * kMaxCapacity`, and
// with one larger count. Presumably these straddle the sizes at which the tree
// gains a level -- TODO confirm against CordRepBtree. Test name suffixes are
// generated by `CordRepBtreeNavigatorTest::ToString`.
INSTANTIATE_TEST_SUITE_P(
WithParam, CordRepBtreeNavigatorTest,
testing::Values(1, CordRepBtree::kMaxCapacity - 1,
CordRepBtree::kMaxCapacity,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 +
17),
CordRepBtreeNavigatorTest::ToString);
// A default constructed navigator converts to false and references no tree.
// In debug builds with death test support, Current() is expected to die.
TEST(CordRepBtreeNavigatorTest, Uninitialized) {
  CordRepBtreeNavigator navigator;

  EXPECT_FALSE(navigator);
  EXPECT_THAT(navigator.btree(), Eq(nullptr));

#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
  EXPECT_DEATH(navigator.Current(), ".*");
#endif
}
// InitFirst() must position the navigator on, and return, the first data edge.
TEST_P(CordRepBtreeNavigatorTest, InitFirst) {
  CordRepBtreeNavigator navigator;
  CordRep* const first = navigator.InitFirst(tree());

  EXPECT_TRUE(navigator);
  EXPECT_THAT(navigator.btree(), Eq(tree()));
  EXPECT_THAT(navigator.Current(), Eq(flats().front()));
  EXPECT_THAT(first, Eq(flats().front()));
}
// InitLast() must position the navigator on, and return, the last data edge.
TEST_P(CordRepBtreeNavigatorTest, InitLast) {
  CordRepBtreeNavigator navigator;
  CordRep* const last = navigator.InitLast(tree());

  EXPECT_TRUE(navigator);
  EXPECT_THAT(navigator.btree(), Eq(tree()));
  EXPECT_THAT(navigator.Current(), Eq(flats().back()));
  EXPECT_THAT(last, Eq(flats().back()));
}
// Starting at the first edge, walks forward over every edge with Next() and
// then back again with Previous(). At either end the test expects a nullptr
// result with Current() still reporting the boundary edge.
TEST_P(CordRepBtreeNavigatorTest, NextPrev) {
  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());
  const Flats& flats = this->flats();

  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.front()));

  // Forward pass. The index is unsigned to match `flats.size()`.
  for (size_t i = 1; i < flats.size(); ++i) {
    ASSERT_THAT(nav.Next(), Eq(flats[i]));
    EXPECT_THAT(nav.Current(), Eq(flats[i]));
  }
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.back()));

  // Backward pass. The index is signed so the loop can terminate below zero.
  for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) {
    ASSERT_THAT(nav.Previous(), Eq(flats[i]));
    EXPECT_THAT(nav.Current(), Eq(flats[i]));
  }
  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.front()));
}
// Starting at the last edge, walks backward over every edge with Previous()
// and then forward again with Next(). At either end the test expects a nullptr
// result with Current() still reporting the boundary edge.
TEST_P(CordRepBtreeNavigatorTest, PrevNext) {
  CordRepBtreeNavigator nav;
  nav.InitLast(tree());
  const Flats& flats = this->flats();

  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.back()));

  // Backward pass. The index is signed so the loop can terminate below zero.
  for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) {
    ASSERT_THAT(nav.Previous(), Eq(flats[i]));
    EXPECT_THAT(nav.Current(), Eq(flats[i]));
  }
  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.front()));

  // Forward pass. The index is unsigned to match `flats.size()`.
  for (size_t i = 1; i < flats.size(); ++i) {
    ASSERT_THAT(nav.Next(), Eq(flats[i]));
    EXPECT_THAT(nav.Current(), Eq(flats[i]));
  }
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(flats.back()));
}
// After Reset() an initialized navigator must look like a default constructed
// one: it converts to false and references no tree. In debug builds with death
// test support, Current() is expected to die.
TEST(CordRepBtreeNavigatorTest, Reset) {
  CordRepBtree* const btree = CordRepBtree::Create(MakeFlat("abc"));

  CordRepBtreeNavigator navigator;
  navigator.InitFirst(btree);
  navigator.Reset();

  EXPECT_FALSE(navigator);
  EXPECT_THAT(navigator.btree(), Eq(nullptr));

#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
  EXPECT_DEATH(navigator.Current(), ".*");
#endif

  CordRep::Unref(btree);
}
// Verifies Skip(): first skipping within the first edge, then for every pair
// of edges (index1 <= index2) and every in-edge offset, skipping to `index1`
// and from there onwards to `index2`.
TEST_P(CordRepBtreeNavigatorTest, Skip) {
  int count = this->count();
  const Flats& flats = this->flats();
  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());

  // Offsets are unsigned to match `kCharsPerFlat`, the parameter passed to
  // Skip(), and the type of `Position::offset` they are compared against.
  for (size_t char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
    Position pos = nav.Skip(char_offset);
    EXPECT_THAT(pos.edge, Eq(nav.Current()));
    EXPECT_THAT(pos.edge, Eq(flats[0]));
    EXPECT_THAT(pos.offset, Eq(char_offset));
  }

  for (int index1 = 0; index1 < count; ++index1) {
    for (int index2 = index1; index2 < count; ++index2) {
      for (size_t char_offset = 0; char_offset < kCharsPerFlat;
           ++char_offset) {
        // Each combination starts from a freshly initialized navigator.
        CordRepBtreeNavigator nav;
        nav.InitFirst(tree());

        size_t length1 = index1 * kCharsPerFlat;
        Position pos1 = nav.Skip(length1 + char_offset);
        ASSERT_THAT(pos1.edge, Eq(flats[index1]));
        ASSERT_THAT(pos1.edge, Eq(nav.Current()));
        ASSERT_THAT(pos1.offset, Eq(char_offset));

        // The second skip is relative to the start of edge `index1`, hence
        // `char_offset` is added again.
        size_t length2 = index2 * kCharsPerFlat;
        Position pos2 = nav.Skip(length2 - length1 + char_offset);
        ASSERT_THAT(pos2.edge, Eq(flats[index2]));
        ASSERT_THAT(pos2.edge, Eq(nav.Current()));
        ASSERT_THAT(pos2.offset, Eq(char_offset));
      }
    }
  }
}
// Verifies Seek(): every absolute offset in the tree must land on the edge
// holding that offset, with the matching offset inside that edge.
TEST_P(CordRepBtreeNavigatorTest, Seek) {
  int count = this->count();
  const Flats& flats = this->flats();
  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());

  // Offsets are unsigned to match `kCharsPerFlat`, the parameter passed to
  // Seek(), and the type of `Position::offset` they are compared against.
  for (size_t char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
    Position pos = nav.Seek(char_offset);
    EXPECT_THAT(pos.edge, Eq(nav.Current()));
    EXPECT_THAT(pos.edge, Eq(flats[0]));
    EXPECT_THAT(pos.offset, Eq(char_offset));
  }

  for (int index = 0; index < count; ++index) {
    for (size_t char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) {
      size_t offset = index * kCharsPerFlat + char_offset;
      Position pos1 = nav.Seek(offset);
      ASSERT_THAT(pos1.edge, Eq(flats[index]));
      ASSERT_THAT(pos1.edge, Eq(nav.Current()));
      ASSERT_THAT(pos1.offset, Eq(char_offset));
    }
  }
}
// Verifies that InitOffset() initializes the navigator and positions it at the
// requested offset: offset 5 in "abc" + "def" is offset 2 in the second edge.
TEST(CordRepBtreeNavigatorTest, InitOffset) {
  // Whitebox: InitOffset() is implemented in terms of Seek() which is
  // exhaustively tested. Only test it initializes / forwards properly.
  CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
  tree = CordRepBtree::Append(tree, MakeFlat("def"));
  CordRepBtreeNavigator nav;
  Position pos = nav.InitOffset(tree, 5);
  EXPECT_TRUE(nav);
  EXPECT_THAT(nav.btree(), Eq(tree));
  EXPECT_THAT(pos.edge, Eq(tree->Edges()[1]));
  EXPECT_THAT(pos.edge, Eq(nav.Current()));
  // Unsigned literal: `pos.offset` is unsigned, which avoids a signed/unsigned
  // comparison inside the matcher.
  EXPECT_THAT(pos.offset, Eq(2u));
  CordRep::Unref(tree);
}
// Seek() and InitOffset() with an offset at or beyond the tree length must
// return a null edge, and the test expects the navigator to remain positioned
// on the first edge of the tree it was originally initialized with.
TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) {
  CordRepBtree* const original = CordRepBtree::Create(MakeFlat("abc"));
  CordRepBtree* const other = CordRepBtree::Create(MakeFlat("def"));

  CordRepBtreeNavigator navigator;
  navigator.InitFirst(original);

  // Seeking past the end of the current tree.
  EXPECT_THAT(navigator.Seek(3).edge, Eq(nullptr));
  EXPECT_THAT(navigator.Seek(100).edge, Eq(nullptr));
  EXPECT_THAT(navigator.btree(), Eq(original));
  EXPECT_THAT(navigator.Current(), Eq(original->Edges().front()));

  // Initializing on another tree with an offset past its end.
  EXPECT_THAT(navigator.InitOffset(other, 3).edge, Eq(nullptr));
  EXPECT_THAT(navigator.InitOffset(other, 100).edge, Eq(nullptr));
  EXPECT_THAT(navigator.btree(), Eq(original));
  EXPECT_THAT(navigator.Current(), Eq(original->Edges().front()));

  CordRep::Unref(original);
  CordRep::Unref(other);
}
// Exhaustively reads every (offset, length) range of the test data through
// Skip() + Read() and checks the returned tree, the reported partial edge
// length and the navigator position after the read.
TEST_P(CordRepBtreeNavigatorTest, Read) {
  const Flats& edges = this->flats();
  const std::string& expected = this->data();

  for (size_t offset = 0; offset < expected.size(); ++offset) {
    for (size_t length = 1; length <= expected.size() - offset; ++length) {
      CordRepBtreeNavigator navigator;
      navigator.InitFirst(tree());

      // Move to the edge containing `offset`, then read from within it.
      const size_t offset_in_edge = navigator.Skip(offset).offset;
      ReadResult read = navigator.Read(offset_in_edge, length);

      ASSERT_THAT(read.tree, Ne(nullptr));
      EXPECT_THAT(read.tree->length, Eq(length));
      if (read.tree->tag == BTREE) {
        ASSERT_TRUE(CordRepBtree::IsValid(read.tree->btree()));
      }

      // The returned tree must hold exactly the requested range.
      std::string actual = CordToString(read.tree);
      EXPECT_THAT(actual, Eq(expected.substr(offset, length)));

      // `n` reports how far the read extends into its last edge.
      const size_t end = offset + length;
      size_t partial = end % kCharsPerFlat;
      ASSERT_THAT(read.n, Eq(partial));

      // Unless the read hit EOF, the navigator sits on the edge holding `end`.
      if (end < expected.size()) {
        size_t index = end / kCharsPerFlat;
        EXPECT_THAT(navigator.Current(), Eq(edges[index]));
      }

      CordRep::Unref(read.tree);
    }
  }
}
// Reading the full tree length starting at offset 2 of the first edge runs past
// the end of the tree; Read() is expected to report this with a null tree.
TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) {
  CordRepBtreeNavigator navigator;
  navigator.InitFirst(tree());

  ReadResult read = navigator.Read(2, tree()->length);
  ASSERT_THAT(read.tree, Eq(nullptr));
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,68 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Reads `n` bytes into `tree` and returns the chunk of data that follows the
// bytes read. The read starts `chunk_size` bytes before the end of the current
// data edge, or at the start of the next data edge if `chunk_size` is zero.
// `tree` receives whatever `navigator_.Read()` produced. Returns an empty
// string_view, with `consumed_` set to `length()`, once the read reaches the
// end of the tree.
absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
CordRep*& tree) {
assert(chunk_size <= navigator_.Current()->length);
// If chunk_size is non-zero, we need to start inside last returned edge.
// Else we start reading at the next data edge of the tree.
CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next();
const size_t offset = chunk_size ? edge->length - chunk_size : 0;
// Read the sub tree and verify we got what we wanted.
ReadResult result = navigator_.Read(offset, n);
tree = result.tree;
// If the data returned in `tree` was covered entirely by `chunk_size`, i.e.,
// read from the 'previous' edge, we did not consume any additional data, and
// can directly return the substring into the current data edge as the next
// chunk. We can easily establish from the above code that `navigator_.Next()`
// has not been called as that requires `chunk_size` to be zero.
// `consumed_` is left untouched on this path.
if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n);
// The amount of data taken from the last edge is `chunk_size` and `result.n`
// contains the offset into the current edge trailing the read data (which can
// be 0). As the call to `navigator_.Read()` could have consumed all remaining
// data, calling `navigator_.Current()` is not safe before checking if we
// already consumed all remaining data.
const size_t consumed_by_read = n - chunk_size - result.n;
if (consumed_ + consumed_by_read >= length()) {
consumed_ = length();
return {};
}
// We did not read all data, return remaining data from current edge.
// `consumed_` advances to the end of that edge, as the whole edge counts as
// consumed once a chunk of it has been returned.
edge = navigator_.Current();
consumed_ += consumed_by_read + edge->length;
return CordRepBtree::EdgeData(edge).substr(result.n);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,219 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepBtreeReader implements logic to iterate over cord btrees.
// References to the underlying data are returned as absl::string_view values.
// The most typical use case is a forward only iteration over tree data.
// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to
// CordRepBtreeNavigator that allow more advanced navigation. The class provides
// a `consumed` property which contains the end offset of the chunk last
// returned to the user which is useful in cord iteration logic.
//
// Example: iterate over all data inside a cord btree:
//
//   CordRepBtreeReader reader;
//   for (absl::string_view sv = reader.Init(tree); !sv.empty();
//        sv = reader.Next()) {
//     DoSomethingWithDataIn(sv);
//   }
//
// All navigation methods always return the next 'chunk' of data. The class
// assumes that all data is directly 'consumed' by the caller. For example:
// invoking `Skip()` will skip the desired number of bytes, and directly
// read and return the next chunk of data directly after the skipped bytes.
//
// Example: iterate over all data inside a btree skipping the first 100 bytes:
//
//   CordRepBtreeReader reader;
//   absl::string_view sv = reader.Init(tree);
//   if (sv.length() > 100) {
//     sv.remove_prefix(100);
//   } else {
//     sv = reader.Skip(100 - sv.length());
//   }
//   while (!sv.empty()) {
//     DoSomethingWithDataIn(sv);
//     sv = reader.Next();
//   }
//
// It is important to notice that `consumed` represents the end position of the
// last data edge returned to the caller, not the cumulative data returned to
// the caller which can be less in cases of skipping or seeking over data.
//
// For example, consider a cord btree with five data edges: "abc", "def", "ghi",
// "jkl" and "mno":
//
//   absl::string_view sv;
//   CordRepBtreeReader reader;
//
//   sv = reader.Init(tree); // sv = "abc", reader.consumed() = 3
//   sv = reader.Skip(4);    // sv = "hi",  reader.consumed() = 9
//   sv = reader.Skip(2);    // sv = "l",   reader.consumed() = 12
//   sv = reader.Next();     // sv = "mno", reader.consumed() = 15
//
// In the above example, `reader.consumed()` reflects the data edges iterated
// over or skipped by the reader, not the amount of data 'consumed' by the
// caller.
class CordRepBtreeReader {
 public:
  using ReadResult = CordRepBtreeNavigator::ReadResult;
  using Position = CordRepBtreeNavigator::Position;

  // Returns true if this instance is not empty.
  explicit operator bool() const { return navigator_.btree() != nullptr; }

  // Returns the tree referenced by this instance or nullptr if empty.
  CordRepBtree* btree() const { return navigator_.btree(); }

  // Returns the current data edge inside the referenced btree.
  // Requires that the current instance is not empty.
  CordRep* node() const { return navigator_.Current(); }

  // Returns the length of the referenced tree.
  // Requires that the current instance is not empty.
  size_t length() const;

  // Returns the end offset of the last navigated to chunk, which represents the
  // total bytes 'consumed' relative to the start of the tree. The returned
  // value is never zero. For example, initializing a reader with a tree with a
  // first data edge of 19 bytes will return `consumed() = 19`. See also the
  // class comments on the meaning of `consumed`.
  // Requires that the current instance is not empty.
  size_t consumed() const;

  // Resets this instance to an empty value.
  void Reset() { navigator_.Reset(); }

  // Initializes this instance with `tree`. `tree` must not be null.
  // Returns a reference to the first data edge of the provided tree.
  absl::string_view Init(CordRepBtree* tree);

  // Navigates to and returns the next data edge of the referenced tree.
  // Returns an empty string_view if an attempt is made to read beyond the end
  // of the tree, i.e.: if `consumed()` equals `length()`, indicating an EOF
  // condition.
  // Requires that the current instance is not empty.
  absl::string_view Next();

  // Skips the provided amount of bytes and returns a reference to the data
  // directly following the skipped bytes. Returns an empty string_view and
  // sets `consumed` to `length` if the skip runs beyond the end of the tree.
  absl::string_view Skip(size_t skip);

  // Reads `n` bytes into `tree`.
  // If `chunk_size` is zero, starts reading at the next data edge. If
  // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of
  // the last returned data edge. Effectively, this means that the read starts
  // at offset `consumed() - chunk_size`.
  // Requires that `chunk_size` is less than or equal to the length of the
  // last returned data edge. The purpose of `chunk_size` is to simplify code
  // partially consuming a returned chunk and wanting to include the remaining
  // bytes in the Read call. For example, the below code will read 1000 bytes of
  // data into a cord tree if the first chunk starts with "big:":
  //
  //   CordRepBtreeReader reader;
  //   absl::string_view sv = reader.Init(tree);
  //   if (absl::StartsWith(sv, "big:")) {
  //     CordRep* big;
  //     sv = reader.Read(1000, sv.size() - 4 /* "big:" */, big);
  //   }
  //
  // This method will return an empty string view if all remaining data was
  // read. If `n` exceeded the amount of remaining data this function will
  // return an empty string view and `tree` will be set to nullptr.
  // In both cases, `consumed` will be set to `length`.
  absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree);

  // Navigates to the chunk at offset `offset`.
  // Returns a reference into the navigated to chunk, adjusted for the relative
  // position of `offset` into that chunk. For example, calling `Seek(13)` on a
  // cord tree containing 2 chunks of 10 and 20 bytes respectively will return
  // a string view into the second chunk starting at offset 3 with a size of 17.
  // Returns an empty string view if `offset` is equal to or greater than the
  // length of the referenced tree.
  absl::string_view Seek(size_t offset);

 private:
  // End offset of the data edge last returned to the caller. Only meaningful
  // while the instance is not empty.
  size_t consumed_;
  CordRepBtreeNavigator navigator_;
};

inline size_t CordRepBtreeReader::length() const {
  assert(btree() != nullptr);
  return btree()->length;
}

inline size_t CordRepBtreeReader::consumed() const {
  assert(btree() != nullptr);
  return consumed_;
}

inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
  assert(tree != nullptr);
  const CordRep* edge = navigator_.InitFirst(tree);
  // The first edge is handed to the caller in full, so it counts as consumed.
  consumed_ = edge->length;
  return CordRepBtree::EdgeData(edge);
}

inline absl::string_view CordRepBtreeReader::Next() {
  // Honor the documented EOF contract: return an empty view rather than
  // dereferencing the null edge `navigator_.Next()` yields past the last edge.
  if (consumed_ >= length()) return {};
  const CordRep* edge = navigator_.Next();
  assert(edge != nullptr);
  consumed_ += edge->length;
  return CordRepBtree::EdgeData(edge);
}

inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
  // As we are always positioned on the last 'consumed' edge, we
  // need to skip the current edge as well as `skip`.
  const size_t edge_length = navigator_.Current()->length;
  CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length);
  if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
    consumed_ = length();
    return {};
  }
  // The combined length of all edges skipped before `pos.edge` is `skip -
  // pos.offset`, all of which are 'consumed', as well as the current edge.
  consumed_ += skip - pos.offset + pos.edge->length;
  return CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
}

inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
  const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset);
  if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) {
    consumed_ = length();
    return {};
  }
  // The returned chunk runs to the end of its edge, so the end of the chunk is
  // also the end offset of the edge.
  absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
  consumed_ = offset + chunk.length();
  return chunk;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_

View File

@@ -0,0 +1,285 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Ne;
using ::testing::Not;
using ::absl::cordrep_testing::CordRepBtreeFromFlats;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::CordToString;
using ::absl::cordrep_testing::CreateFlatsFromString;
using ::absl::cordrep_testing::CreateRandomString;
using ReadResult = CordRepBtreeReader::ReadResult;
// Iterates over trees of various sizes using Init() + Next() and verifies that
// each returned chunk matches the source data and that `consumed()` tracks the
// end offset of the chunk last returned.
TEST(CordRepBtreeReaderTest, Next) {
  constexpr size_t kChars = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  // Edge counts are kept unsigned: they are computed from `cap` and only used
  // in size computations.
  size_t counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};

  for (size_t count : counts) {
    std::string data = CreateRandomString(count * kChars);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    CordRepBtreeReader reader;
    absl::string_view chunk = reader.Init(node);
    EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length())));

    size_t consumed = chunk.length();
    EXPECT_THAT(reader.consumed(), Eq(consumed));

    while (consumed < data.length()) {
      chunk = reader.Next();
      EXPECT_THAT(chunk, Eq(data.substr(consumed, chunk.length())));

      consumed += chunk.length();
      EXPECT_THAT(reader.consumed(), Eq(consumed));
    }

    EXPECT_THAT(consumed, Eq(data.length()));
    EXPECT_THAT(reader.consumed(), Eq(data.length()));

    CordRep::Unref(node);
  }
}
// Verifies Skip() for every combination of two consecutive skips: the chunk
// returned must start directly after the skipped bytes, and `consumed()` must
// point at the end of the returned chunk after the first skip.
TEST(CordRepBtreeReaderTest, Skip) {
  constexpr size_t kChars = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  // Edge counts are kept unsigned: they are computed from `cap` and only used
  // in size computations.
  size_t counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};

  for (size_t count : counts) {
    std::string data = CreateRandomString(count * kChars);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) {
      for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) {
        CordRepBtreeReader reader;
        absl::string_view chunk = reader.Init(node);
        size_t consumed = chunk.length();

        chunk = reader.Skip(skip1);
        ASSERT_THAT(chunk, Eq(data.substr(consumed + skip1, chunk.length())));
        consumed += chunk.length() + skip1;
        ASSERT_THAT(reader.consumed(), Eq(consumed));

        if (consumed >= data.length()) continue;

        // Clamp the second skip so at least one byte remains to be returned.
        // `(std::min)` is parenthesized, as in ReadExhaustive, to guard
        // against function-like `min` macros.
        size_t skip = (std::min)(data.length() - consumed - 1, skip2);
        chunk = reader.Skip(skip);
        ASSERT_THAT(chunk, Eq(data.substr(consumed + skip, chunk.length())));
      }
    }

    CordRep::Unref(node);
  }
}
// Skipping past the end of the tree must return an empty chunk and leave
// `consumed()` equal to the tree length (6 for "abc" + "def").
TEST(CordRepBtreeReaderTest, SkipBeyondLength) {
  CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
  tree = CordRepBtree::Append(tree, MakeFlat("def"));
  CordRepBtreeReader reader;
  reader.Init(tree);
  EXPECT_THAT(reader.Skip(100), IsEmpty());
  // Unsigned literal to match the size_t returned by `consumed()`.
  EXPECT_THAT(reader.consumed(), Eq(6u));
  CordRep::Unref(tree);
}
// Verifies Seek() for offsets inside trees of various sizes: the returned
// chunk must be non-empty, match the source data at the sought offset, and
// `consumed()` must point at the end of the returned chunk.
TEST(CordRepBtreeReaderTest, Seek) {
  constexpr size_t kChars = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  // Edge counts are kept unsigned: they are computed from `cap` and only used
  // in size computations.
  size_t counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17};

  for (size_t count : counts) {
    std::string data = CreateRandomString(count * kChars);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    for (size_t seek = 0; seek < data.length() - 1; ++seek) {
      CordRepBtreeReader reader;
      reader.Init(node);
      absl::string_view chunk = reader.Seek(seek);
      ASSERT_THAT(chunk, Not(IsEmpty()));
      ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length())));
      ASSERT_THAT(reader.consumed(), Eq(seek + chunk.length()));
    }

    CordRep::Unref(node);
  }
}
// Seeking to an offset at or past the end of the tree must return an empty
// chunk and leave `consumed()` equal to the tree length (6 for "abc" + "def").
TEST(CordRepBtreeReaderTest, SeekBeyondLength) {
  CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
  tree = CordRepBtree::Append(tree, MakeFlat("def"));
  CordRepBtreeReader reader;
  reader.Init(tree);
  // Unsigned literals below match the size_t returned by `consumed()`.
  EXPECT_THAT(reader.Seek(6), IsEmpty());
  EXPECT_THAT(reader.consumed(), Eq(6u));
  EXPECT_THAT(reader.Seek(100), IsEmpty());
  EXPECT_THAT(reader.consumed(), Eq(6u));
  CordRep::Unref(tree);
}
// Verifies Read() on a tree of three 5 byte edges ("abcde", "fghij", "klmno")
// for a series of hand picked `n` / `chunk_size` combinations. Each scenario
// re-initializes the reader. The `consumed()` expectations use unsigned
// literals to match its size_t return type.
TEST(CordRepBtreeReaderTest, Read) {
  std::string data = "abcdefghijklmno";
  std::vector<CordRep*> flats = CreateFlatsFromString(data, 5);
  CordRepBtree* node = CordRepBtreeFromFlats(flats);

  CordRep* tree;
  CordRepBtreeReader reader;
  absl::string_view chunk;

  // Read zero bytes
  chunk = reader.Init(node);
  chunk = reader.Read(0, chunk.length(), tree);
  EXPECT_THAT(tree, Eq(nullptr));
  EXPECT_THAT(chunk, Eq("abcde"));
  EXPECT_THAT(reader.consumed(), Eq(5u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));

  // Read in full
  chunk = reader.Init(node);
  chunk = reader.Read(15, chunk.length(), tree);
  EXPECT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno"));
  EXPECT_THAT(chunk, Eq(""));
  EXPECT_THAT(reader.consumed(), Eq(15u));
  CordRep::Unref(tree);

  // Read < chunk bytes
  chunk = reader.Init(node);
  chunk = reader.Read(3, chunk.length(), tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("abc"));
  EXPECT_THAT(chunk, Eq("de"));
  EXPECT_THAT(reader.consumed(), Eq(5u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));
  CordRep::Unref(tree);

  // Read < chunk bytes at offset
  chunk = reader.Init(node);
  chunk = reader.Read(2, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cd"));
  EXPECT_THAT(chunk, Eq("e"));
  EXPECT_THAT(reader.consumed(), Eq(5u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));
  CordRep::Unref(tree);

  // Read from consumed chunk
  chunk = reader.Init(node);
  chunk = reader.Read(3, 0, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("fgh"));
  EXPECT_THAT(chunk, Eq("ij"));
  EXPECT_THAT(reader.consumed(), Eq(10u));
  EXPECT_THAT(reader.Next(), Eq("klmno"));
  CordRep::Unref(tree);

  // Read across chunks
  chunk = reader.Init(node);
  chunk = reader.Read(12, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn"));
  EXPECT_THAT(chunk, Eq("o"));
  EXPECT_THAT(reader.consumed(), Eq(15u));
  CordRep::Unref(tree);

  // Read across chunks landing on exact edge boundary
  chunk = reader.Init(node);
  chunk = reader.Read(10 - 2, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cdefghij"));
  EXPECT_THAT(chunk, Eq("klmno"));
  EXPECT_THAT(reader.consumed(), Eq(15u));
  CordRep::Unref(tree);

  CordRep::Unref(node);
}
// Reads trees of various sizes front to back in fixed size steps, always
// passing the full length of the previously returned chunk as `chunk_size`,
// and verifies the data read, the chunk returned and `consumed()` on each step.
TEST(CordRepBtreeReaderTest, ReadExhaustive) {
  constexpr size_t kChars = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  // Edge counts are kept unsigned: they are computed from `cap` and only used
  // in size computations.
  size_t counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17};

  for (size_t count : counts) {
    std::string data = CreateRandomString(count * kChars);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) {
      CordRepBtreeReader reader;
      absl::string_view chunk = reader.Init(node);

      // `consumed` tracks the end of last consumed chunk which is the start of
      // the next chunk: we always read with `chunk_size = chunk.length()`.
      size_t consumed = 0;
      size_t remaining = data.length();
      while (remaining > 0) {
        CordRep* tree;
        size_t n = (std::min)(remaining, read_size);
        chunk = reader.Read(n, chunk.length(), tree);
        EXPECT_THAT(tree, Ne(nullptr));
        if (tree) {
          EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n)));
          CordRep::Unref(tree);
        }

        consumed += n;
        remaining -= n;
        EXPECT_THAT(reader.consumed(), Eq(consumed + chunk.length()));

        if (remaining > 0) {
          ASSERT_FALSE(chunk.empty());
          ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length())));
        } else {
          ASSERT_TRUE(chunk.empty()) << chunk;
        }
      }
    }

    CordRep::Unref(node);
  }
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,129 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_consume.h"
#include <array>
#include <utility>
#include "absl/container/inlined_vector.h"
#include "absl/functional/function_ref.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Releases the caller's reference on `substring` and hands back its child.
// The returned child carries a reference for the caller: either the one taken
// over from a uniquely owned `substring`, or a newly added one.
CordRep* ClipSubstring(CordRepSubstring* substring) {
  CordRep* const child = substring->child;
  if (!substring->refcount.IsOne()) {
    // Shared node: take our own reference on the child, then drop ours on the
    // substring node.
    CordRep::Ref(child);
    CordRep::Unref(substring);
    return child;
  }
  // Sole owner: free the node and take over its reference on the child.
  delete substring;
  return child;
}
// Releases the caller's reference on `concat` and hands back its two children
// as `{left, right}`. Each returned child carries a reference for the caller:
// either the one taken over from a uniquely owned `concat`, or a newly added
// one.
std::array<CordRep*, 2> ClipConcat(CordRepConcat* concat) {
  CordRep* const left = concat->left;
  CordRep* const right = concat->right;
  if (!concat->refcount.IsOne()) {
    // Shared node: take our own references on the children, then drop ours on
    // the concat node.
    CordRep::Ref(left);
    CordRep::Ref(right);
    CordRep::Unref(concat);
  } else {
    // Sole owner: free the node and take over its references on the children.
    delete concat;
  }
  return std::array<CordRep*, 2>{left, right};
}
// Walks the tree rooted at `rep` and invokes `consume_fn(node, offset, length)`
// for every node that is neither a CONCAT nor a SUBSTRING, restricted to the
// `[offset, offset + length)` range of that node which is still of interest.
// Nodes are visited left to right if `forward` is true, right to left
// otherwise. CONCAT and SUBSTRING nodes are clipped (see ClipConcat() and
// ClipSubstring()) as they are traversed, and children that fall outside of the
// range of interest are unreffed without being passed to `consume_fn`.
void Consume(bool forward, CordRep* rep, ConsumeFn consume_fn) {
// Range of interest inside the current `rep`.
size_t offset = 0;
size_t length = rep->length;
// Pending sibling of a CONCAT node, with the range of interest inside it.
struct Entry {
CordRep* rep;
size_t offset;
size_t length;
};
// Explicit stack of siblings still to be visited, replacing recursion.
absl::InlinedVector<Entry, 40> stack;
for (;;) {
if (rep->tag == CONCAT) {
std::array<CordRep*, 2> res = ClipConcat(rep->concat());
CordRep* left = res[0];
CordRep* right = res[1];
if (left->length <= offset) {
// Don't need left node
offset -= left->length;
CordRep::Unref(left);
rep = right;
continue;
}
// Number of bytes of interest that live in the left node.
size_t length_left = left->length - offset;
if (length_left >= length) {
// Don't need right node
CordRep::Unref(right);
rep = left;
continue;
}
// Need both nodes
size_t length_right = length - length_left;
if (forward) {
// Visit left now (keeping `offset`), right later from its start.
stack.push_back({right, 0, length_right});
rep = left;
length = length_left;
} else {
// Visit right now from its start, left later at the current `offset`.
stack.push_back({left, offset, length_left});
rep = right;
offset = 0;
length = length_right;
}
} else if (rep->tag == SUBSTRING) {
// Fold the substring's start into `offset` and continue with its child.
offset += rep->substring()->start;
rep = ClipSubstring(rep->substring());
} else {
// Any other node is handed to the caller; then resume with the most
// recently deferred sibling, or finish if there is none.
consume_fn(rep, offset, length);
if (stack.empty()) return;
rep = stack.back().rep;
offset = stack.back().offset;
length = stack.back().length;
stack.pop_back();
}
}
}
} // namespace
// Consumes `rep` in forward (left to right) order.
void Consume(CordRep* rep, ConsumeFn consume_fn) {
  constexpr bool kForward = true;
  Consume(kForward, rep, std::move(consume_fn));
}
// Consumes `rep` in reverse (right to left) order.
void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) {
  constexpr bool kForward = false;
  Consume(kForward, rep, std::move(consume_fn));
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,50 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_
#include <functional>
#include "absl/functional/function_ref.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Functor for the Consume() and ReverseConsume() functions:
// void ConsumeFunc(CordRep* rep, size_t offset, size_t length);
// See the Consume() and ReverseConsume() function comments for documentation.
using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>;
// Consume() and ReverseConsume() consume CONCAT based trees and invoke the
// provided functor with the contained nodes in the proper forward or reverse
// order, which is used to convert CONCAT trees into other tree or cord data.
// All CONCAT and SUBSTRING nodes are processed internally. The `offset`
// parameter of the functor is non-zero for any nodes below SUBSTRING nodes.
// It's up to the caller to form these back into SUBSTRING nodes or otherwise
// store offset / prefix information. These functions are intended to be used
// only for migration / transitional code where due to factors such as ODR
// violations, we can not 100% guarantee that all code respects 'new format'
// settings and flags, so we need to be able to parse old data on the fly until
// all old code is deprecated / no longer the default format.
void Consume(CordRep* rep, ConsumeFn consume_fn);
void ReverseConsume(CordRep* rep, ConsumeFn consume_fn);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_

View File

@@ -0,0 +1,173 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_consume.h"
#include <functional>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using testing::InSequence;
using testing::MockFunction;
// Depth of `rep`: the stored depth for CONCAT nodes, zero for anything else.
int Depth(const CordRep* rep) {
  if (rep->tag != CONCAT) return 0;
  return rep->concat()->depth();
}
// Builds a CONCAT node joining `left` and `right`. The node's length is the
// sum of both children and its depth is one more than the deeper child.
CordRepConcat* CreateConcat(CordRep* left, CordRep* right) {
  CordRepConcat* const node = new CordRepConcat();
  node->tag = CONCAT;
  node->length = left->length + right->length;
  node->left = left;
  node->right = right;
  const int child_depth = (std::max)(Depth(left), Depth(right));
  node->set_depth(1 + child_depth);
  return node;
}
// Allocates a flat node via CordRepFlat::New(length) and records `length` as
// its length. The payload bytes are left untouched.
CordRepFlat* CreateFlatWithLength(size_t length) {
  CordRepFlat* const node = CordRepFlat::New(length);
  node->length = length;
  return node;
}
// Builds a SUBSTRING node that refers to `length` bytes of `child`, beginning
// at byte `start`.
CordRepSubstring* CreateSubstring(CordRep* child, size_t start, size_t length) {
  CordRepSubstring* const node = new CordRepSubstring();
  node->tag = SUBSTRING;
  node->child = child;
  node->start = start;
  node->length = length;
  return node;
}
// Leaf nodes of the most recently built test tree; filled by CreateTestTree()
// so the tests can refer to individual leaves in their expectations.
CordRep* flat[6];

// Builds the shared test tree and stores its six leaves in `flat`:
//
//   concat(concat(flat[0], substring(flat[1], 2, 4)),
//          concat(concat(flat[2], flat[3]),
//                 substring(concat(flat[4], flat[5]), 5, 17)))
CordRep* CreateTestTree() {
  const size_t lengths[6] = {1, 7, 9, 13, 15, 19};
  for (size_t i = 0; i < 6; ++i) {
    flat[i] = CreateFlatWithLength(lengths[i]);
  }

  CordRepSubstring* const left_sub = CreateSubstring(flat[1], 2, 4);
  CordRepConcat* const left_branch = CreateConcat(flat[0], left_sub);

  CordRepConcat* const right_head = CreateConcat(flat[2], flat[3]);
  CordRepConcat* const right_tail = CreateConcat(flat[4], flat[5]);
  CordRepSubstring* const right_sub = CreateSubstring(right_tail, 5, 17);
  CordRepConcat* const right_branch = CreateConcat(right_head, right_sub);

  return CreateConcat(left_branch, right_branch);
}
// Forward consumption of an unshared tree: the leaves must be reported left to
// right with the expected offset / length, and afterwards every flat must be
// singly referenced.
TEST(CordRepConsumeTest, Consume) {
  InSequence ordered;
  CordRep* const root = CreateTestTree();
  MockFunction<void(CordRep*, size_t, size_t)> visitor;
  EXPECT_CALL(visitor, Call(flat[0], 0, 1));
  EXPECT_CALL(visitor, Call(flat[1], 2, 4));
  EXPECT_CALL(visitor, Call(flat[2], 0, 9));
  EXPECT_CALL(visitor, Call(flat[3], 0, 13));
  EXPECT_CALL(visitor, Call(flat[4], 5, 10));
  EXPECT_CALL(visitor, Call(flat[5], 0, 7));

  Consume(root, visitor.AsStdFunction());

  for (CordRep* leaf : flat) {
    EXPECT_TRUE(leaf->refcount.IsOne());
    CordRep::Unref(leaf);
  }
}
// Forward consumption of a tree that the test also holds a reference to: the
// same leaves are reported, but each flat must end up with more than one
// reference because the original tree is still alive.
TEST(CordRepConsumeTest, ConsumeShared) {
  InSequence ordered;
  CordRep* const root = CreateTestTree();
  MockFunction<void(CordRep*, size_t, size_t)> visitor;
  EXPECT_CALL(visitor, Call(flat[0], 0, 1));
  EXPECT_CALL(visitor, Call(flat[1], 2, 4));
  EXPECT_CALL(visitor, Call(flat[2], 0, 9));
  EXPECT_CALL(visitor, Call(flat[3], 0, 13));
  EXPECT_CALL(visitor, Call(flat[4], 5, 10));
  EXPECT_CALL(visitor, Call(flat[5], 0, 7));

  Consume(CordRep::Ref(root), visitor.AsStdFunction());

  for (CordRep* leaf : flat) {
    EXPECT_FALSE(leaf->refcount.IsOne());
    CordRep::Unref(leaf);
  }
  // Release the reference the test kept on the tree itself.
  CordRep::Unref(root);
}
// Reverse consumption of an unshared tree: the leaves must be reported right
// to left, and afterwards every flat must be singly referenced.
TEST(CordRepConsumeTest, Reverse) {
  InSequence ordered;
  CordRep* const root = CreateTestTree();
  MockFunction<void(CordRep*, size_t, size_t)> visitor;
  EXPECT_CALL(visitor, Call(flat[5], 0, 7));
  EXPECT_CALL(visitor, Call(flat[4], 5, 10));
  EXPECT_CALL(visitor, Call(flat[3], 0, 13));
  EXPECT_CALL(visitor, Call(flat[2], 0, 9));
  EXPECT_CALL(visitor, Call(flat[1], 2, 4));
  EXPECT_CALL(visitor, Call(flat[0], 0, 1));

  ReverseConsume(root, visitor.AsStdFunction());

  for (CordRep* leaf : flat) {
    EXPECT_TRUE(leaf->refcount.IsOne());
    CordRep::Unref(leaf);
  }
}
// Reverse consumption of a tree that the test also holds a reference to: the
// leaves are reported right to left, and each flat must end up with more than
// one reference because the original tree is still alive.
TEST(CordRepConsumeTest, ReverseShared) {
  InSequence ordered;
  CordRep* const root = CreateTestTree();
  MockFunction<void(CordRep*, size_t, size_t)> visitor;
  EXPECT_CALL(visitor, Call(flat[5], 0, 7));
  EXPECT_CALL(visitor, Call(flat[4], 5, 10));
  EXPECT_CALL(visitor, Call(flat[3], 0, 13));
  EXPECT_CALL(visitor, Call(flat[2], 0, 9));
  EXPECT_CALL(visitor, Call(flat[1], 2, 4));
  EXPECT_CALL(visitor, Call(flat[0], 0, 1));

  ReverseConsume(CordRep::Ref(root), visitor.AsStdFunction());

  for (CordRep* leaf : flat) {
    EXPECT_FALSE(leaf->refcount.IsOne());
    CordRep::Unref(leaf);
  }
  // Release the reference the test kept on the tree itself.
  CordRep::Unref(root);
}
// A substring that starts past the end of the left flat: only the right flat
// may be reported, with the offset rebased onto it (15 - 10 = 5), and it must
// be singly referenced once consumption is done.
TEST(CordRepConsumeTest, UnreachableFlat) {
  InSequence ordered;
  CordRepFlat* const skipped = CreateFlatWithLength(10);
  CordRepFlat* const reached = CreateFlatWithLength(20);
  CordRepConcat* const pair = CreateConcat(skipped, reached);
  CordRepSubstring* const root = CreateSubstring(pair, 15, 10);
  MockFunction<void(CordRep*, size_t, size_t)> visitor;
  EXPECT_CALL(visitor, Call(reached, 5, 10));

  Consume(root, visitor.AsStdFunction());

  EXPECT_TRUE(reached->refcount.IsOne());
  CordRep::Unref(reached);
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,146 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Note: all constants below are never ODR used and internal to cord, we define
// these as static constexpr to avoid 'in struct' definition and usage clutter.
// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag, which currently can encode sizes up
// to 4K, encoded as multiple of either 8 or 32 bytes.
// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc.
// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and
// ideally a 'nice' size aligning with allocation and cacheline sizes like 32.
// kMaxFlatSize is bounded by the size resulting in a computed tag no greater
// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values.
// Number of bytes in a CordRep that precede its `storage` member.
static constexpr size_t kFlatOverhead = offsetof(CordRep, storage);
// Smallest total allocation size (overhead + payload) for a flat node.
static constexpr size_t kMinFlatSize = 32;
// Largest total allocation size (overhead + payload) for a flat node.
static constexpr size_t kMaxFlatSize = 4096;
// Payload bytes available in the largest and the smallest flat allocation.
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead;
// Maps an allocation size to its tag value without range checking.
// Sizes up to 1024 are encoded in 8 byte steps (tag = size / 8 + 1); larger
// sizes continue from tag 129 in 32 byte steps. The result is truncated to
// uint8_t, so callers must make sure `size` is encodable.
constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) {
  return static_cast<uint8_t>((size > 1024) ? (size / 32 + 129) - 1024 / 32
                                            : 1 + size / 8);
}
// The tag computed for the smallest flat allocation must not fall below FLAT.
static_assert(kMinFlatSize / 8 + 1 >= FLAT, "");
// The tag computed for the largest flat allocation must not exceed
// MAX_FLAT_TAG.
static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, "");
// Helper functions for rounded div, and rounding to exact sizes.
// Integer division of `n` by `m`, rounded up. `m` must be non-zero.
constexpr size_t DivUp(size_t n, size_t m) {
  return (n + (m - 1)) / m;
}
// Rounds `n` up to the nearest multiple of `m`. `m` must be non-zero.
constexpr size_t RoundUp(size_t n, size_t m) {
  return m * DivUp(n, m);
}
// Rounds `size` up to the nearest size that a tag can express exactly:
// a multiple of 8 for sizes up to 1024, a multiple of 32 above that.
inline size_t RoundUpForTag(size_t size) {
  const size_t granularity = (size <= 1024) ? 8 : 32;
  return RoundUp(size, granularity);
}
// Maps an allocation size to its tag, rounding down when `size` is not
// exactly expressible as a tag. Sizes beyond what the largest tag can encode
// give an undefined result; debug builds assert that the tag is within
// MAX_FLAT_TAG.
inline uint8_t AllocatedSizeToTag(size_t size) {
  const uint8_t encoded = AllocatedSizeToTagUnchecked(size);
  assert(encoded <= MAX_FLAT_TAG);
  return encoded;
}
// Inverse of the size-to-tag mapping: tags up to 129 decode in 8 byte steps,
// tags above 129 decode in 32 byte steps starting from 1024.
constexpr size_t TagToAllocatedSize(uint8_t tag) {
  return (tag > 129) ? (1024 + 32 * (tag - 129)) : (8 * (tag - 1));
}
// Payload capacity for `tag`: the allocation size the tag decodes to, minus
// the fixed flat overhead.
constexpr size_t TagToLength(uint8_t tag) {
  return TagToAllocatedSize(tag) - kFlatOverhead;
}
// Enforce that kMaxFlatSize maps to a well-known exact tag value.
static_assert(TagToAllocatedSize(225) == kMaxFlatSize, "Bad tag logic");
// A CordRep whose payload bytes live inline, directly after the header. The
// allocation size is encoded in `tag`.
struct CordRepFlat : public CordRep {
  // Allocates and constructs a flat node for a payload of `len` bytes. `len`
  // is first clamped into [kMinFlatLength, kMaxFlatLength], so a request above
  // kMaxFlatLength yields a node of maximum capacity, not a larger one. The
  // node's `tag` is set from the allocation size; `length` is not assigned
  // here.
  static CordRepFlat* New(size_t len) {
    const size_t payload = (len > kMaxFlatLength)    ? kMaxFlatLength
                           : (len <= kMinFlatLength) ? kMinFlatLength
                                                     : len;
    // Pick an allocation size that a tag can represent exactly.
    const size_t alloc_size = RoundUpForTag(payload + kFlatOverhead);
    void* const mem = ::operator new(alloc_size);
    CordRepFlat* const node = new (mem) CordRepFlat();
    node->tag = AllocatedSizeToTag(alloc_size);
    return node;
  }

  // Destroys and frees a node obtained from New(). Because New() uses raw
  // ::operator new plus placement new, teardown is an explicit destructor
  // call followed by ::operator delete (sized when the compiler supports it).
  static void Delete(CordRep* rep) {
    assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG);
#if defined(__cpp_sized_deallocation)
    // Read the tag before the destructor runs.
    const size_t alloc_size = TagToAllocatedSize(rep->tag);
    rep->~CordRep();
    ::operator delete(rep, alloc_size);
#else
    rep->~CordRep();
    ::operator delete(rep);
#endif
  }

  // Pointer to the first payload byte of this node.
  char* Data() { return reinterpret_cast<char*>(storage); }
  const char* Data() const { return reinterpret_cast<const char*>(storage); }

  // Number of payload bytes this node can hold, derived from `tag`.
  size_t Capacity() const { return TagToLength(tag); }

  // Total allocation size (overhead + payload) of this node, derived from
  // `tag`.
  size_t AllocatedSize() const { return TagToAllocatedSize(tag); }
};
// CordRep's cast helpers need the complete CordRepFlat type, so they are
// defined here. Debug builds assert that the tag is in the flat range.
inline CordRepFlat* CordRep::flat() {
  assert(FLAT <= tag && tag <= MAX_FLAT_TAG);
  return reinterpret_cast<CordRepFlat*>(this);
}
// Const overload of the flat cast helper; debug builds assert that the tag is
// in the flat range.
inline const CordRepFlat* CordRep::flat() const {
  assert(FLAT <= tag && tag <= MAX_FLAT_TAG);
  return reinterpret_cast<const CordRepFlat*>(this);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_

View File

@@ -0,0 +1,771 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_ring.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/throw_delegate.h"
#include "absl/base/macros.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Shorthand for the ring's index type, for use by the file-local helpers.
using index_type = CordRepRing::index_type;
// Traversal direction tag.
// NOTE(review): no use of this enum is visible in this part of the file.
enum class Direction { kForward, kReversed };
// True if `rep` is a FLAT or an EXTERNAL node.
inline bool IsFlatOrExternal(CordRep* rep) {
  if (rep->IsFlat()) return true;
  return rep->IsExternal();
}
// Throws std::length_error unless n + extra <= kMaxCapacity. The comparison is
// made against the remaining headroom so the sum itself is never computed.
inline void CheckCapacity(size_t n, size_t extra) {
  const size_t headroom = CordRepRing::kMaxCapacity - n;
  if (ABSL_PREDICT_FALSE(headroom < extra)) {
    base_internal::ThrowStdLengthError("Maximum capacity exceeded");
  }
}
// Returns a new flat holding a copy of the `n` bytes at `s`. The flat is
// requested with room for `n + extra` bytes; the actual capacity is subject
// to the kMaxFlatLength limit applied by CordRepFlat::New().
// Requires `n` to be less than or equal to `kMaxFlatLength`.
CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) {  // NOLINT
  assert(n <= kMaxFlatLength);
  CordRepFlat* const flat = CordRepFlat::New(n + extra);
  memcpy(flat->Data(), s, n);
  flat->length = n;
  return flat;
}
// Unrefs the entries in `[head, tail)`.
// Requires all entries to be a FLAT or EXTERNAL node.
void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) {
rep->ForEach(head, tail, [rep](index_type ix) {
CordRep* child = rep->entry_child(ix);
if (!child->refcount.Decrement()) {
if (child->tag >= FLAT) {
CordRepFlat::Delete(child->flat());
} else {
CordRepExternal::Delete(child->external());
}
}
});
}
} // namespace
std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) {
// Note: 'pos' values are defined as size_t (for overflow reasons), but that
// prints really awkward for small prepended values such as -5. ssize_t is not
// portable (POSIX), so we use ptrdiff_t instead to cast to signed values.
s << " CordRepRing(" << &rep << ", length = " << rep.length
<< ", head = " << rep.head_ << ", tail = " << rep.tail_
<< ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get()
<< ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n";
CordRepRing::index_type head = rep.head();
do {
CordRep* child = rep.entry_child(head);
s << " entry[" << head << "] length = " << rep.entry_length(head)
<< ", child " << child << ", clen = " << child->length
<< ", tag = " << static_cast<int>(child->tag)
<< ", rc = " << child->refcount.Get()
<< ", offset = " << rep.entry_data_offset(head)
<< ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head))
<< "\n";
head = rep.advance(head);
} while (head != rep.tail());
return s << "}\n";
}
// Increases the data offset of entry `index` by `n`.
void CordRepRing::AddDataOffset(index_type index, size_t n) {
  auto* const offsets = entry_data_offset();
  offsets[index] += static_cast<offset_type>(n);
}
// Shortens entry `index` by `n` bytes by moving its end position back.
void CordRepRing::SubLength(index_type index, size_t n) {
  auto* const end_positions = entry_end_pos();
  end_positions[index] -= n;
}
// Writes consecutive entries into a ring, starting at a given index. The
// filler only writes the entry arrays; it is up to the caller to publish the
// result by updating the ring's head / tail / length.
class CordRepRing::Filler {
 public:
  // Starts filling `rep` at index `pos`.
  Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {}

  // Index of the first entry this filler writes.
  index_type head() const { return head_; }

  // Index at which the next entry will be written.
  index_type pos() const { return pos_; }

  // Stores one entry at pos() and moves pos() to the next index.
  void Add(CordRep* child, size_t offset, pos_type end_pos) {
    const index_type slot = pos_;
    pos_ = rep_->advance(slot);
    rep_->entry_child()[slot] = child;
    rep_->entry_data_offset()[slot] = static_cast<offset_type>(offset);
    rep_->entry_end_pos()[slot] = end_pos;
  }

 private:
  CordRepRing* rep_;
  index_type head_;
  index_type pos_;
};
constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11
// Checks the internal consistency of this ring. Returns true if all checks
// pass; otherwise writes a description of the first violation found to
// `output` and returns false.
//
// Checks performed:
//  - capacity is non-zero and head / tail lie within it
//  - `length` equals the distance from begin_pos_ to the last entry's end
//  - every entry has a non-zero length and a non-null FLAT or EXTERNAL child
//  - every entry's offset and length lie inside its child's data
bool CordRepRing::IsValid(std::ostream& output) const {
  if (capacity_ == 0) {
    output << "capacity == 0";
    return false;
  }

  if (head_ >= capacity_ || tail_ >= capacity_) {
    output << "head " << head_ << " and/or tail " << tail_
           << " exceed capacity " << capacity_;
    return false;
  }

  const index_type back = retreat(tail_);
  size_t pos_length = Distance(begin_pos_, entry_end_pos(back));
  if (pos_length != length) {
    output << "length " << length << " does not match positional length "
           << pos_length << " from begin_pos " << begin_pos_ << " and entry["
           << back << "].end_pos " << entry_end_pos(back);
    return false;
  }

  // Walk all entries; each entry begins where the previous one ended.
  index_type head = head_;
  pos_type begin_pos = begin_pos_;
  do {
    pos_type end_pos = entry_end_pos(head);
    size_t entry_length = Distance(begin_pos, end_pos);
    if (entry_length == 0) {
      output << "entry[" << head << "] has an invalid length " << entry_length
             << " from begin_pos " << begin_pos << " and end_pos " << end_pos;
      return false;
    }

    CordRep* child = entry_child(head);
    if (child == nullptr) {
      output << "entry[" << head << "].child == nullptr";
      return false;
    }
    if (child->tag < FLAT && child->tag != EXTERNAL) {
      output << "entry[" << head << "].child has an invalid tag "
             << static_cast<int>(child->tag);
      return false;
    }

    // The second comparison is written as a subtraction so it cannot wrap.
    size_t offset = entry_data_offset(head);
    if (offset >= child->length || entry_length > child->length - offset) {
      output << "entry[" << head << "] has offset " << offset
             << " and entry length " << entry_length
             << " which are outside of the child's length of " << child->length;
      return false;
    }

    begin_pos = end_pos;
    head = advance(head);
  } while (head != tail_);

  return true;
}
#ifdef EXTRA_CORD_RING_VALIDATION
// Aborts the process if `rep` fails IsValid(), after printing the failure, the
// optional `file` / `line` of the caller and a dump of the ring to std::cerr.
// Returns `rep` unchanged when it is valid.
CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file,
                                   int line) {
  const bool valid = rep->IsValid(std::cerr);
  if (!valid) {
    std::cerr << "\nERROR: CordRepRing corrupted";
    if (line != 0) std::cerr << " at line " << line;
    if (file != nullptr) std::cerr << " in file " << file;
    std::cerr << "\nContent = " << *rep;
    abort();
  }
  return rep;
}
#endif // EXTRA_CORD_RING_VALIDATION
// Allocates a ring with room for `capacity + extra` entries. Throws
// std::length_error (via CheckCapacity) if that exceeds kMaxCapacity. The
// returned ring has its tag, capacity and begin position initialized.
CordRepRing* CordRepRing::New(size_t capacity, size_t extra) {
  CheckCapacity(capacity, extra);
  const size_t total = capacity + extra;
  void* const mem = ::operator new(AllocSize(total));
  CordRepRing* const ring =
      new (mem) CordRepRing(static_cast<index_type>(total));
  ring->tag = RING;
  ring->capacity_ = static_cast<index_type>(total);
  ring->begin_pos_ = 0;
  return ring;
}
// Test-only hook that lowers the recorded capacity of this ring to `capacity`
// without reallocating. The positions of layout arrays 1 and 2 inside `data_`
// depend on the capacity, so their live entries are moved to where the
// smaller layout expects them before `capacity_` is updated.
void CordRepRing::SetCapacityForTesting(size_t capacity) {
// Adjust for the changed layout
// Preconditions: the capacity may only shrink, and the live entries must not
// wrap around the end of the arrays.
assert(capacity <= capacity_);
assert(head() == 0 || head() < tail());
// Move the live entries of layout array 1 from its old to its new location.
memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(),
Layout::Partial(capacity_).Pointer<1>(data_) + head(),
entries() * sizeof(Layout::ElementType<1>));
// Same for layout array 2, whose location also depends on array 1's size.
memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(),
Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(),
entries() * sizeof(Layout::ElementType<2>));
capacity_ = static_cast<index_type>(capacity);
}
// Destroys and frees the ring object itself. The entries' children are not
// unreferenced here; see Destroy() for that.
void CordRepRing::Delete(CordRepRing* rep) {
  assert(rep != nullptr && rep->IsRing());
#if defined(__cpp_sized_deallocation)
  // Read the capacity before the destructor runs.
  const size_t alloc_size = AllocSize(rep->capacity_);
  rep->~CordRepRing();
  ::operator delete(rep, alloc_size);
#else
  rep->~CordRepRing();
  ::operator delete(rep);
#endif
}
// Releases the ring's reference on every entry's child, then deletes the ring
// object.
void CordRepRing::Destroy(CordRepRing* rep) {
  const index_type first = rep->head();
  const index_type last = rep->tail();
  UnrefEntries(rep, first, last);
  Delete(rep);
}
// Overwrites this ring's contents with the entries `[head, tail)` of `src`,
// stored contiguously from index 0.
//
// When `ref` is true a new reference is taken on each copied child; when it
// is false the child pointers are copied as is and no reference counts are
// changed.
//
// Note that `length` and `begin_pos_` are copied from `src` unchanged, even
// when `[head, tail)` is only part of `src`.
template <bool ref>
void CordRepRing::Fill(const CordRepRing* src, index_type head,
index_type tail) {
this->length = src->length;
head_ = 0;
tail_ = advance(0, src->entries(head, tail));
begin_pos_ = src->begin_pos_;
// TODO(mvels): there may be opportunities here for large buffers.
// Output cursors into this ring's three entry arrays.
auto* dst_pos = entry_end_pos();
auto* dst_child = entry_child();
auto* dst_offset = entry_data_offset();
src->ForEach(head, tail, [&](index_type index) {
*dst_pos++ = src->entry_end_pos(index);
CordRep* child = src->entry_child(index);
*dst_child++ = ref ? CordRep::Ref(child) : child;
*dst_offset++ = src->entry_data_offset(index);
});
}
// Returns a new ring holding the entries `[head, tail)` of `rep`, with room
// for `extra` more entries. A reference is taken on each copied child, and
// the caller's reference on `rep` is released.
CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head,
                               index_type tail, size_t extra) {
  const size_t count = rep->entries(head, tail);
  CordRepRing* const copy = CordRepRing::New(count, extra);
  copy->Fill<true>(rep, head, tail);
  CordRep::Unref(rep);
  return copy;
}
// Returns a ring with the contents of `rep` that is privately owned and has
// room for `extra` more entries:
//  - a shared `rep` is copied (and the caller's reference released);
//  - a private `rep` that is too small is replaced by a larger ring that
//    takes over its children; the capacity grows by at least 50%;
//  - otherwise `rep` itself is returned.
CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) {
  const size_t used = rep->entries();

  if (!rep->refcount.IsOne()) {
    return Copy(rep, rep->head(), rep->tail(), extra);
  }
  if (used + extra <= rep->capacity()) {
    return rep;
  }

  const size_t min_grow = rep->capacity() + rep->capacity() / 2;
  const size_t min_extra = (std::max)(extra, min_grow - used);
  CordRepRing* const grown = CordRepRing::New(used, min_extra);
  // The children move to the new ring without touching their refcounts, so
  // the old ring is deleted without unreferencing its entries.
  grown->Fill<false>(rep, rep->head(), rep->tail());
  CordRepRing::Delete(rep);
  return grown;
}
// Claims up to `size` bytes of unused capacity at the end of the last entry,
// provided that entry's child is a privately owned flat. On success the ring,
// the entry and the flat are all extended by the claimed amount and the
// claimed region is returned for the caller to fill. Returns an empty span
// if nothing could be claimed. Requires this ring to be privately owned.
Span<char> CordRepRing::GetAppendBuffer(size_t size) {
  assert(refcount.IsOne());
  const index_type last = retreat(tail_);
  CordRep* const child = entry_child(last);
  if (child->tag < FLAT || !child->refcount.IsOne()) {
    return {nullptr, 0};
  }

  const size_t capacity = child->flat()->Capacity();
  const pos_type end_pos = entry_end_pos(last);
  const size_t data_offset = entry_data_offset(last);
  const size_t entry_length = Distance(entry_begin_pos(last), end_pos);
  const size_t used = data_offset + entry_length;

  const size_t n = (std::min)(capacity - used, size);
  if (n == 0) {
    return {nullptr, 0};
  }

  child->length = used + n;
  entry_end_pos()[last] = end_pos + n;
  this->length += n;
  return {child->flat()->Data() + used, n};
}
// Claims up to `size` bytes of unused space in front of the first entry's
// data, provided that entry has a non-zero data offset and its child is a
// privately owned flat. On success the ring grows at the front by the claimed
// amount and the claimed region is returned for the caller to fill. Returns
// an empty span otherwise. Requires this ring to be privately owned.
Span<char> CordRepRing::GetPrependBuffer(size_t size) {
  assert(refcount.IsOne());
  CordRep* const child = entry_child(head_);
  const size_t old_offset = entry_data_offset(head_);
  if (old_offset == 0 || !child->refcount.IsOne() || child->tag < FLAT) {
    return {nullptr, 0};
  }

  const size_t n = (std::min)(old_offset, size);
  const size_t new_offset = old_offset - n;
  entry_data_offset()[head_] = static_cast<offset_type>(new_offset);
  begin_pos_ -= n;
  this->length += n;
  return {child->flat()->Data() + new_offset, n};
}
// Creates a ring with a single entry covering `len` bytes of `child`, starting
// at byte `offset` of the child's data, with room for `extra` more entries.
// The child pointer is stored as is.
CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset,
                                         size_t len, size_t extra) {
  CordRepRing* const ring = CordRepRing::New(1, extra);
  ring->length = len;
  ring->head_ = 0;
  ring->tail_ = ring->advance(0);
  ring->entry_child()[0] = child;
  ring->entry_data_offset()[0] = static_cast<offset_type>(offset);
  ring->entry_end_pos()[0] = len;
  return Validate(ring);
}
// Builds a ring from an arbitrary tree by consuming `child` in forward order.
// The first node reported creates the ring (reserving `extra` entries); each
// subsequent node is appended to it.
CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) {
  CordRepRing* result = nullptr;
  Consume(child, [&](CordRep* node, size_t offset, size_t len) {
    if (IsFlatOrExternal(node)) {
      result = result ? AppendLeaf(result, node, offset, len)
                      : CreateFromLeaf(node, offset, len, extra);
      return;
    }
    // Anything that is not a leaf is handled as a ring.
    CordRepRing* const ring = node->ring();
    if (result) {
      result = AddRing<AddMode::kAppend>(result, ring, offset, len);
    } else if (offset == 0 && node->length == len) {
      // The whole ring is wanted: reuse it.
      result = Mutable(ring, extra);
    } else {
      result = SubRing(ring, offset, len, extra);
    }
  });
  return Validate(result, nullptr, __LINE__);
}
// Creates a ring from `child`, with room for `extra` more entries. Leaves and
// rings are handled directly; every other node goes through CreateSlow().
CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) {
  const size_t child_length = child->length;
  if (!IsFlatOrExternal(child)) {
    return child->IsRing() ? Mutable(child->ring(), extra)
                           : CreateSlow(child, extra);
  }
  return CreateFromLeaf(child, 0, child_length, extra);
}
// Appends (AddMode::kAppend) or prepends (any other mode) the `len` bytes of
// `ring` that start at byte `offset` to `rep`, and returns the resulting ring.
//
// `rep` is passed through Mutable(), so the returned ring may be a different
// object than `rep`. The caller's reference on `ring` is consumed: a privately
// owned `ring` has its entries moved into the result and is deleted; a shared
// `ring` has its entries referenced and is then unreferenced.
template <CordRepRing::AddMode mode>
CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring,
size_t offset, size_t len) {
assert(offset < ring->length);
constexpr bool append = mode == AddMode::kAppend;
// Locate the range of source entries `[head.index, tail.index)` that covers
// the requested bytes.
Position head = ring->Find(offset);
Position tail = ring->FindTail(head.index, offset + len);
const index_type entries = ring->entries(head.index, tail.index);
rep = Mutable(rep, entries);
// The delta for making ring[head].end_pos into 'len - offset'
const pos_type delta_length =
(append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - len) -
ring->entry_begin_pos(head.index) - head.offset;
// Start filling at `tail`, or `entries` before `head`
Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries));
if (ring->refcount.IsOne()) {
// Copy entries from source stealing the ref and adjusting the end position.
// Commit the filler as this is no-op.
ring->ForEach(head.index, tail.index, [&](index_type ix) {
filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix),
ring->entry_end_pos(ix) + delta_length);
});
// Unref entries we did not copy over, and delete source.
if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index);
if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_);
CordRepRing::Delete(ring);
} else {
// The source is shared: take a new reference on every copied child, then
// drop our reference on the source ring.
ring->ForEach(head.index, tail.index, [&](index_type ix) {
CordRep* child = ring->entry_child(ix);
filler.Add(child, ring->entry_data_offset(ix),
ring->entry_end_pos(ix) + delta_length);
CordRep::Ref(child);
});
CordRepRing::Unref(ring);
}
if (head.offset) {
// Increase offset of first 'source' entry appended or prepended.
// This is always the entry in `filler.head()`
rep->AddDataOffset(filler.head(), head.offset);
}
if (tail.offset) {
// Reduce length of last 'source' entry appended or prepended.
// This is always the entry tailed by `filler.pos()`
rep->SubLength(rep->retreat(filler.pos()), tail.offset);
}
// Commit changes
rep->length += len;
if (append) {
rep->tail_ = filler.pos();
} else {
rep->head_ = filler.head();
rep->begin_pos_ -= len;
}
return Validate(rep);
}
// Appends an arbitrary tree to `rep` by consuming `child` in forward order:
// ring nodes are appended through AddRing, all other nodes as leaves.
CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) {
  Consume(child, [&rep](CordRep* node, size_t offset, size_t len) {
    rep = node->IsRing()
              ? AddRing<AddMode::kAppend>(rep, node->ring(), offset, len)
              : AppendLeaf(rep, node, offset, len);
  });
  return rep;
}
// Appends one entry covering `len` bytes of `child`, starting at byte `offset`
// of the child's data. `rep` is made mutable first, so the returned ring may
// differ from `rep`. The child pointer is stored as is.
CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child,
                                     size_t offset, size_t len) {
  rep = Mutable(rep, 1);
  const index_type slot = rep->tail_;
  // The new entry begins where the ring currently ends.
  const pos_type entry_begin = rep->begin_pos_ + rep->length;
  rep->entry_child()[slot] = child;
  rep->entry_data_offset()[slot] = static_cast<offset_type>(offset);
  rep->entry_end_pos()[slot] = entry_begin + len;
  rep->length += len;
  rep->tail_ = rep->advance(slot);
  return Validate(rep, nullptr, __LINE__);
}
// Appends `child` to `rep`. Leaves and rings are handled directly; every
// other node goes through AppendSlow().
CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) {
  const size_t child_length = child->length;
  if (!IsFlatOrExternal(child)) {
    return child->IsRing()
               ? AddRing<AddMode::kAppend>(rep, child->ring(), 0, child_length)
               : AppendSlow(rep, child);
  }
  return AppendLeaf(rep, child, 0, child_length);
}
// Prepends an arbitrary tree to `rep` by consuming `child` in reverse order:
// leaves are prepended directly, all other nodes through AddRing.
CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) {
  ReverseConsume(child, [&](CordRep* node, size_t offset, size_t len) {
    rep = IsFlatOrExternal(node)
              ? PrependLeaf(rep, node, offset, len)
              : AddRing<AddMode::kPrepend>(rep, node->ring(), offset, len);
  });
  return Validate(rep);
}
// Prepends one entry covering `len` bytes of `child`, starting at byte
// `offset` of the child's data. `rep` is made mutable first, so the returned
// ring may differ from `rep`. The child pointer is stored as is.
CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child,
                                      size_t offset, size_t len) {
  rep = Mutable(rep, 1);
  const index_type slot = rep->retreat(rep->head_);
  // The new entry ends where the ring currently begins.
  rep->entry_end_pos()[slot] = rep->begin_pos_;
  rep->entry_child()[slot] = child;
  rep->entry_data_offset()[slot] = static_cast<offset_type>(offset);
  rep->begin_pos_ -= len;
  rep->length += len;
  rep->head_ = slot;
  return Validate(rep);
}
// Prepends `child` to `rep`. Leaves and rings are handled directly; every
// other node goes through PrependSlow().
CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) {
  const size_t child_length = child->length;
  if (!IsFlatOrExternal(child)) {
    return child->IsRing()
               ? AddRing<AddMode::kPrepend>(rep, child->ring(), 0, child_length)
               : PrependSlow(rep, child);
  }
  return PrependLeaf(rep, child, 0, child_length);
}
// Appends a copy of `data` to `rep` and returns the resulting ring, which may
// be a different object than `rep`.
//
// If `rep` is privately owned, as much of `data` as fits is first copied into
// spare capacity of the last entry. The remainder is split into new flats of
// at most kMaxFlatLength bytes; the last (partial) flat is requested with
// `extra` additional capacity.
CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data,
size_t extra) {
if (rep->refcount.IsOne()) {
Span<char> avail = rep->GetAppendBuffer(data.length());
if (!avail.empty()) {
memcpy(avail.data(), data.data(), avail.length());
data.remove_prefix(avail.length());
}
}
if (data.empty()) return Validate(rep);
// Number of flats needed for the remaining data, i.e. the length divided by
// kMaxFlatLength, rounded up.
const size_t flats = (data.length() - 1) / kMaxFlatLength + 1;
rep = Mutable(rep, flats);
Filler filler(rep, rep->tail_);
// `pos` tracks the end position of the most recently added entry.
pos_type pos = rep->begin_pos_ + rep->length;
while (data.length() >= kMaxFlatLength) {
auto* flat = CreateFlat(data.data(), kMaxFlatLength);
filler.Add(flat, 0, pos += kMaxFlatLength);
data.remove_prefix(kMaxFlatLength);
}
if (data.length()) {
auto* flat = CreateFlat(data.data(), data.length(), extra);
filler.Add(flat, 0, pos += data.length());
}
// Commit the new entries.
rep->length = pos - rep->begin_pos_;
rep->tail_ = filler.pos();
return Validate(rep);
}
// Prepends a copy of `data` to `rep` and returns the resulting ring, which may
// be a different object than `rep`.
//
// If `rep` is privately owned, as much of the end of `data` as fits is first
// copied into the unused space in front of the first entry. The remainder is
// split into new flats: the first one holds the leading partial chunk and
// all following ones hold exactly kMaxFlatLength bytes.
//
// `extra` requests that many bytes of free space in front of the data in the
// first new flat, for use by later prepends. It is reduced as needed so that
// the free space plus the data never exceed kMaxFlatLength.
CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data,
                                  size_t extra) {
  if (rep->refcount.IsOne()) {
    Span<char> avail = rep->GetPrependBuffer(data.length());
    if (!avail.empty()) {
      const char* tail = data.data() + data.length() - avail.length();
      memcpy(avail.data(), tail, avail.length());
      data.remove_suffix(avail.length());
    }
  }
  if (data.empty()) return rep;

  // Number of flats needed for the remaining data, i.e. the length divided by
  // kMaxFlatLength, rounded up.
  const size_t flats = (data.length() - 1) / kMaxFlatLength + 1;
  rep = Mutable(rep, flats);
  pos_type pos = rep->begin_pos_;
  Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats)));

  // The first flat takes whatever is left after the full sized flats, which
  // is a value in the range [1, kMaxFlatLength].
  size_t first_size = data.size() - (flats - 1) * kMaxFlatLength;

  // CordRepFlat::New() never allocates more than kMaxFlatLength bytes of
  // payload. Cap `extra` to the room that remains, as otherwise the copy at
  // `Data() + extra` below would write past the end of the flat.
  extra = (std::min)(extra, kMaxFlatLength - first_size);

  CordRepFlat* flat = CordRepFlat::New(first_size + extra);
  flat->length = first_size + extra;
  memcpy(flat->Data() + extra, data.data(), first_size);
  data.remove_prefix(first_size);
  // NOTE(review): entries are recorded with provisional end positions that
  // run backwards from the old begin position; confirm how they line up with
  // the positions validated by IsValid().
  filler.Add(flat, extra, pos);
  pos -= first_size;

  while (!data.empty()) {
    assert(data.size() >= kMaxFlatLength);
    flat = CreateFlat(data.data(), kMaxFlatLength);
    filler.Add(flat, 0, pos);
    pos -= kMaxFlatLength;
    data.remove_prefix(kMaxFlatLength);
  }

  // Commit the new entries.
  rep->head_ = filler.head();
  rep->length += rep->begin_pos_ - pos;
  rep->begin_pos_ = pos;

  return Validate(rep);
}
// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86
// Ranges with more entries than this are first narrowed by binary search.
static constexpr index_type kBinarySearchThreshold = 32;
// The binary search stops once its step count is no larger than this; the
// remaining entries are scanned linearly.
static constexpr index_type kBinarySearchEndCount = 8;
// Narrows the search for the entry containing `offset` within `[head, tail)`
// by repeated halving, and returns an index at or before that entry from
// which a linear scan can continue.
//
// `wrap` selects the index arithmetic: when true, `tail` is treated as lying
// `capacity_` beyond its value and indexes are moved with advance(); when
// false, plain additions are used.
template <bool wrap>
CordRepRing::index_type CordRepRing::FindBinary(index_type head,
index_type tail,
size_t offset) const {
index_type count = tail + (wrap ? capacity_ : 0) - head;
do {
count = (count - 1) / 2;
assert(count < entries(head, tail_));
index_type mid = wrap ? advance(head, count) : head + count;
index_type after_mid = wrap ? advance(mid) : mid + 1;
// If `offset` lies at or past the end of `mid`, continue after `mid`;
// otherwise `mid` becomes the new upper bound.
bool larger = (offset >= entry_end_offset(mid));
head = larger ? after_mid : head;
tail = larger ? tail : mid;
assert(head != tail);
} while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount));
return head;
}
// Finds the entry containing byte `offset` of this ring, searching from entry
// `head` onwards. Returns the entry's index and the offset of the byte within
// that entry.
CordRepRing::Position CordRepRing::FindSlow(index_type head,
size_t offset) const {
index_type tail = tail_;
// Binary search until we are good for linear search
// Optimize for branchless / non wrapping ops
if (tail > head) {
index_type count = tail - head;
if (count > kBinarySearchThreshold) {
head = FindBinary<false>(head, tail, offset);
}
} else {
// The range wraps around the end of the entry arrays.
index_type count = capacity_ + tail - head;
if (count > kBinarySearchThreshold) {
head = FindBinary<true>(head, tail, offset);
}
}
// Linear scan: advance until the entry's end lies beyond `offset`.
pos_type pos = entry_begin_pos(head);
pos_type end_pos = entry_end_pos(head);
while (offset >= Distance(begin_pos_, end_pos)) {
head = advance(head);
pos = end_pos;
end_pos = entry_end_pos(head);
}
return {head, offset - Distance(begin_pos_, pos)};
}
// Finds the end of the byte range that stops at `offset` (exclusive),
// searching from entry `head` onwards. Returns the index one past the entry
// containing the last byte of the range (byte `offset - 1`), together with
// the number of bytes of that entry that lie at or beyond `offset`.
CordRepRing::Position CordRepRing::FindTailSlow(index_type head,
size_t offset) const {
index_type tail = tail_;
// Offset of the last byte inside the range.
const size_t tail_offset = offset - 1;
// Binary search until we are good for linear search
// Optimize for branchless / non wrapping ops
if (tail > head) {
index_type count = tail - head;
if (count > kBinarySearchThreshold) {
head = FindBinary<false>(head, tail, tail_offset);
}
} else {
// The range wraps around the end of the entry arrays.
index_type count = capacity_ + tail - head;
if (count > kBinarySearchThreshold) {
head = FindBinary<true>(head, tail, tail_offset);
}
}
// Linear scan: advance until the entry's end lies beyond the last byte.
size_t end_offset = entry_end_offset(head);
while (tail_offset >= end_offset) {
head = advance(head);
end_offset = entry_end_offset(head);
}
return {advance(head), end_offset - offset};
}
char CordRepRing::GetCharacter(size_t offset) const {
assert(offset < length);
Position pos = Find(offset);
size_t data_offset = entry_data_offset(pos.index) + pos.offset;
return GetRepData(entry_child(pos.index))[data_offset];
}
// Returns a ring holding `len` bytes of `rep` starting at `offset`.
// Returns nullptr (after releasing `rep`) when `len` is zero. Reuses `rep` in
// place when it is not shared and has room for `extra` more entries;
// otherwise the result comes from Copy().
CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset,
                                  size_t len, size_t extra) {
  assert(offset <= rep->length);
  assert(offset <= rep->length - len);

  if (len == 0) {
    CordRep::Unref(rep);
    return nullptr;
  }

  // Locate the first requested byte, then the tail position of the range.
  Position first = rep->Find(offset);
  Position last = rep->FindTail(first.index, offset + len);
  const size_t kept = rep->entries(first.index, last.index);

  const bool reuse_in_place =
      rep->refcount.IsOne() && extra <= (rep->capacity() - kept);
  if (!reuse_in_place) {
    // Copy the selected subset into a new rep.
    rep = Copy(rep, first.index, last.index, extra);
    first.index = rep->head_;
    last.index = rep->tail_;
  } else {
    // Privately owned with enough spare capacity: drop the entries outside
    // of the requested range and shrink the ring boundaries.
    if (first.index != rep->head_) UnrefEntries(rep, rep->head_, first.index);
    if (last.index != rep->tail_) UnrefEntries(rep, last.index, rep->tail_);
    rep->head_ = first.index;
    rep->tail_ = last.index;
  }

  // Adjust the logical start position and the total length.
  rep->begin_pos_ += offset;
  rep->length = len;

  // Trim the partially covered first and last entries.
  if (first.offset != 0) {
    rep->AddDataOffset(first.index, first.offset);
  }
  if (last.offset != 0) {
    rep->SubLength(rep->retreat(last.index), last.offset);
  }
  return Validate(rep);
}
// Returns a ring with the first `len` bytes of `rep` removed.
// Returns nullptr (after releasing `rep`) when `len` covers the whole ring.
// A non shared `rep` is trimmed in place; otherwise the result comes from
// Copy(), which receives `extra`.
CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len,
                                       size_t extra) {
  assert(len <= rep->length);
  if (len == rep->length) {
    CordRep::Unref(rep);
    return nullptr;
  }

  // The entry containing byte `len` becomes the new head.
  Position first = rep->Find(len);
  if (!rep->refcount.IsOne()) {
    rep = Copy(rep, first.index, rep->tail_, extra);
    first.index = rep->head_;
  } else {
    if (first.index != rep->head_) UnrefEntries(rep, rep->head_, first.index);
    rep->head_ = first.index;
  }

  // Adjust the logical start position and the total length.
  rep->begin_pos_ += len;
  rep->length -= len;

  // Trim the new head entry if the cut falls inside of it.
  if (first.offset != 0) {
    rep->AddDataOffset(first.index, first.offset);
  }
  return Validate(rep);
}
// Returns a ring with the last `len` bytes of `rep` removed.
// Returns nullptr (after releasing `rep`) when `len` covers the whole ring.
// A non shared `rep` is trimmed in place; otherwise the result comes from
// Copy(), which receives `extra`.
CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len,
                                       size_t extra) {
  assert(len <= rep->length);
  if (len == rep->length) {
    CordRep::Unref(rep);
    return nullptr;
  }

  // Tail position of the data that remains after the cut.
  Position last = rep->FindTail(rep->length - len);
  if (!rep->refcount.IsOne()) {
    // Copy the remaining subset into a new rep.
    rep = Copy(rep, rep->head_, last.index, extra);
    last.index = rep->tail_;
  } else {
    // Privately owned: drop the entries behind the new tail in place.
    if (last.index != rep->tail_) UnrefEntries(rep, last.index, rep->tail_);
    rep->tail_ = last.index;
  }

  rep->length -= len;

  // Shorten the new last entry if the cut falls inside of it.
  if (last.offset != 0) {
    rep->SubLength(rep->retreat(last.index), last.offset);
  }
  return Validate(rep);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,607 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <memory>
#include "absl/container/internal/layout.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// All operations modifying a ring buffer are implemented as static methods
// requiring a CordRepRing instance with a reference adopted by the method.
//
// The methods return the modified ring buffer, which may be equal to the input
// if the input was not shared, and having large enough capacity to accommodate
// any newly added node(s). Otherwise, a copy of the input rep with the new
// node(s) added is returned.
//
// Any modification on non shared ring buffers with enough capacity will then
// require minimum atomic operations. Caller should where possible provide
// reasonable `extra` hints for both anticipated extra `flat` byte space, as
// well as anticipated extra nodes required for complex operations.
//
// Example of code creating a ring buffer, adding some data to it,
// and discarding the buffer when done:
//
// void FunWithRings() {
// // Create ring with 3 flats
// CordRep* flat = CreateFlat("Hello");
// CordRepRing* ring = CordRepRing::Create(flat, 2);
// ring = CordRepRing::Append(ring, CreateFlat(" "));
// ring = CordRepRing::Append(ring, CreateFlat("world"));
// DoSomethingWithRing(ring);
// CordRep::Unref(ring);
// }
//
// Example of code Copying an existing ring buffer and modifying it:
//
// void MoreFunWithRings(CordRepRing* src) {
// CordRepRing* ring = CordRep::Ref(src)->ring();
// ring = CordRepRing::Append(ring, CreateFlat("Hello"));
// ring = CordRepRing::Append(ring, CreateFlat(" "));
// ring = CordRepRing::Append(ring, CreateFlat("world"));
// DoSomethingWithRing(ring);
// CordRep::Unref(ring);
// }
//
class CordRepRing : public CordRep {
public:
// `pos_type` represents a 'logical position'. A CordRepRing instance has a
// `begin_pos` (default 0), and each node inside the buffer will have an
// `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus
// this node's length. The purpose is to allow for a binary search on this
// position, while allowing O(1) prepend and append operations.
using pos_type = size_t;
// `index_type` is the type for the `head`, `tail` and `capacity` indexes.
// Ring buffers are limited to having no more than four billion entries.
using index_type = uint32_t;
// `offset_type` is the type for the data offset inside a child rep's data.
using offset_type = uint32_t;
// Position holds the node index and relative offset into the node for
// some physical offset in the contained data as returned by the Find()
// and FindTail() methods.
struct Position {
index_type index;
size_t offset;
};
// The maximum # of child nodes that can be hosted inside a CordRepRing.
static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)();
// CordRepRing can not be default constructed, moved, copied or assigned.
CordRepRing() = delete;
CordRepRing(const CordRepRing&) = delete;
CordRepRing& operator=(const CordRepRing&) = delete;
// Returns true if this instance is valid, false if some or all of the
// invariants are broken. Intended for debug purposes only.
// `output` receives an explanation of the broken invariants.
bool IsValid(std::ostream& output) const;
// Returns the size in bytes for a CordRepRing with `capacity` entries.
static constexpr size_t AllocSize(size_t capacity);
// Returns the distance in bytes from `pos` to `end_pos`.
static constexpr size_t Distance(pos_type pos, pos_type end_pos);
// Creates a new ring buffer from the provided `child`. Adopts a reference
// on `child`. The returned ring buffer has a capacity of at least `extra + 1`
static CordRepRing* Create(CordRep* child, size_t extra = 0);
// `head`, `tail` and `capacity` indexes defining the ring buffer boundaries.
index_type head() const { return head_; }
index_type tail() const { return tail_; }
index_type capacity() const { return capacity_; }
// Returns the number of entries in this instance.
index_type entries() const { return entries(head_, tail_); }
// Returns the logical begin position of this instance.
pos_type begin_pos() const { return begin_pos_; }
// Returns the number of entries for a given head-tail range.
// Requires `head` and `tail` values to be less than `capacity()`.
// Note that `tail == head` yields `capacity()`, not zero.
index_type entries(index_type head, index_type tail) const {
assert(head < capacity_ && tail < capacity_);
return tail - head + ((tail > head) ? 0 : capacity_);
}
// Returns the logical end position of entry `index`.
pos_type const& entry_end_pos(index_type index) const {
assert(IsValidIndex(index));
return Layout::Partial().Pointer<0>(data_)[index];
}
// Returns the child pointer of entry `index`.
CordRep* const& entry_child(index_type index) const {
assert(IsValidIndex(index));
return Layout::Partial(capacity()).Pointer<1>(data_)[index];
}
// Returns the data offset of entry `index`
offset_type const& entry_data_offset(index_type index) const {
assert(IsValidIndex(index));
return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index];
}
// Appends the provided child node to the `rep` instance.
// Adopts a reference from `rep` and `child` which may not be null.
// If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
// containing a FLAT or EXTERNAL node, then the flat or external node is added
// 'as is', with an offset added for the SUBSTRING case.
// If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or
// CONCAT tree, then all child nodes not excluded by any start offset or
// length values are added recursively.
static CordRepRing* Append(CordRepRing* rep, CordRep* child);
// Appends the provided string data to the `rep` instance.
// This function will attempt to utilize any remaining capacity in the last
// node of the input if that node is not shared (directly or indirectly), and
// of type FLAT. Remaining data will be added as one or more FLAT nodes.
// Any last node added to the ring buffer will be allocated with up to
// `extra` bytes of capacity for (anticipated) subsequent append actions.
static CordRepRing* Append(CordRepRing* rep, string_view data,
size_t extra = 0);
// Prepends the provided child node to the `rep` instance.
// Adopts a reference from `rep` and `child` which may not be null.
// If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node
// containing a FLAT or EXTERNAL node, then the flat or external node is
// prepended 'as is', with an optional offset added for the SUBSTRING case.
// If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING
// or CONCAT tree, then all child nodes not excluded by any start offset or
// length values are added recursively.
static CordRepRing* Prepend(CordRepRing* rep, CordRep* child);
// Prepends the provided string data to the `rep` instance.
// This function will attempt to utilize any remaining capacity in the first
// node of the input if that node is not shared (directly or indirectly), and
// of type FLAT. Remaining data will be added as one or more FLAT nodes.
// Any first node prepended to the ring buffer will be allocated with up to
// `extra` bytes of capacity for (anticipated) subsequent prepend actions.
static CordRepRing* Prepend(CordRepRing* rep, string_view data,
size_t extra = 0);
// Returns a span referencing potentially unused capacity in the last node.
// The returned span may be empty if no such capacity is available, or if the
// current instance is shared. Else, a span of size `n <= size` is returned.
// If non empty, the ring buffer is adjusted to the new length, with the newly
// added capacity left uninitialized. Callers should assign a value to the
// entire span before any other operations on this instance.
Span<char> GetAppendBuffer(size_t size);
// Returns a span referencing potentially unused capacity in the first node.
// This function is identical to GetAppendBuffer except that it returns a span
// referencing up to `size` capacity directly before the existing data.
Span<char> GetPrependBuffer(size_t size);
// Returns a cord ring buffer containing `len` bytes of data starting at
// `offset`. If the input is not shared, this function will remove all head
// and tail child nodes outside of the requested range, and adjust the new
// head and tail nodes as required. If the input is shared, this function
// returns a new instance sharing some or all of the nodes from the input.
static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t len,
size_t extra = 0);
// Returns a cord ring buffer with the last `len` bytes removed.
// If the input is not shared, this function will remove all tail child nodes
// fully inside the last `len` bytes, and adjust the new tail as required.
// If the input is shared, this function returns a new instance sharing some
// or all of the nodes from the input.
static CordRepRing* RemoveSuffix(CordRepRing* r, size_t len,
size_t extra = 0);
// Returns a cord ring buffer with the first `len` bytes removed.
// If the input is not shared, this function will remove all head child nodes
// fully inside the first `len` bytes, and adjust the new head as required.
// If the input is shared, this function returns a new instance sharing some
// or all of the nodes from the input.
static CordRepRing* RemovePrefix(CordRepRing* r, size_t len,
size_t extra = 0);
// Returns the character at `offset`. Requires that `offset < length`.
char GetCharacter(size_t offset) const;
// Returns true if this instance manages a single contiguous buffer, in which
// case the (optional) output parameter `fragment` is set. Otherwise, the
// function returns false, and `fragment` is left unchanged.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data starting at `offset` with length `len` is
// managed by this instance inside a single contiguous buffer, in which case
// the (optional) output parameter `fragment` is set to the contiguous memory
// starting at offset `offset` with length `len`. Otherwise, the function
// returns false, and `fragment` is left unchanged.
bool IsFlat(size_t offset, size_t len, absl::string_view* fragment) const;
// Testing only: set capacity to requested capacity.
void SetCapacityForTesting(size_t capacity);
// Returns the CordRep data pointer for the provided CordRep.
// Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep.
static const char* GetLeafData(const CordRep* rep);
// Returns the CordRep data pointer for the provided CordRep.
// Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep.
static const char* GetRepData(const CordRep* rep);
// Advances the provided position, wrapping around capacity as needed.
// Requires `index` < capacity()
inline index_type advance(index_type index) const;
// Advances the provided position by `n`, wrapping around capacity as needed.
// Requires `index` < capacity() and `n` <= capacity.
inline index_type advance(index_type index, index_type n) const;
// Retreats the provided position, wrapping around 0 as needed.
// Requires `index` < capacity()
inline index_type retreat(index_type index) const;
// Retreats the provided position by `n`, wrapping around 0 as needed.
// Requires `index` < capacity() and `n` <= capacity.
inline index_type retreat(index_type index, index_type n) const;
// Returns the logical begin position of entry `index`
pos_type const& entry_begin_pos(index_type index) const {
return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index));
}
// Returns the physical start offset of entry `index`
size_t entry_start_offset(index_type index) const {
return Distance(begin_pos_, entry_begin_pos(index));
}
// Returns the physical end offset of entry `index`
size_t entry_end_offset(index_type index) const {
return Distance(begin_pos_, entry_end_pos(index));
}
// Returns the data length for entry `index`
size_t entry_length(index_type index) const {
return Distance(entry_begin_pos(index), entry_end_pos(index));
}
// Returns the data for entry `index`
absl::string_view entry_data(index_type index) const;
// Returns the position for `offset` as {index, offset}. `index` holds the
// index of the entry containing the specified offset and `offset` holds the
// relative offset inside that entry.
// Requires `offset` < length.
//
// For example we can implement GetCharacter(offset) as:
// char GetCharacter(size_t offset) {
// Position pos = this->Find(offset);
// return this->entry_data(pos.index)[pos.offset];
// }
inline Position Find(size_t offset) const;
// Find starting at `head`
inline Position Find(index_type head, size_t offset) const;
// Returns the tail position for `offset` as {tail index, suffix}.
// `tail index` holds the index of the entry holding the byte directly
// before `offset`, advanced by one. `suffix` holds the distance from
// `offset` to the end of that entry.
// For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5)
// will return {tail(), 5} provided the last entry contains more than
// 5 bytes of data.
// Requires offset >= 1 && offset <= length.
//
// This function is very useful in functions that need to clip the end of some
// ring buffer such as 'RemoveSuffix'.
// For example, we could implement RemoveSuffix for non shared instances as:
// void RemoveSuffix(size_t n) {
// Position pos = FindTail(length - n);
// UnrefEntries(pos.index, this->tail_);
// this->tail_ = pos.index;
// entry(retreat(pos.index)).end_pos -= pos.offset;
// }
inline Position FindTail(size_t offset) const;
// Find tail starting at `head`
inline Position FindTail(index_type head, size_t offset) const;
// Invokes f(index_type index) for each entry inside the range [head, tail).
// A range with `tail <= head` is treated as wrapping around `capacity`.
template <typename F>
void ForEach(index_type head, index_type tail, F&& f) const {
index_type n1 = (tail > head) ? tail : capacity_;
for (index_type i = head; i < n1; ++i) f(i);
if (tail <= head) {
for (index_type i = 0; i < tail; ++i) f(i);
}
}
// Invokes f(index_type index) for each entry inside this instance.
template <typename F>
void ForEach(F&& f) const {
ForEach(head_, tail_, std::forward<F>(f));
}
// Dump this instance's data to stream `s` in human readable format, excluding
// the actual data content itself. Intended for debug purposes only.
friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);
private:
enum class AddMode { kAppend, kPrepend };
// Describes the three parallel per-entry arrays stored behind `data_`:
// end positions, child pointers and data offsets (see the entry_* accessors).
using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>;
class Filler;
class Transaction;
class CreateTransaction;
static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment();
// Creates a new CordRepRing.
explicit CordRepRing(index_type capacity) : capacity_(capacity) {}
// Returns true if `index` is a valid index into this instance.
bool IsValidIndex(index_type index) const;
// Debug use only: validates the provided CordRepRing invariants.
// Verification of all CordRepRing methods can be enabled by defining
// EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION`
// Verification is VERY expensive, so only do it for debugging purposes.
static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr,
int line = 0);
// Allocates a CordRepRing large enough to hold `capacity + extra` entries.
// The returned capacity may be larger if the allocated memory allows for it.
// The maximum capacity of a CordRepRing is capped at kMaxCapacity.
// Throws `std::length_error` if `capacity + extra` exceeds kMaxCapacity.
static CordRepRing* New(size_t capacity, size_t extra);
// Deallocates (but does not destroy) the provided ring buffer.
static void Delete(CordRepRing* rep);
// Destroys the provided ring buffer, decrementing the reference count of all
// contained child CordReps. The provided `rep` should have a ref count of
// one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post
// decrement destroy call observing `!refcount.Decrement()`).
static void Destroy(CordRepRing* rep);
// Returns a mutable pointer to the logical end position array.
pos_type* entry_end_pos() {
return Layout::Partial().Pointer<0>(data_);
}
// Returns a mutable pointer to the child pointer array.
CordRep** entry_child() {
return Layout::Partial(capacity()).Pointer<1>(data_);
}
// Returns a mutable pointer to the data offset array.
offset_type* entry_data_offset() {
return Layout::Partial(capacity(), capacity()).Pointer<2>(data_);
}
// Find implementations for the non fast path 0 / length cases.
Position FindSlow(index_type head, size_t offset) const;
Position FindTailSlow(index_type head, size_t offset) const;
// Finds the index of the first node that is inside a reasonable distance
// of the node at `offset` from which we can continue with a linear search.
template <bool wrap>
index_type FindBinary(index_type head, index_type tail, size_t offset) const;
// Fills the current (initialized) instance from the provided source, copying
// entries [head, tail). Adds a reference to copied entries if `ref` is true.
template <bool ref>
void Fill(const CordRepRing* src, index_type head, index_type tail);
// Create a copy of `rep`, copying all entries [head, tail), allocating room
// for `extra` entries. Adds a reference on all copied entries.
static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail,
size_t extra = 0);
// Returns a Mutable CordRepRing reference from `rep` with room for at least
// `extra` additional nodes. Adopts a reference count from `rep`.
// This function will return `rep` if, and only if:
// - rep.entries + extra <= rep.capacity
// - rep.refcount == 1
// Otherwise, this function will create a new copy of `rep` with additional
// capacity to satisfy `extra` extra nodes, and unref the old `rep` instance.
//
// If a new CordRepRing can not be allocated, or the new capacity would exceed
// the maximum capacity, then the input is consumed only, and an exception is
// thrown.
static CordRepRing* Mutable(CordRepRing* rep, size_t extra);
// Slow path for Append(CordRepRing* rep, CordRep* child). This function is
// exercised if the provided `child` in Append() is not a leaf node, i.e., a
// ring buffer or old (concat) cord tree.
static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child);
// Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child,
size_t offset, size_t length);
// Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL.
static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child,
size_t offset, size_t length);
// Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is
// exercised if the provided `child` in Prepend() is not a leaf node, i.e., a
// ring buffer or old (concat) cord tree.
static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child);
// Slow path for Create(CordRep* child, size_t extra). This function is
// exercised if the provided `child` in Create() is not a leaf node, i.e., a
// ring buffer or old (concat) cord tree.
static CordRepRing* CreateSlow(CordRep* child, size_t extra);
// Creates a new ring buffer from the provided `child` leaf node. Requires
// `child` to be FLAT or EXTERNAL.
// The returned ring buffer has a capacity of at least `1 + extra`
static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset,
size_t length, size_t extra);
// Appends or prepends (depending on AddMode) the ring buffer in `ring` to
// `rep` starting at `offset` with length `len`.
template <AddMode mode>
static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring,
size_t offset, size_t len);
// Increases the data offset for entry `index` by `n`.
void AddDataOffset(index_type index, size_t n);
// Decreases the length for entry `index` by `n`.
void SubLength(index_type index, size_t n);
// Ring boundaries: entries occupy the index range [head_, tail_), wrapping
// around at `capacity_` (see ForEach() and IsValidIndex()).
index_type head_;
index_type tail_;
index_type capacity_;
// Logical position of the first byte; entry end positions are relative to
// this value (see entry_end_offset()).
pos_type begin_pos_;
// Start of the entry arrays addressed through `Layout`. AllocSize()
// replaces this member's declared size with the size needed for `capacity`
// entries.
alignas(kLayoutAlignment) char data_[kLayoutAlignment];
friend struct CordRep;
};
// Returns the allocation size in bytes for a ring with `capacity` entries:
// the layout of the three per-entry arrays plus the fixed part of the object,
// with the `data_` placeholder member excluded.
constexpr size_t CordRepRing::AllocSize(size_t capacity) {
  return Layout(capacity, capacity, capacity).AllocSize() +
         sizeof(CordRepRing) - sizeof(data_);
}
// Returns the number of bytes between logical positions `pos` and `end_pos`.
// The subtraction is unsigned, so the result is computed modulo the range of
// `pos_type`.
inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) {
  return end_pos - pos;
}
// Returns the data pointer of leaf node `rep`: the external buffer for an
// EXTERNAL node, the flat's own data for any other tag.
inline const char* CordRepRing::GetLeafData(const CordRep* rep) {
  if (rep->tag == EXTERNAL) {
    return rep->external()->base;
  }
  return rep->flat()->Data();
}
inline const char* CordRepRing::GetRepData(const CordRep* rep) {
if (rep->tag >= FLAT) return rep->flat()->Data();
if (rep->tag == EXTERNAL) return rep->external()->base;
return GetLeafData(rep->substring()->child) + rep->substring()->start;
}
// Returns the index following `index`, wrapping to 0 at `capacity_`.
// Requires `index < capacity_`.
inline CordRepRing::index_type CordRepRing::advance(index_type index) const {
  assert(index < capacity_);
  const index_type next = index + 1;
  return next == capacity_ ? 0 : next;
}
// Returns `index` advanced by `n` entries, wrapping around `capacity_`.
// Requires `index < capacity_` and `n <= capacity_`.
inline CordRepRing::index_type CordRepRing::advance(index_type index,
                                                    index_type n) const {
  assert(index < capacity_ && n <= capacity_);
  // `index + n` can exceed the range of `index_type` for very large
  // capacities, so compare `n` against the room left before the wrap point
  // instead of forming the sum first. `index < capacity_` keeps `room` >= 1,
  // and `n < room` guarantees that `index + n` stays below `capacity_`.
  const index_type room = capacity_ - index;
  return n >= room ? n - room : index + n;
}
// Returns the index preceding `index`, wrapping from 0 to `capacity_ - 1`.
// Requires `index < capacity_`.
inline CordRepRing::index_type CordRepRing::retreat(index_type index) const {
  assert(index < capacity_);
  return index == 0 ? capacity_ - 1 : index - 1;
}
// Returns `index` moved back by `n` entries, wrapping around 0.
// Requires `index < capacity_` and `n <= capacity_`.
inline CordRepRing::index_type CordRepRing::retreat(index_type index,
                                                    index_type n) const {
  assert(index < capacity_ && n <= capacity_);
  if (index < n) {
    // Stepping back crosses index 0: continue from the end of the ring.
    return capacity_ - n + index;
  }
  return index - n;
}
// Returns a view on the data of entry `index`: the child's data pointer
// shifted by the entry's data offset, spanning the entry's length.
inline absl::string_view CordRepRing::entry_data(index_type index) const {
  const size_t skip = entry_data_offset(index);
  const char* const begin = GetRepData(entry_child(index)) + skip;
  return {begin, entry_length(index)};
}
// Returns true if `index` refers to an entry inside [head_, tail_), taking
// wrap-around at `capacity_` into account.
inline bool CordRepRing::IsValidIndex(index_type index) const {
  if (index >= capacity_) return false;
  const bool at_or_after_head = index >= head_;
  const bool before_tail = index < tail_;
  if (tail_ > head_) {
    // Contiguous range: both bounds must hold.
    return at_or_after_head && before_tail;
  }
  // Wrapped range: the index lies in either of the two segments.
  return at_or_after_head || before_tail;
}
#ifndef EXTRA_CORD_RING_VALIDATION
// Inline no-op used when EXTRA_CORD_RING_VALIDATION is not defined: returns
// `rep` unchanged and ignores the `file` / `line` arguments.
inline CordRepRing* CordRepRing::Validate(CordRepRing* rep,
const char* /*file*/, int /*line*/) {
return rep;
}
#endif
// Returns the position of `offset`, searching from the head of the ring.
// Requires `offset < length`.
inline CordRepRing::Position CordRepRing::Find(size_t offset) const {
  assert(offset < length);
  // Fast path: offset 0 is always the start of the head entry.
  if (offset == 0) {
    return Position{head_, 0};
  }
  return FindSlow(head_, offset);
}
// Returns the position of `offset`, searching from entry `head`.
// Requires `offset < length`, and that `head` is a valid index whose entry
// does not start after `offset`.
inline CordRepRing::Position CordRepRing::Find(index_type head,
                                               size_t offset) const {
  assert(offset < length);
  assert(IsValidIndex(head) && offset >= entry_start_offset(head));
  // Fast path: offset 0 is always the start of the ring's head entry.
  if (offset == 0) {
    return Position{head_, 0};
  }
  return FindSlow(head, offset);
}
// Returns the tail position for `offset`, searching from the head of the
// ring. Requires `offset > 0 && offset <= length`.
inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const {
  assert(offset > 0 && offset <= length);
  // Fast path: the end of the data is always the ring's tail.
  if (offset == length) {
    return Position{tail_, 0};
  }
  return FindTailSlow(head_, offset);
}
// Returns the tail position for `offset`, searching from entry `head`.
// Requires `offset > 0 && offset <= length`, and that `head` is a valid index
// whose entry starts before `offset`.
inline CordRepRing::Position CordRepRing::FindTail(index_type head,
                                                   size_t offset) const {
  assert(offset > 0 && offset <= length);
  assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1);
  // Fast path: the end of the data is always the ring's tail.
  if (offset == length) {
    return Position{tail_, 0};
  }
  return FindTailSlow(head, offset);
}
// Now that CordRepRing is defined, we can define CordRep's helper casts:
// Downcasts this CordRep to a CordRepRing. Asserts that IsRing() holds.
inline CordRepRing* CordRep::ring() {
  assert(IsRing());
  CordRepRing* const self = static_cast<CordRepRing*>(this);
  return self;
}
// Const overload: downcasts this CordRep to a const CordRepRing. Asserts
// that IsRing() holds.
inline const CordRepRing* CordRep::ring() const {
  assert(IsRing());
  const CordRepRing* const self = static_cast<const CordRepRing*>(this);
  return self;
}
// Returns true if this ring holds exactly one entry, in which case the
// optional `fragment` receives that entry's data. Otherwise returns false and
// leaves `fragment` untouched.
inline bool CordRepRing::IsFlat(absl::string_view* fragment) const {
  if (entries() != 1) {
    return false;
  }
  if (fragment != nullptr) {
    *fragment = entry_data(head());
  }
  return true;
}
inline bool CordRepRing::IsFlat(size_t offset, size_t len,
absl::string_view* fragment) const {
const Position pos = Find(offset);
const absl::string_view data = entry_data(pos.index);
if (data.length() >= len && data.length() - len >= pos.offset) {
if (fragment) *fragment = data.substr(pos.offset, len);
return true;
}
return false;
}
// Namespace-scope declaration of the stream operator that CordRepRing
// declares as a friend. This header only declares it; no definition appears
// here.
std::ostream& operator<<(std::ostream& s, const CordRepRing& rep);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_

View File

@@ -0,0 +1,118 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepRingReader provides basic navigation over CordRepRing data.
// CordRepRingReader provides basic navigation over CordRepRing data.
// A default constructed reader is empty (`ring() == nullptr`); Reset(ring)
// attaches it to a ring buffer and positions it at the first chunk.
class CordRepRingReader {
 public:
  // Returns true if this instance is not empty.
  explicit operator bool() const { return ring_ != nullptr; }

  // Returns the ring buffer reference for this instance, or nullptr if empty.
  CordRepRing* ring() const { return ring_; }

  // Returns the current node index inside the ring buffer for this instance.
  // The returned value is meaningless if this instance is empty.
  CordRepRing::index_type index() const { return index_; }

  // Returns the current node inside the ring buffer for this instance.
  // Must not be called on an empty instance: `ring_` is dereferenced.
  CordRep* node() const { return ring_->entry_child(index_); }

  // Returns the length of the referenced ring buffer.
  // Requires the current instance to be non empty.
  size_t length() const {
    assert(ring_);
    return ring_->length;
  }

  // Returns the end offset of the last navigated-to chunk, which represents the
  // total bytes 'consumed' relative to the start of the ring. The returned
  // value is never zero. For example, initializing a reader with a ring buffer
  // with a first chunk of 19 bytes will return consumed() = 19.
  // Requires the current instance to be non empty.
  size_t consumed() const {
    assert(ring_);
    return ring_->entry_end_offset(index_);
  }

  // Returns the number of bytes remaining beyond the last navigated-to chunk.
  // Requires the current instance to be non empty.
  size_t remaining() const {
    assert(ring_);
    return length() - consumed();
  }

  // Resets this instance to an empty value
  void Reset() { ring_ = nullptr; }

  // Resets this instance to the start of `ring`. `ring` must not be null.
  // Returns a reference into the first chunk of the provided ring.
  absl::string_view Reset(CordRepRing* ring) {
    assert(ring);
    ring_ = ring;
    index_ = ring_->head();
    return ring_->entry_data(index_);
  }

  // Navigates to the next chunk inside the reference ring buffer.
  // Returns a reference into the navigated-to chunk.
  // Requires remaining() to be non zero.
  absl::string_view Next() {
    assert(remaining());
    index_ = ring_->advance(index_);
    return ring_->entry_data(index_);
  }

  // Navigates to the chunk at offset `offset`.
  // Returns a reference into the navigated-to chunk, adjusted for the relative
  // position of `offset` into that chunk. For example, calling Seek(13) on a
  // ring buffer containing 2 chunks of 10 and 20 bytes respectively will return
  // a string view into the second chunk starting at offset 3 with a size of 17.
  // Requires `offset` to be less than `length()`
  absl::string_view Seek(size_t offset) {
    assert(offset < length());
    // Searching can continue from the current chunk when `offset` lies at or
    // beyond its end; otherwise restart from the head of the ring.
    size_t current = ring_->entry_end_offset(index_);
    CordRepRing::index_type hint = (offset >= current) ? index_ : ring_->head();
    const CordRepRing::Position head = ring_->Find(hint, offset);
    index_ = head.index;
    auto data = ring_->entry_data(head.index);
    data.remove_prefix(head.offset);
    return data;
  }

 private:
  CordRepRing* ring_ = nullptr;
  // Value-initialized so that index() on, or a copy of, a default constructed
  // (empty) reader never reads an indeterminate value.
  CordRepRing::index_type index_ = 0;
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_

View File

@@ -0,0 +1,185 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#include <cassert>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cordrep_testing {
// Allocates a SUBSTRING node over `rep` that begins at `start`. Because `len`
// is unsigned, the `len <= 0` case is exactly `len == 0`, and it selects
// everything from `start` to the end of `rep`.
inline cord_internal::CordRepSubstring* MakeSubstring(
    size_t start, size_t len, cord_internal::CordRep* rep) {
  auto* substring = new cord_internal::CordRepSubstring;
  substring->tag = cord_internal::SUBSTRING;
  substring->child = rep;
  substring->start = start;
  if (len <= 0) {
    substring->length = rep->length - start + len;
  } else {
    substring->length = len;
  }
  return substring;
}
// Allocates a CONCAT node joining `left` and `right`. The node's length is
// the sum of both children's lengths and its depth is set to `depth`.
inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left,
                                                cord_internal::CordRep* right,
                                                int depth = 0) {
  auto* node = new cord_internal::CordRepConcat;
  node->tag = cord_internal::CONCAT;
  node->left = left;
  node->right = right;
  node->length = left->length + right->length;
  node->set_depth(depth);
  return node;
}
// Allocates a flat node and copies the bytes of `value` into it.
// `value` must not be longer than cord_internal::kMaxFlatLength.
inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
  const size_t size = value.length();
  assert(size <= cord_internal::kMaxFlatLength);
  auto* flat = cord_internal::CordRepFlat::New(size);
  flat->length = size;
  memcpy(flat->Data(), value.data(), size);
  return flat;
}
// Creates an external node for testing. The node keeps its own std::string
// copy of `s`; `base` points at that copy, and the releaser deletes the node
// (and with it the copy).
inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) {
  struct OwningExternal : public cord_internal::CordRepExternal {
    std::string storage;
    explicit OwningExternal(absl::string_view contents) : storage(contents) {
      this->tag = cord_internal::EXTERNAL;
      this->length = storage.length();
      this->base = storage.data();
      this->releaser_invoker = [](cord_internal::CordRepExternal* self) {
        delete static_cast<OwningExternal*>(self);
      };
    }
  };
  return new OwningExternal(s);
}
inline std::string CreateRandomString(size_t n) {
absl::string_view data =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"0123456789~!@#$%^&*()_+=-<>?:\"{}[]|";
std::minstd_rand rnd;
std::uniform_int_distribution<size_t> dist(0, data.size() - 1);
std::string s(n, ' ');
for (size_t i = 0; i < n; ++i) {
s[i] = data[dist(rnd)];
}
return s;
}
// Creates an array of flats from the provided string, chopping the provided
// string up into flats of at most `chunk_size` characters each. When
// `data.size()` is not a multiple of `chunk_size`, the final flat holds the
// shorter remainder. Returns an empty vector for empty `data`.
// Requires `chunk_size` to be non-zero.
inline std::vector<cord_internal::CordRep*> CreateFlatsFromString(
    absl::string_view data, size_t chunk_size) {
  assert(chunk_size > 0);
  std::vector<cord_internal::CordRep*> flats;
  absl::string_view remaining = data;
  while (!remaining.empty()) {
    // substr() clamps to the bytes that are left. Advancing by the size of
    // the chunk actually taken (not by `chunk_size`) keeps remove_prefix()
    // within bounds on a short final chunk.
    const absl::string_view chunk = remaining.substr(0, chunk_size);
    flats.push_back(MakeFlat(chunk));
    remaining.remove_prefix(chunk.size());
  }
  return flats;
}
// Builds a btree by creating a node from the first flat and appending each of
// the remaining flats to it in order. `flats` must not be empty.
inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
    absl::Span<cord_internal::CordRep* const> flats) {
  assert(!flats.empty());
  auto* tree = cord_internal::CordRepBtree::Create(flats[0]);
  size_t next = 1;
  while (next < flats.size()) {
    tree = cord_internal::CordRepBtree::Append(tree, flats[next]);
    ++next;
  }
  return tree;
}
// Appends the contents of `rep` to `s`. Any chain of SUBSTRING nodes is
// unwrapped first, accumulating their start offsets. BTREE nodes are handled
// by recursing into every edge; FLAT and EXTERNAL nodes append `rep->length`
// bytes (as read before unwrapping) starting at the accumulated offset. Any
// other tag is a fatal error.
inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
  const size_t length = rep->length;
  size_t offset = 0;
  for (; rep->tag == cord_internal::SUBSTRING;
       rep = rep->substring()->child) {
    offset += rep->substring()->start;
  }
  if (rep->tag == cord_internal::BTREE) {
    for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
      CordToString(edge, s);
    }
    return;
  }
  if (rep->tag >= cord_internal::FLAT) {
    s.append(rep->flat()->Data() + offset, length);
    return;
  }
  if (rep->tag == cord_internal::EXTERNAL) {
    s.append(rep->external()->base + offset, length);
    return;
  }
  ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
}
// Returns the contents of `rep` as a new string, reserving `rep->length`
// bytes up front before delegating to the appending overload.
inline std::string CordToString(cord_internal::CordRep* rep) {
  std::string contents;
  contents.reserve(rep->length);
  CordToString(rep, contents);
  return contents;
}
// RAII Helper class to automatically unref reps on destruction.
class AutoUnref {
public:
~AutoUnref() {
for (CordRep* rep : unrefs_) CordRep::Unref(rep);
}
// Adds `rep` to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Add(CordRepType* rep) {
unrefs_.push_back(rep);
return rep;
}
// Increments the reference count of `rep` by one, and adds it to
// the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Ref(CordRepType* rep) {
unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
// Increments the reference count of `rep` by one if `condition` is true,
// and adds it to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* RefIf(bool condition, CordRepType* rep) {
if (condition) unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
private:
using CordRep = absl::cord_internal::CordRep;
std::vector<CordRep*> unrefs_;
};
} // namespace cordrep_testing
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_

View File

@@ -0,0 +1,96 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_functions.h"
#include <atomic>
#include <cmath>
#include <limits>
#include <random>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/exponential_biased.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// The average interval until the next sample. A value of 0 (or less) disables
// profiling while a value of 1 will profile all Cords.
// Accessed through get_cordz_mean_interval() / set_cordz_mean_interval(),
// which use acquire loads and release stores respectively.
std::atomic<int> g_cordz_mean_interval(50000);
} // namespace
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// Special negative 'not initialized' per thread value for cordz_next_sample.
static constexpr int64_t kInitCordzNextSample = -1;
// Per-thread countdown until the next sample; starts out in the
// 'not initialized' state.
ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample;
// kIntervalIfDisabled is the number of profile-eligible events that need to
// occur before the code will confirm that cordz is still disabled.
constexpr int64_t kIntervalIfDisabled = 1 << 16;
// Slow path of cordz_should_profile(). Consults the current mean interval and
// either reports a sample, keeps counting down, or draws a fresh stride for
// this thread's cordz_next_sample.
ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() {
  thread_local absl::base_internal::ExponentialBiased
      exponential_biased_generator;
  const int32_t interval = get_cordz_mean_interval();

  // Profiling is switched off: park the counter at a "large" value so the
  // fast path absorbs most calls before this check runs again.
  if (interval <= 0) {
    cordz_next_sample = kIntervalIfDisabled;
    return false;
  }

  // An interval of 1 means every Cord is sampled.
  if (interval == 1) {
    cordz_next_sample = 1;
    return true;
  }

  // Still counting down towards the next sample.
  if (cordz_next_sample > 0) {
    --cordz_next_sample;
    return false;
  }

  // The countdown has expired, or this is the first check on this thread.
  // Draw a new stride either way. Only an expired countdown is a sample; on
  // the first check, ask cordz_should_profile() again so the decision is made
  // against the freshly created (initial) stride.
  const bool first_check_on_thread = cordz_next_sample == kInitCordzNextSample;
  cordz_next_sample = exponential_biased_generator.GetStride(interval);
  if (!first_check_on_thread) return true;
  return cordz_should_profile();
}
// Test-only hook: overwrites the calling thread's cordz_next_sample countdown
// with `next_sample`.
void cordz_set_next_sample_for_testing(int64_t next_sample) {
cordz_next_sample = next_sample;
}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
// Returns the current mean sampling interval (acquire load of the global).
int32_t get_cordz_mean_interval() {
  const int32_t interval =
      g_cordz_mean_interval.load(std::memory_order_acquire);
  return interval;
}
// Stores `mean_interval` as the new global mean sampling interval (release
// store). The slow path treats values of 0 or less as 'profiling disabled'
// and a value of 1 as 'sample every Cord'.
void set_cordz_mean_interval(int32_t mean_interval) {
g_cordz_mean_interval.store(mean_interval, std::memory_order_release);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,85 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CORDZ_FUNCTIONS_H_
#define ABSL_STRINGS_CORDZ_FUNCTIONS_H_
#include <stdint.h>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/optimization.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Returns the current sample rate. This represents the average interval
// between samples.
int32_t get_cordz_mean_interval();
// Sets the sample rate with the average interval between samples.
void set_cordz_mean_interval(int32_t mean_interval);
// Enable cordz unless any of the following applies:
// - no thread local support
// - MSVC build
// - Android build
// - Apple build
// - DLL build
// Hashtablez is turned off completely in opensource builds.
// MSVC's static atomics are dynamically initialized in debug mode, which breaks
// sampling.
#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \
!defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \
!defined(__ANDROID__) && !defined(__APPLE__)
#define ABSL_INTERNAL_CORDZ_ENABLED 1
#endif
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// cordz_next_sample is the number of events until the next sample event. If
// the value is 1 or less, the code will check on the next event if cordz is
// enabled, and if so, will sample the Cord. cordz is only enabled when we can
// use thread locals.
ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample;
// Determines if the next sample should be profiled. As part of that decision,
// the thread-local cordz_next_sample is updated with the interval until the
// next sample.
bool cordz_should_profile_slow();
// Returns true if the next cord should be sampled.
// Fast path: while the per-thread countdown is above 1, just decrement it and
// report 'no sample'. Everything else is deferred to
// cordz_should_profile_slow().
inline bool cordz_should_profile() {
  const bool still_counting_down = ABSL_PREDICT_TRUE(cordz_next_sample > 1);
  if (!still_counting_down) {
    return cordz_should_profile_slow();
  }
  --cordz_next_sample;
  return false;
}
// Sets the interval until the next sample (for testing only)
void cordz_set_next_sample_for_testing(int64_t next_sample);
#else // ABSL_INTERNAL_CORDZ_ENABLED
// Cordz is compiled out in this configuration: nothing is ever sampled.
inline bool cordz_should_profile() {
  return false;
}
// Accepted for API parity with the enabled build; the value is ignored.
inline void cordz_set_next_sample_for_testing(int64_t /*next_sample*/) {
}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORDZ_FUNCTIONS_H_

View File

@@ -0,0 +1,149 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_functions.h"
#include <thread> // NOLINT we need real clean new threads
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::Ge;
using ::testing::Le;
// Round-trips a value through the mean interval setter and getter, then puts
// the original interval back.
TEST(CordzFunctionsTest, SampleRate) {
  const int32_t saved_interval = get_cordz_mean_interval();
  const int32_t new_interval = 123;
  set_cordz_mean_interval(new_interval);
  EXPECT_THAT(get_cordz_mean_interval(), Eq(new_interval));
  set_cordz_mean_interval(saved_interval);
}
// Cordz is disabled when we don't have thread_local. All calls to
// should_profile will return false when cordz is disabled, so we might want to
// avoid those tests.
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// With a mean interval of 0, profiling is off: nothing is sampled and the
// countdown is parked at the 'disabled' interval.
TEST(CordzFunctionsTest, ShouldProfileDisable) {
  const int32_t saved_interval = get_cordz_mean_interval();

  set_cordz_mean_interval(0);
  cordz_set_next_sample_for_testing(0);
  EXPECT_FALSE(cordz_should_profile());
  // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc.
  EXPECT_THAT(cordz_next_sample, Eq(1 << 16));

  set_cordz_mean_interval(saved_interval);
}
// With a mean interval of 1, every call samples and the countdown never
// rises above 1.
TEST(CordzFunctionsTest, ShouldProfileAlways) {
  const int32_t saved_interval = get_cordz_mean_interval();

  set_cordz_mean_interval(1);
  cordz_set_next_sample_for_testing(1);
  EXPECT_TRUE(cordz_should_profile());
  EXPECT_THAT(cordz_next_sample, Le(1));

  set_cordz_mean_interval(saved_interval);
}
// Verifies that the very first cordz_should_profile() call on a brand new
// thread is not unconditionally sampled: some fresh thread within the first
// 1000 must report 'not sampled'.
TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) {
  // Restores the global mean interval on every exit path, including the early
  // return taken by a failing ASSERT_THAT below, so that this test does not
  // leak its interval into tests that run after it.
  struct MeanIntervalRestorer {
    explicit MeanIntervalRestorer(int32_t interval) : saved(interval) {}
    ~MeanIntervalRestorer() { set_cordz_mean_interval(saved); }
    int32_t saved;
  };
  const MeanIntervalRestorer restore_interval(get_cordz_mean_interval());

  // Set large enough interval such that the chance of 'tons' of threads
  // randomly sampling the first call is infinitely small.
  set_cordz_mean_interval(10000);
  int tries = 0;
  bool sampled = false;
  do {
    ++tries;
    ASSERT_THAT(tries, Le(1000));
    // A real, clean thread is needed so cordz_next_sample starts out in its
    // 'not initialized' state.
    std::thread thread([&sampled] {
      sampled = cordz_should_profile();
    });
    thread.join();
  } while (sampled);
}
// Checks that the strides generated for cordz_next_sample average out to the
// configured mean interval, within a statistically derived tolerance.
TEST(CordzFunctionsTest, ShouldProfileRate) {
  static constexpr int kDesiredMeanInterval = 1000;
  static constexpr int kSamples = 10000;
  const int32_t saved_interval = get_cordz_mean_interval();
  set_cordz_mean_interval(kDesiredMeanInterval);

  int64_t total_of_strides = 0;
  for (int sample = 0; sample < kSamples; ++sample) {
    // Setting next_sample to 0 will force cordz_should_profile to generate a
    // new value for next_sample each iteration.
    cordz_set_next_sample_for_testing(0);
    cordz_should_profile();
    total_of_strides += cordz_next_sample;
  }

  // The sum of independent exponential variables is an Erlang distribution,
  // which is a gamma distribution where the shape parameter is equal to the
  // number of summands. The distribution used for cordz_should_profile is
  // actually floor(Exponential(1/mean)) which introduces bias. However, we can
  // apply the squint-really-hard correction factor. That is, when mean is
  // large, then if we squint really hard the shape of the distribution between
  // N and N+1 looks like a uniform distribution. On average, each value for
  // next_sample will be about 0.5 lower than we would expect from an
  // exponential distribution. This squint-really-hard correction approach won't
  // work when mean is smaller than about 10 but works fine when mean is 1000.
  //
  // We can use R to calculate a confidence interval. This
  // shows how to generate a confidence interval with a false positive rate of
  // one in a billion.
  //
  // $ R -q
  // > mean = 1000
  // > kSamples = 10000
  // > errorRate = 1e-9
  // > correction = -kSamples / 2
  // > low = qgamma(errorRate/2, kSamples, 1/mean) + correction
  // > high = qgamma(1 - errorRate/2, kSamples, 1/mean) + correction
  // > low
  // [1] 9396115
  // > high
  // [1] 10618100
  EXPECT_THAT(total_of_strides, Ge(9396115));
  EXPECT_THAT(total_of_strides, Le(10618100));

  set_cordz_mean_interval(saved_interval);
}
#else // ABSL_INTERNAL_CORDZ_ENABLED
// When cordz is compiled out, nothing is sampled even with a mean interval
// that would otherwise sample every Cord.
TEST(CordzFunctionsTest, ShouldProfileDisabled) {
  const int32_t saved_interval = get_cordz_mean_interval();

  set_cordz_mean_interval(1);
  cordz_set_next_sample_for_testing(0);
  EXPECT_FALSE(cordz_should_profile());

  set_cordz_mean_interval(saved_interval);
}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,139 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_handle.h"
#include <atomic>
#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK
#include "absl/base/internal/spinlock.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
using ::absl::base_internal::SpinLockHolder;
// Definition of the delete queue shared by all CordzHandle instances,
// constructed with absl::kConstInit.
ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit);
// Constructs a handle. Snapshot handles append themselves to the tail of the
// delete queue right away, under the queue lock; non-snapshot handles are not
// queued here.
CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) {
  if (!is_snapshot) return;

  SpinLockHolder lock(&queue_->mutex);
  CordzHandle* const previous_tail =
      queue_->dq_tail.load(std::memory_order_acquire);
  if (previous_tail != nullptr) {
    // Link behind the existing tail.
    dq_prev_ = previous_tail;
    previous_tail->dq_next_ = this;
  }
  queue_->dq_tail.store(this, std::memory_order_release);
}
// Destroys the handle. Only snapshot handles have work to do here: they unlink
// themselves from the delete queue, and if they were at the head of the queue
// they also delete the non-snapshot handles queued directly behind them.
CordzHandle::~CordzHandle() {
ODRCheck();
if (is_snapshot_) {
// Handles to delete are collected under the lock and deleted only after the
// lock has been released.
std::vector<CordzHandle*> to_delete;
{
SpinLockHolder lock(&queue_->mutex);
CordzHandle* next = dq_next_;
if (dq_prev_ == nullptr) {
// We were head of the queue, delete every CordzHandle until we reach
// either the end of the list, or a snapshot handle.
while (next && !next->is_snapshot_) {
to_delete.push_back(next);
next = next->dq_next_;
}
} else {
// Another CordzHandle existed before this one, don't delete anything.
dq_prev_->dq_next_ = next;
}
// `next` is now the first entry that stays queued behind us (or null).
// Attach it to our predecessor, or make our predecessor the new tail.
if (next) {
next->dq_prev_ = dq_prev_;
} else {
queue_->dq_tail.store(dq_prev_, std::memory_order_release);
}
}
for (CordzHandle* handle : to_delete) {
delete handle;
}
}
}
// Snapshots are always safe to delete; any other handle is safe to delete
// only while the delete queue is empty.
bool CordzHandle::SafeToDelete() const {
  if (is_snapshot_) return true;
  return queue_->IsEmpty();
}
// Deletes `handle` immediately when that is safe; otherwise appends it to the
// tail of the delete queue so it is destroyed later. A null `handle` trips
// the assert in debug builds and is ignored otherwise.
void CordzHandle::Delete(CordzHandle* handle) {
  assert(handle);
  if (handle == nullptr) return;

  handle->ODRCheck();
  Queue* const queue = handle->queue_;
  if (!handle->SafeToDelete()) {
    SpinLockHolder lock(&queue->mutex);
    // Re-read the tail under the lock: the queue may have drained since
    // SafeToDelete() looked at it without holding the lock.
    CordzHandle* const tail = queue->dq_tail.load(std::memory_order_acquire);
    if (tail != nullptr) {
      handle->dq_prev_ = tail;
      tail->dq_next_ = handle;
      queue->dq_tail.store(handle, std::memory_order_release);
      return;
    }
  }
  // Reached with the lock released: nothing is queued (or the handle is a
  // snapshot), so destroy the handle directly.
  delete handle;
}
std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
std::vector<const CordzHandle*> handles;
SpinLockHolder lock(&global_queue_.mutex);
CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire);
for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) {
handles.push_back(p);
}
return handles;
}
bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
const CordzHandle* handle) const {
ODRCheck();
if (!is_snapshot_) return false;
if (handle == nullptr) return true;
if (handle->is_snapshot_) return false;
bool snapshot_found = false;
SpinLockHolder lock(&queue_->mutex);
for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) {
if (p == handle) return !snapshot_found;
if (p == this) snapshot_found = true;
}
ABSL_ASSERT(snapshot_found); // Assert that 'this' is in delete queue.
return true;
}
std::vector<const CordzHandle*>
CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() {
ODRCheck();
std::vector<const CordzHandle*> handles;
if (!is_snapshot()) {
return handles;
}
SpinLockHolder lock(&queue_->mutex);
for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) {
if (!p->is_snapshot()) {
handles.push_back(p);
}
}
return handles;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,131 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CORDZ_HANDLE_H_
#define ABSL_STRINGS_CORDZ_HANDLE_H_
#include <atomic>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// This base class allows multiple types of object (CordzInfo and
// CordzSampleToken) to exist simultaneously on the delete queue (pointed to by
// `global_queue_.dq_tail` and traversed using dq_prev_ and dq_next_). The
// delete queue guarantees that once a profiler creates a CordzSampleToken and
// has gained visibility into a CordzInfo object, that CordzInfo object will not
// be deleted prematurely. This allows the profiler to inspect all CordzInfo
// objects that are alive without needing to hold a global lock.
class CordzHandle {
public:
// Creates a non-snapshot handle.
CordzHandle() : CordzHandle(false) {}
// Returns true if this handle was constructed as a snapshot.
bool is_snapshot() const { return is_snapshot_; }
// Returns true if this instance is safe to be deleted because it is either a
// snapshot, which is always safe to delete, or not included in the global
// delete queue and thus not included in any snapshot.
// Callers are responsible for making sure this instance can not be newly
// discovered by other threads. For example, CordzInfo instances first de-list
// themselves from the global CordzInfo list before determining if they are
// safe to be deleted directly.
// If SafeToDelete returns false, callers MUST use the Delete() method to
// safely queue CordzHandle instances for deletion.
bool SafeToDelete() const;
// Deletes the provided instance, or puts it on the delete queue to be deleted
// once there are no more sample tokens (snapshot) instances potentially
// referencing the instance. `handle` should not be null.
static void Delete(CordzHandle* handle);
// Returns the current entries in the delete queue in LIFO order.
static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue();
// Returns true if the provided handle is nullptr or guarded by this handle.
// Since the CordzSnapshot token is itself a CordzHandle, this method will
// allow tests to check if that token is keeping an arbitrary CordzHandle
// alive.
bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const;
// Returns the current entries in the delete queue, in LIFO order, that are
// protected by this. CordzHandle objects are only placed on the delete queue
// after CordzHandle::Delete is called with them as an argument. Only
// CordzHandle objects that are not also CordzSnapshot objects will be
// included in the return vector. For each of the handles in the return
// vector, the earliest that their memory can be freed is when this
// CordzSnapshot object is deleted.
std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles();
protected:
// Creates a handle; `is_snapshot` selects whether it is a snapshot handle.
explicit CordzHandle(bool is_snapshot);
virtual ~CordzHandle();
private:
// Global queue data. CordzHandle stores a pointer to the global queue
// instance to harden against ODR violations.
struct Queue {
constexpr explicit Queue(absl::ConstInitType)
: mutex(absl::kConstInit,
absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}
// Guards `dq_tail` and the dq_prev_ / dq_next_ links of queued handles.
absl::base_internal::SpinLock mutex;
// Tail (most recently queued entry) of the delete queue; null when empty.
std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};
// Returns true if this delete queue is empty. This method does not acquire
// the lock, but does a 'load acquire' observation on the delete queue tail.
// It is used inside Delete() to check for the presence of a delete queue
// without holding the lock. The assumption is that the caller is in the
// state of 'being deleted', and can not be newly discovered by a concurrent
// 'being constructed' snapshot instance. Practically, this means that any
// such discovery (`find`, 'first' or 'next', etc) must have proper 'happens
// before / after' semantics and atomic fences.
bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
return dq_tail.load(std::memory_order_acquire) == nullptr;
}
};
// In debug builds (NDEBUG not defined), checks that `queue_` points at this
// translation unit's view of `global_queue_`; a mismatch is reported as an
// ODR violation. Compiles to nothing when NDEBUG is defined.
void ODRCheck() const {
#ifndef NDEBUG
ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord");
#endif
}
ABSL_CONST_INIT static Queue global_queue_;
// Always initialized to &global_queue_; compared against it by ODRCheck().
Queue* const queue_ = &global_queue_;
const bool is_snapshot_;
// dq_prev_ and dq_next_ require the global queue mutex to be held.
// Unfortunately we can't use thread annotations such that the thread safety
// analysis understands that queue_ and global_queue_ are one and the same.
CordzHandle* dq_prev_ = nullptr;
CordzHandle* dq_next_ = nullptr;
};
// A CordzHandle constructed as a snapshot (is_snapshot() returns true).
class CordzSnapshot : public CordzHandle {
public:
// Passes is_snapshot = true to the CordzHandle constructor.
CordzSnapshot() : CordzHandle(true) {}
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORDZ_HANDLE_H_

View File

@@ -0,0 +1,265 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_handle.h"
#include <random>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::SizeIs;
// Shorthand for CordzHandle::DiagnosticsGetDeleteQueue().
std::vector<const CordzHandle*> DeleteQueue() {
  std::vector<const CordzHandle*> queue =
      CordzHandle::DiagnosticsGetDeleteQueue();
  return queue;
}
// CordzHandle that sets `*deleted` to true from its destructor, letting tests
// observe exactly when the handle is destroyed.
struct CordzHandleDeleteTracker : public CordzHandle {
  bool* deleted;

  explicit CordzHandleDeleteTracker(bool* deleted_flag)
      : deleted(deleted_flag) {}

  ~CordzHandleDeleteTracker() override { *deleted = true; }
};
// The delete queue holds no entries when no handles have been created.
TEST(CordzHandleTest, DeleteQueueIsEmpty) {
  const std::vector<const CordzHandle*> queue = DeleteQueue();
  EXPECT_THAT(queue, SizeIs(0));
}
// Without any snapshot alive, a plain handle is deleted immediately and never
// appears on the delete queue.
TEST(CordzHandleTest, CordzHandleCreateDelete) {
  bool deleted = false;
  CordzHandleDeleteTracker* const tracker =
      new CordzHandleDeleteTracker(&deleted);
  EXPECT_FALSE(tracker->is_snapshot());
  EXPECT_TRUE(tracker->SafeToDelete());
  EXPECT_THAT(DeleteQueue(), SizeIs(0));

  CordzHandle::Delete(tracker);
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_TRUE(deleted);
}
// A snapshot is queued on construction and removed from the queue again when
// it is deleted.
TEST(CordzHandleTest, CordzSnapshotCreateDelete) {
  CordzSnapshot* const snapshot = new CordzSnapshot();
  EXPECT_TRUE(snapshot->is_snapshot());
  EXPECT_TRUE(snapshot->SafeToDelete());
  EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot));

  delete snapshot;
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
}
// While a snapshot is alive, Delete() only queues the handle; the handle is
// destroyed once the snapshot is deleted.
TEST(CordzHandleTest, CordzHandleCreateDeleteWithSnapshot) {
  bool deleted = false;
  CordzSnapshot* const snapshot = new CordzSnapshot();
  CordzHandleDeleteTracker* const tracker =
      new CordzHandleDeleteTracker(&deleted);
  EXPECT_FALSE(tracker->SafeToDelete());

  CordzHandle::Delete(tracker);
  EXPECT_THAT(DeleteQueue(), ElementsAre(tracker, snapshot));
  EXPECT_FALSE(deleted);
  EXPECT_FALSE(tracker->SafeToDelete());

  delete snapshot;
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_TRUE(deleted);
}
// Interleaves three snapshots with three deleted handles and checks that each
// handle is destroyed only after every snapshot queued ahead of it is gone.
TEST(CordzHandleTest, MultiSnapshot) {
  bool deleted[3] = {false, false, false};
  CordzSnapshot* snapshot[3];
  CordzHandleDeleteTracker* handle[3];
  for (int i = 0; i < 3; ++i) {
    snapshot[i] = new CordzSnapshot();
    handle[i] = new CordzHandleDeleteTracker(&deleted[i]);
    CordzHandle::Delete(handle[i]);
  }
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1],
                                         snapshot[1], handle[0], snapshot[0]));
  EXPECT_THAT(deleted, ElementsAre(false, false, false));

  // Deleting a snapshot from the middle of the queue frees nothing:
  // snapshot[0] is still queued ahead of the handles behind it.
  delete snapshot[1];
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1],
                                         handle[0], snapshot[0]));
  EXPECT_THAT(deleted, ElementsAre(false, false, false));

  // Deleting the head snapshot frees every handle up to the next snapshot.
  delete snapshot[0];
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2]));
  EXPECT_THAT(deleted, ElementsAre(true, true, false));

  // Deleting the last snapshot frees the remaining handle. All three flags
  // must now be set.
  delete snapshot[2];
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_THAT(deleted, ElementsAre(true, true, true));
}
// Exercises DiagnosticsHandleIsSafeToInspect() with two snapshots and two
// handles created and deleted at different points relative to the snapshots.
TEST(CordzHandleTest, DiagnosticsHandleIsSafeToInspect) {
  CordzSnapshot snapshot1;
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(nullptr));

  // handle1 is created and deleted while only snapshot1 exists.
  CordzHandle* const handle1 = new CordzHandle();
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1));
  CordzHandle::Delete(handle1);
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1));

  // snapshot2 is created after handle1 was queued for deletion, so it does
  // not guard handle1; handle2 is still alive and is safe for both.
  CordzSnapshot snapshot2;
  CordzHandle* const handle2 = new CordzHandle();
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1));
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle2));
  EXPECT_FALSE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle1));
  EXPECT_TRUE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle2));

  CordzHandle::Delete(handle2);
  EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1));
}
// Checks which deleted handles each snapshot reports as safe to inspect,
// depending on whether the handle was deleted before or after the snapshot
// was created.
TEST(CordzHandleTest, DiagnosticsGetSafeToInspectDeletedHandles) {
  EXPECT_THAT(DeleteQueue(), IsEmpty());

  CordzHandle* const handle = new CordzHandle();
  CordzSnapshot* const snapshot1 = new CordzSnapshot();

  // snapshot1 should be able to see handle.
  EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot1));
  EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle));
  EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(),
              IsEmpty());

  // This handle will be safe to inspect as long as snapshot1 is alive. However,
  // since only snapshot1 can prove that it's alive, it will be hidden from
  // snapshot2.
  CordzHandle::Delete(handle);

  // This snapshot shouldn't be able to see handle because handle was already
  // sent to Delete.
  CordzSnapshot* const snapshot2 = new CordzSnapshot();

  // DeleteQueue elements are LIFO order.
  EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2, handle, snapshot1));

  EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle));
  EXPECT_FALSE(snapshot2->DiagnosticsHandleIsSafeToInspect(handle));

  EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(),
              ElementsAre(handle));
  EXPECT_THAT(snapshot2->DiagnosticsGetSafeToInspectDeletedHandles(),
              IsEmpty());

  CordzHandle::Delete(snapshot1);
  EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2));

  CordzHandle::Delete(snapshot2);
  EXPECT_THAT(DeleteQueue(), IsEmpty());
}
// Create and delete CordzHandle and CordzSnapshot objects in multiple threads
// so that tsan has some time to chew on it and look for memory problems.
TEST(CordzHandleTest, MultiThreaded) {
  Notification stop;
  static constexpr int kNumThreads = 4;
  // Keep the number of handles relatively small so that the test will naturally
  // transition to an empty delete queue during the test. If there are, say, 100
  // handles, that will virtually never happen. With 10 handles and around 50k
  // iterations in each of 4 threads, the delete queue appears to become empty
  // around 200 times.
  static constexpr int kNumHandles = 10;

  // Each thread is going to pick a random index and atomically swap its
  // CordzHandle with one in handles. This way, each thread can avoid
  // manipulating a CordzHandle that might be operated upon in another thread.
  std::vector<std::atomic<CordzHandle*>> handles(kNumHandles);

  // global bool which is set when any thread did get some 'safe to inspect'
  // handles. On some platforms and OSS tests, we might risk that some pool
  // threads are starved, stalled, or just got a few unlikely random 'handle'
  // coin tosses, so we satisfy this test with simply observing 'some' thread
  // did something meaningful, which should minimize the potential for flakes.
  std::atomic<bool> found_safe_to_inspect(false);

  {
    absl::synchronization_internal::ThreadPool pool(kNumThreads);
    for (int i = 0; i < kNumThreads; ++i) {
      pool.Schedule([&stop, &handles, &found_safe_to_inspect]() {
        std::minstd_rand gen;
        std::uniform_int_distribution<int> dist_type(0, 2);
        std::uniform_int_distribution<int> dist_handle(0, kNumHandles - 1);

        while (!stop.HasBeenNotified()) {
          // Pick what to install next: a plain handle, a snapshot, or nothing.
          CordzHandle* fresh = nullptr;
          const int type = dist_type(gen);
          if (type == 0) {
            fresh = new CordzHandle();
          } else if (type == 1) {
            fresh = new CordzSnapshot();
          }

          CordzHandle* old_handle = handles[dist_handle(gen)].exchange(fresh);
          if (old_handle == nullptr) continue;

          std::vector<const CordzHandle*> safe_to_inspect =
              old_handle->DiagnosticsGetSafeToInspectDeletedHandles();
          for (const CordzHandle* inspected : safe_to_inspect) {
            // We're in a tight loop, so don't generate too many error
            // messages.
            ASSERT_FALSE(inspected->is_snapshot());
          }
          if (!safe_to_inspect.empty()) {
            found_safe_to_inspect.store(true);
          }
          CordzHandle::Delete(old_handle);
        }

        // Have each thread attempt to clean up everything. Some thread will be
        // the last to reach this cleanup code, and it will be guaranteed to
        // clean up everything because nothing remains to create new handles.
        for (auto& slot : handles) {
          if (CordzHandle* leftover = slot.exchange(nullptr)) {
            CordzHandle::Delete(leftover);
          }
        }
      });
    }

    // The threads will hammer away. Give it a little bit of time for tsan to
    // spot errors.
    absl::SleepFor(absl::Seconds(3));
    stop.Notify();
  }

  // Confirm that the test did *something*. This check will be satisfied as
  // long as any thread has deleted a CordzSnapshot object and a non-snapshot
  // CordzHandle was deleted after the CordzSnapshot was created.
  // See also comments on `found_safe_to_inspect`
  EXPECT_TRUE(found_safe_to_inspect.load());
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,445 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_info.h"
#include "absl/base/config.h"
#include "absl/base/internal/spinlock.h"
#include "absl/container/inlined_vector.h"
#include "absl/debugging/stacktrace.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// SpinLockHolder is used below to hold the global list's spinlock in
// Track() and Untrack().
using ::absl::base_internal::SpinLockHolder;
// Out-of-class definition of the static constexpr data member declared in the
// class; required before C++17 whenever the constant is odr-used.
constexpr int CordzInfo::kMaxStackDepth;
// Definition of the global list of tracked CordzInfo instances, constructed
// with absl::kConstInit and marked ABSL_CONST_INIT.
ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{absl::kConstInit};
namespace {
// CordRepAnalyzer performs the analysis of a cord.
//
// It computes absolute node counts and total memory usage, and an 'estimated
// fair share memory usage' statistic.
// Conceptually, it divides the 'memory usage' at each location in the 'cord
// graph' by the cumulative reference count of that location. The cumulative
// reference count is the factored total of all edges leading into that node.
//
// The top level node is treated specially: we assume the current thread
// (typically called from the CordzHandler) to hold a reference purely to
// perform a safe analysis, and not being part of the application. So we
// subtract 1 from the reference count of the top node to compute the
// 'application fair share' excluding the reference of the current thread.
//
// An example of fair sharing, and why we multiply reference counts:
// Assume we have 2 CordReps, both being a Substring referencing a Flat:
// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2)
// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2)
//
// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not
// referenced directly anywhere else. Translated into a 'fair share', we then
// attribute 50% of the memory (memory / refcount = 2) to each incoming edge.
// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the
// memory cost below it, i.e.: the fair share of Rep A of the memory used by C
// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'.
// It is also easy to see how all incoming edges add up to 100%.
class CordRepAnalyzer {
public:
// Creates an analyzer instance binding to `statistics`.
explicit CordRepAnalyzer(CordzStatistics& statistics)
: statistics_(statistics) {}
// Analyzes the memory statistics and node counts for the provided `rep`, and
// adds the results to `statistics`. Note that node counts and memory sizes
// are not initialized, computed values are added to any existing values.
void AnalyzeCordRep(const CordRep* rep) {
// Process all linear nodes.
// As per the class comments, use refcount - 1 on the top level node, as the
// top level node is assumed to be referenced only for analysis purposes.
// A refcount of 1 is kept as 1 so that the fair share divisor is never zero.
size_t refcount = rep->refcount.Get();
RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1};
// Process all top level linear nodes (substrings and flats).
repref = CountLinearReps(repref, memory_usage_);
// A non-null result is the first non-linear node: dispatch on its tag.
if (repref.rep != nullptr) {
if (repref.rep->tag == RING) {
AnalyzeRing(repref);
} else if (repref.rep->tag == BTREE) {
AnalyzeBtree(repref);
} else if (repref.rep->tag == CONCAT) {
AnalyzeConcat(repref);
} else {
// We should have either a concat, btree, or ring node if not null.
assert(false);
}
}
// Adds values to output
statistics_.estimated_memory_usage += memory_usage_.total;
statistics_.estimated_fair_share_memory_usage +=
static_cast<size_t>(memory_usage_.fair_share);
}
private:
// RepRef identifies a CordRep* inside the Cord tree with its cumulative
// refcount including itself. For example, a tree consisting of a substring
// with a refcount of 3 and a child flat with a refcount of 4 will have RepRef
// refcounts of 3 and 12 respectively.
struct RepRef {
const CordRep* rep;
size_t refcount;
// Returns a 'child' RepRef which contains the cumulative reference count of
// this instance multiplied by the child's reference count.
RepRef Child(const CordRep* child) const {
return RepRef{child, refcount * child->refcount.Get()};
}
};
// Memory usage values
struct MemoryUsage {
size_t total = 0;
double fair_share = 0.0;
// Adds `size` memory usage to this class, with a cumulative (recursive)
// reference count of `refcount`. `total` grows by the full `size`, while
// `fair_share` grows by `size / refcount`.
void Add(size_t size, size_t refcount) {
total += size;
fair_share += static_cast<double>(size) / refcount;
}
};
// Returns `repref` if `repref.rep` is not null and a CONCAT type, else
// returns RepRef{nullptr, 0}.
// Asserts that `repref.rep` is a concat node or null.
static RepRef AssertConcat(RepRef repref) {
const CordRep* rep = repref.rep;
assert(rep == nullptr || rep->tag == CONCAT);
return (rep != nullptr && rep->tag == CONCAT) ? repref : RepRef{nullptr, 0};
}
// Counts a flat of the provided allocated size. Every flat is added to
// `node_count` and `node_counts.flat`; flats of at most 1024 bytes are also
// added to the smallest size bucket they fit in.
void CountFlat(size_t size) {
statistics_.node_count++;
statistics_.node_counts.flat++;
if (size <= 64) {
statistics_.node_counts.flat_64++;
} else if (size <= 128) {
statistics_.node_counts.flat_128++;
} else if (size <= 256) {
statistics_.node_counts.flat_256++;
} else if (size <= 512) {
statistics_.node_counts.flat_512++;
} else if (size <= 1024) {
statistics_.node_counts.flat_1k++;
}
}
// Processes 'linear' reps (substring, flat, external) not requiring iteration
// or recursion. Returns RepRef{nullptr, 0} if all reps were processed, else
// returns the top-most non-linear cordrep (concat, btree or ring as handled
// by the callers).
// Node counts are updated into `statistics_`, memory usage is updated into
// `memory_usage`; every caller in this class passes `memory_usage_`.
RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) {
// Consume all substrings
while (rep.rep->tag == SUBSTRING) {
statistics_.node_count++;
statistics_.node_counts.substring++;
memory_usage.Add(sizeof(CordRepSubstring), rep.refcount);
rep = rep.Child(rep.rep->substring()->child);
}
// Consume possible FLAT
if (rep.rep->tag >= FLAT) {
size_t size = rep.rep->flat()->AllocatedSize();
CountFlat(size);
memory_usage.Add(size, rep.refcount);
return RepRef{nullptr, 0};
}
// Consume possible external
if (rep.rep->tag == EXTERNAL) {
statistics_.node_count++;
statistics_.node_counts.external++;
size_t size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>);
memory_usage.Add(size, rep.refcount);
return RepRef{nullptr, 0};
}
return rep;
}
// Analyzes the provided concat node in a flattened recursive way: the left
// branch is followed first and pending right-hand concat nodes are kept on
// an explicit stack.
void AnalyzeConcat(RepRef rep) {
absl::InlinedVector<RepRef, 47> pending;
while (rep.rep != nullptr) {
const CordRepConcat* concat = rep.rep->concat();
RepRef left = rep.Child(concat->left);
RepRef right = rep.Child(concat->right);
statistics_.node_count++;
statistics_.node_counts.concat++;
memory_usage_.Add(sizeof(CordRepConcat), rep.refcount);
// After counting linear children, each side is either null (fully
// processed) or a nested concat that still needs a visit.
right = AssertConcat(CountLinearReps(right, memory_usage_));
rep = AssertConcat(CountLinearReps(left, memory_usage_));
if (rep.rep != nullptr) {
if (right.rep != nullptr) {
pending.push_back(right);
}
} else if (right.rep != nullptr) {
rep = right;
} else if (!pending.empty()) {
rep = pending.back();
pending.pop_back();
}
}
}
// Analyzes the provided ring: counts the ring node itself and then every
// entry child as a linear rep.
void AnalyzeRing(RepRef rep) {
statistics_.node_count++;
statistics_.node_counts.ring++;
const CordRepRing* ring = rep.rep->ring();
memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount);
ring->ForEach([&](CordRepRing::index_type pos) {
CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_);
});
}
// Analyzes the provided btree. Recurses into the edges of nodes with a
// height above 0; edges of height 0 nodes are counted as linear reps.
void AnalyzeBtree(RepRef rep) {
statistics_.node_count++;
statistics_.node_counts.btree++;
memory_usage_.Add(sizeof(CordRepBtree), rep.refcount);
const CordRepBtree* tree = rep.rep->btree();
if (tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
AnalyzeBtree(rep.Child(edge));
}
} else {
for (CordRep* edge : tree->Edges()) {
CountLinearReps(rep.Child(edge), memory_usage_);
}
}
}
// Output statistics which all results are added to.
CordzStatistics& statistics_;
// Memory totals accumulated during the analysis, flushed into `statistics_`
// at the end of AnalyzeCordRep().
MemoryUsage memory_usage_;
};
} // namespace
// Returns the current head of the global CordzInfo list, or null if the list
// is empty. Requires `snapshot` to be an actual snapshot handle.
CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) {
  ABSL_ASSERT(snapshot.is_snapshot());

  // No lock is taken here: the provided CordzSnapshot keeps the instance that
  // 'head' points to alive, so an acquire load of the current value suffices.
  // DEBUG builds additionally verify that the loaded value is safe to inspect
  // for this snapshot: ODR violations may lead to 'snapshot' and
  // 'global_list_' living in different libraries / modules.
  CordzInfo* const first = global_list_.head.load(std::memory_order_acquire);
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(first));
  return first;
}
// Returns the list entry following this instance, or null at the end of the
// list. Requires `snapshot` to be an actual snapshot handle.
CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const {
  ABSL_ASSERT(snapshot.is_snapshot());

  // As in 'Head()', the snapshot makes a lock-free acquire load sufficient.
  CordzInfo* const successor = ci_next_.load(std::memory_order_acquire);
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this));
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(successor));
  return successor;
}
// Starts sampling `cord`: allocates a CordzInfo for its tree, stores it into
// `cord` and links it into the global list.
// Requires `cord` to hold a tree and not to be profiled yet.
void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) {
  assert(cord.is_tree());
  assert(!cord.is_profiled());

  // No parent info: this cord is not sampled as a copy of another cord.
  CordzInfo* const info = new CordzInfo(cord.as_tree(), nullptr, method);
  cord.set_cordz_info(info);
  info->Track();
}
// Starts sampling `cord` as a copy of `src`: any CordzInfo currently attached
// to `cord` is untracked first, then a new CordzInfo carrying the parent
// information of `src` is created, attached and tracked.
// Requires both `cord` and `src` to hold a tree.
void CordzInfo::TrackCord(InlineData& cord, const InlineData& src,
                          MethodIdentifier method) {
  assert(cord.is_tree());
  assert(src.is_tree());

  // Unsample the current info: the cord is being replaced with 'src', so any
  // method history it collected is no longer relevant.
  if (CordzInfo* const stale = cord.cordz_info()) stale->Untrack();

  // Start a new cord sample.
  CordzInfo* const fresh =
      new CordzInfo(cord.as_tree(), src.cordz_info(), method);
  cord.set_cordz_info(fresh);
  fresh->Track();
}
// Makes the sampling state of `cord` follow that of `src`: tracks `cord` as a
// copy of `src` when `src` is profiled, otherwise stops sampling `cord` if it
// is currently profiled.
void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src,
                                   MethodIdentifier method) {
  if (src.is_profiled()) {
    TrackCord(cord, src, method);
    return;
  }
  if (!cord.is_profiled()) return;

  // `src` is not sampled but `cord` is: drop the sample.
  cord.cordz_info()->Untrack();
  cord.clear_cordz_info();
}
// Returns the method to record as 'parent method' for a cord sampled from
// `src`: the parent method of `src` if it has one, else the method of `src`
// itself. Returns kUnknown if `src` is null.
CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) {
  if (src == nullptr) {
    return MethodIdentifier::kUnknown;
  }
  if (src->parent_method_ == MethodIdentifier::kUnknown) {
    return src->method_;
  }
  return src->parent_method_;
}
// Copies the stack to record as 'parent stack' for a cord sampled from `src`
// into `stack`, and returns the number of frames copied. The parent stack of
// `src` is used if it is non-empty, else the stack of `src` itself.
// Returns 0 without touching `stack` if `src` is null.
int CordzInfo::FillParentStack(const CordzInfo* src, void** stack) {
  assert(stack);
  if (src == nullptr) return 0;

  const bool has_parent_stack = src->parent_stack_depth_ != 0;
  void* const* const frames =
      has_parent_stack ? src->parent_stack_ : src->stack_;
  const int depth =
      has_parent_stack ? src->parent_stack_depth_ : src->stack_depth_;
  memcpy(stack, frames, depth * sizeof(void*));
  return depth;
}
// Creates a CordzInfo for the cord tree `rep`, sampled by `method`. If `src`
// is not null, the parent stack, parent method and update counters are taken
// from `src`. The new instance is not linked into the global list; see
// Track().
CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src,
MethodIdentifier method)
: rep_(rep),
// GetStackTrace() fills `stack_` as a side effect of initializing the depth.
stack_depth_(absl::GetStackTrace(stack_, /*max_depth=*/kMaxStackDepth,
/*skip_count=*/1)),
// Likewise, FillParentStack() fills `parent_stack_` and returns its depth.
parent_stack_depth_(FillParentStack(src, parent_stack_)),
method_(method),
parent_method_(GetParentMethod(src)),
create_time_(absl::Now()) {
update_tracker_.LossyAdd(method);
if (src) {
// Copy parent counters.
update_tracker_.LossyAdd(src->update_tracker_);
}
}
// Releases the reference on `rep_`, if one is still held.
CordzInfo::~CordzInfo() {
  // A non-null `rep_` at this point means the CordzInfo was included in a
  // collection snapshot, which potentially kept `rep_` alive (this should be
  // rare).
  if (ABSL_PREDICT_FALSE(rep_ != nullptr)) {
    CordRep::Unref(rep_);
  }
}
// Links this instance in at the head of the global list while holding the
// list's spinlock.
void CordzInfo::Track() {
SpinLockHolder l(&list_->mutex);
CordzInfo* const head = list_->head.load(std::memory_order_acquire);
// The previous head (if any) gets this instance as its predecessor.
if (head != nullptr) {
head->ci_prev_.store(this, std::memory_order_release);
}
// Set our own `next` before publishing this instance as the new head.
ci_next_.store(head, std::memory_order_release);
list_->head.store(this, std::memory_order_release);
}
// Unlinks this instance from the global list and disposes of it: it is
// deleted directly when SafeToDelete() reports true, otherwise it is handed
// to CordzHandle::Delete() after taking an extra reference on `rep_`.
// The instance must not be used by the caller after this call.
void CordzInfo::Untrack() {
ODRCheck();
// Unlink from the doubly linked global list under the list's spinlock.
{
SpinLockHolder l(&list_->mutex);
CordzInfo* const head = list_->head.load(std::memory_order_acquire);
CordzInfo* const next = ci_next_.load(std::memory_order_acquire);
CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire);
if (next) {
ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this);
next->ci_prev_.store(prev, std::memory_order_release);
}
if (prev) {
ABSL_ASSERT(head != this);
ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this);
prev->ci_next_.store(next, std::memory_order_release);
} else {
// No predecessor: this instance is the head, so the head moves to `next`.
ABSL_ASSERT(head == this);
list_->head.store(next, std::memory_order_release);
}
}
// We can no longer be discovered: perform a fast path check if we are not
// listed on any delete queue, so we can directly delete this instance.
if (SafeToDelete()) {
// Clear `rep_` first so that the destructor does not unref it.
UnsafeSetCordRep(nullptr);
delete this;
return;
}
// We are likely part of a snapshot, extend the life of the CordRep
{
absl::MutexLock lock(&mutex_);
// This reference is released again by the destructor.
if (rep_) CordRep::Ref(rep_);
}
CordzHandle::Delete(this);
}
// Acquires `mutex_` and records `method` in `update_tracker_`. Asserts that
// this instance still has a non-null `rep_`. The lock remains held on return;
// release it with Unlock().
void CordzInfo::Lock(MethodIdentifier method)
ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) {
mutex_.Lock();
update_tracker_.LossyAdd(method);
assert(rep_);
}
// Releases `mutex_`. If `rep_` was null at the time of release, this instance
// is untracked, after which it must no longer be used.
void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) {
  // Sample `rep_` while the lock is still held.
  const bool rep_cleared = (rep_ == nullptr);
  mutex_.Unlock();
  if (rep_cleared) {
    Untrack();
  }
}
// Returns a read-only view of the first `stack_depth_` entries of `stack_`.
// The view refers to storage owned by this instance.
absl::Span<void* const> CordzInfo::GetStack() const {
return absl::MakeConstSpan(stack_, stack_depth_);
}
// Returns a read-only view of the first `parent_stack_depth_` entries of
// `parent_stack_`. The view refers to storage owned by this instance.
absl::Span<void* const> CordzInfo::GetParentStack() const {
return absl::MakeConstSpan(parent_stack_, parent_stack_depth_);
}
// Returns the statistics for this sampled cord. The method, parent method and
// update tracker are always filled in. The size, node counts and memory
// usage are only computed if a reference on the cord's rep can be obtained.
CordzStatistics CordzInfo::GetCordzStatistics() const {
  CordzStatistics stats;
  stats.method = method_;
  stats.parent_method = parent_method_;
  stats.update_tracker = update_tracker_;

  // Hold our own reference on the rep for the duration of the analysis.
  CordRep* const rep = RefCordRep();
  if (rep == nullptr) return stats;

  stats.size = rep->length;
  CordRepAnalyzer analyzer(stats);
  analyzer.AnalyzeCordRep(rep);
  CordRep::Unref(rep);
  return stats;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,298 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CORDZ_INFO_H_
#define ABSL_STRINGS_CORDZ_INFO_H_
#include <atomic>
#include <cstdint>
#include <functional>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/thread_annotations.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_functions.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzInfo tracks a profiled (sampled) Cord. While a cord is being tracked,
// its CordzInfo is linked into a global doubly linked list (`global_list_`)
// which can be walked through Head() and Next() under a CordzSnapshot.
// Untrack() removes the instance from that list and then either deletes it
// directly, or hands it to CordzHandle::Delete() when it may still be
// observed by a concurrent snapshot.
class ABSL_LOCKABLE CordzInfo : public CordzHandle {
public:
using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;
// TrackCord creates a CordzInfo instance which tracks important metrics of
// a sampled cord, and stores the created CordzInfo instance into `cord`. All
// CordzInfo instances are placed in a global list which is used to discover
// and snapshot all actively tracked cords. Callers are responsible for
// calling Untrack() (or MaybeUntrackCord()) before the tracked Cord instance
// is deleted, or to stop tracking the sampled Cord. Callers are also
// responsible for guarding changes to the 'tree' value of a Cord
// (InlineData.tree) through the Lock() and Unlock() calls. Any change
// resulting in a new tree value for the cord requires a call to SetCordRep()
// before the old tree has been unreffed and/or deleted. `method` identifies
// the Cord public API method initiating the cord to be sampled.
// Requires `cord` to hold a tree, and `cord.cordz_info()` to be null.
static void TrackCord(InlineData& cord, MethodIdentifier method);
// Identical to TrackCord(), except that this function fills the
// `parent_stack` and `parent_method` properties of the returned CordzInfo
// instance from the provided `src` instance if `src` is sampled.
// This function should be used for sampling 'copy constructed' and 'copy
// assigned' cords. This function allows `cord` to be already sampled, in
// which case the CordzInfo will be newly created from `src`.
static void TrackCord(InlineData& cord, const InlineData& src,
MethodIdentifier method);
// Maybe sample the cord identified by 'cord' for method 'method'.
// Uses `cordz_should_profile` to randomly pick cords to be sampled, and if
// so, invokes `TrackCord` to start sampling `cord`.
static void MaybeTrackCord(InlineData& cord, MethodIdentifier method);
// Maybe sample the cord identified by 'cord' for method 'method'.
// `src` identifies a 'parent' cord which is assigned to `cord`, typically the
// input cord for a copy constructor, or an assign method such as `operator=`
// `cord` will be sampled if (and only if) `src` is sampled.
// If `cord` is currently being sampled and `src` is not being sampled, then
// this function will stop sampling the cord and reset the cord's cordz_info.
//
// Previously this function defined that `cord` will be sampled if either
// `src` is sampled, or if `cord` is randomly picked for sampling. However,
// this can cause issues, as there may be paths where some cord is assigned an
// indirect copy of its own value. As such a 'string of copies' would then
// remain sampled (`src.is_profiled`), then assigning such a cord back to
// 'itself' creates a cycle where the cord will converge to 'always sampled'.
//
// For example:
//
// Cord x;
// for (...) {
// // Copy ctor --> y.is_profiled := x.is_profiled | random(...)
// Cord y = x;
// ...
// // Assign x = y --> x.is_profiled = y.is_profiled | random(...)
// // ==> x.is_profiled |= random(...)
// // ==> x converges to 'always profiled'
// x = y;
// }
static void MaybeTrackCord(InlineData& cord, const InlineData& src,
MethodIdentifier method);
// Stops tracking changes for a sampled cord, and deletes the provided info.
// This function must be called before the sampled cord instance is deleted,
// and before the root cordrep of the sampled cord is unreffed.
// This function may extend the lifetime of the cordrep in cases where the
// CordzInfo instance is being held by a concurrent collection thread.
void Untrack();
// Invokes Untrack() on `info` if `info` is not null.
static void MaybeUntrackCord(CordzInfo* info);
CordzInfo() = delete;
CordzInfo(const CordzInfo&) = delete;
CordzInfo& operator=(const CordzInfo&) = delete;
// Retrieves the head of the global list, or null if the list is empty.
// Track() inserts new instances at the head, so this is the most recently
// tracked CordzInfo.
static CordzInfo* Head(const CordzSnapshot& snapshot)
ABSL_NO_THREAD_SAFETY_ANALYSIS;
// Retrieves the entry following 'this' instance in the global list, i.e. the
// next CordzInfo older than 'this' instance, or null at the end of the list.
CordzInfo* Next(const CordzSnapshot& snapshot) const
ABSL_NO_THREAD_SAFETY_ANALYSIS;
// Locks this instance for the update identified by `method`.
// Increases the count for `method` in `update_tracker`.
void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_);
// Unlocks this instance. If the contained `rep` has been set to null
// indicating the Cord has been cleared or is otherwise no longer sampled,
// then this method will untrack this CordzInfo instance through Untrack(),
// after which the instance must no longer be used.
void Unlock() ABSL_UNLOCK_FUNCTION(mutex_);
// Asserts that this CordzInfo instance is locked.
void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_);
// Updates the `rep` property of this instance. This method is invoked by
// Cord logic each time the root node of a sampled Cord changes, and before
// the old root reference count is deleted. This guarantees that collection
// code can always safely take a reference on the tracked cord.
// Requires a lock to be held through the `Lock()` method.
// TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all
// Cord code is in a state where this can be proven true by the compiler.
void SetCordRep(CordRep* rep);
// Returns the current `rep` property of this instance with a reference
// added, or null if this instance represents a cord that has since been
// deleted or untracked.
CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_);
// Returns the current value of `rep_` for testing purposes only.
CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
return rep_;
}
// Sets the current value of `rep_` for testing purposes only.
void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS {
rep_ = rep;
}
// Returns the stack trace for where the cord was first sampled. Cords are
// potentially sampled when they promote from an inlined cord to a tree or
// ring representation, which is not necessarily the location where the cord
// was first created. Some cords are created as inlined cords, and only as
// data is added do they become a non-inlined cord. However, typically the
// location represents reasonably well where the cord is 'created'.
absl::Span<void* const> GetStack() const;
// Returns the stack trace for a sampled cord's 'parent stack trace'. This
// value may be set if the cord is sampled (promoted) after being created
// from, or being assigned the value of an existing (sampled) cord.
absl::Span<void* const> GetParentStack() const;
// Retrieves the CordzStatistics associated with this Cord. The statistics
// are only updated when a Cord goes through a mutation, such as an Append
// or RemovePrefix.
CordzStatistics GetCordzStatistics() const;
private:
using SpinLock = absl::base_internal::SpinLock;
using SpinLockHolder = ::absl::base_internal::SpinLockHolder;
// Global cordz info list. CordzInfo stores a pointer to the global list
// instance to harden against ODR violations.
struct List {
constexpr explicit List(absl::ConstInitType)
: mutex(absl::kConstInit,
absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}
SpinLock mutex;
std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr};
};
// Capacity, in frames, of `stack_` and `parent_stack_`.
static constexpr int kMaxStackDepth = 64;
// Instances are only created through TrackCord() and only destroyed through
// Untrack() / CordzHandle.
explicit CordzInfo(CordRep* rep, const CordzInfo* src,
MethodIdentifier method);
~CordzInfo() override;
// Sets `rep_` without holding a lock.
void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS;
// Links this instance in at the head of the global list.
void Track();
// Returns the parent method from `src`, which is either `parent_method_` or
// `method_` depending on `parent_method_` being kUnknown.
// Returns kUnknown if `src` is null.
static MethodIdentifier GetParentMethod(const CordzInfo* src);
// Fills the provided stack from `src`, copying either `parent_stack_` or
// `stack_` depending on `parent_stack_` being empty, returning the size of
// the parent stack.
// Returns 0 if `src` is null.
static int FillParentStack(const CordzInfo* src, void** stack);
// In debug builds, checks that `list_` refers to the `global_list_` visible
// to this code. A mismatch indicates an ODR violation.
void ODRCheck() const {
#ifndef NDEBUG
ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord");
#endif
}
// Non-inlined implementation of `MaybeTrackCord`, which is executed if
// either `src` is sampled or `cord` is sampled, and either untracks or
// tracks `cord` as documented per `MaybeTrackCord`.
static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src,
MethodIdentifier method);
ABSL_CONST_INIT static List global_list_;
List* const list_ = &global_list_;
// ci_prev_ and ci_next_ require the global list mutex to be held.
// Unfortunately we can't use thread annotations such that the thread safety
// analysis understands that list_ and global_list_ are one and the same.
std::atomic<CordzInfo*> ci_prev_{nullptr};
std::atomic<CordzInfo*> ci_next_{nullptr};
// Guards `rep_`; acquired and released through Lock() and Unlock().
mutable absl::Mutex mutex_;
CordRep* rep_ ABSL_GUARDED_BY(mutex_);
// Stack frames captured by the constructor, and the parent frames copied
// from `src`; only the first `*_depth_` entries of each array are valid.
void* stack_[kMaxStackDepth];
void* parent_stack_[kMaxStackDepth];
const int stack_depth_;
const int parent_stack_depth_;
const MethodIdentifier method_;
const MethodIdentifier parent_method_;
CordzUpdateTracker update_tracker_;
const absl::Time create_time_;
};
// Starts tracking `cord` if cordz_should_profile() selects it for sampling.
// The branch is hinted as unlikely to be taken.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
InlineData& cord, MethodIdentifier method) {
if (ABSL_PREDICT_FALSE(cordz_should_profile())) {
TrackCord(cord, method);
}
}
// Inlined fast path: only when `cord` or `src` is profiled does this call the
// out-of-line MaybeTrackCordImpl(), which tracks or untracks `cord`.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
InlineData& cord, const InlineData& src, MethodIdentifier method) {
if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) {
MaybeTrackCordImpl(cord, src, method);
}
}
// Calls Untrack() on `info` unless `info` is null. A non-null `info` is
// hinted as the unlikely case.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord(
CordzInfo* info) {
if (ABSL_PREDICT_FALSE(info)) {
info->Untrack();
}
}
// Checks that `mutex_` is held. The check is compiled out when NDEBUG is
// defined, in which case this function does nothing at runtime.
inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) {
#ifndef NDEBUG
mutex_.AssertHeld();
#endif
}
// Replaces `rep_` with `rep`. The caller must hold the lock; this is checked
// through AssertHeld(). No reference counts are changed here.
inline void CordzInfo::SetCordRep(CordRep* rep) {
AssertHeld();
rep_ = rep;
}
// Replaces `rep_` with `rep` without taking or checking any lock.
inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) { rep_ = rep; }
// Returns `rep_` with one reference added on behalf of the caller, or null if
// `rep_` is null. `mutex_` is held while `rep_` is read and referenced.
inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) {
  MutexLock lock(&mutex_);
  if (rep_ == nullptr) {
    return nullptr;
  }
  return CordRep::Ref(rep_);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORDZ_INFO_H_

View File

@@ -0,0 +1,625 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <random>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_scope.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Prints a fixed placeholder instead of the statistics contents: the matcher
// prints the relevant values as needed. Does nothing if `s` is null.
inline void PrintTo(const CordzStatistics& stats, std::ostream* s) {
  if (s == nullptr) return;
  *s << "CordzStatistics{...}";
}
namespace {
using ::testing::Ge;
// Creates a flat whose allocated size is `size` rounded up to a tag size.
// Requires `size` to be at least kMinFlatSize.
CordRepFlat* Flat(size_t size) {
  assert(size >= kMinFlatSize);

  // Round up to a tag size, as we are going to poke an exact tag size back
  // into the allocated flat. 'Size returning allocators' could grant us more
  // than we wanted, but we are ok to poke the 'requested' size in the tag,
  // even in the presence of sized deletes, so we need to make sure the size
  // rounds perfectly to a tag value.
  const size_t allocated = RoundUpForTag(size);
  const size_t capacity = allocated - kFlatOverhead;

  CordRepFlat* const result = CordRepFlat::New(capacity);
  result->tag = AllocatedSizeToTag(allocated);
  result->length = capacity;
  return result;
}
// Creates an external rep reporting the specified length, with a releaser
// that does nothing.
// NOTE(review): the string_view spans `length` bytes starting at an empty
// literal; this looks like it relies on the data never being read -- confirm
// before using the returned rep for anything but size accounting.
CordRepExternal* External(int length = 512) {
return static_cast<CordRepExternal*>(
NewExternalRep(absl::string_view("", length), [](absl::string_view) {}));
}
// Creates a substring node with `rep` as its child and a length of
// `rep->length - 1`.
CordRepSubstring* Substring(CordRep* rep) {
  CordRepSubstring* const node = new CordRepSubstring;
  node->tag = SUBSTRING;
  node->child = rep;
  node->length = rep->length - 1;
  return node;
}
// Creates a concat node with `left` and `right` as its children and the sum
// of their lengths as its length.
CordRepConcat* Concat(CordRep* left, CordRep* right) {
  CordRepConcat* const node = new CordRepConcat;
  node->tag = CONCAT;
  node->left = left;
  node->right = right;
  node->length = left->length + right->length;
  return node;
}
// Reference count helper
struct RefHelper {
std::vector<CordRep*> refs;
~RefHelper() {
for (CordRep* rep : refs) {
CordRep::Unref(rep);
}
}
// Invokes CordRep::Unref() on `rep` when this instance is destroyed.
template <typename T>
T* NeedsUnref(T* rep) {
refs.push_back(rep);
return rep;
}
// Adds `n` reference counts to `rep` which will be unreffed when this
// instance is destroyed.
template <typename T>
T* Ref(T* rep, size_t n = 1) {
while (n--) {
NeedsUnref(CordRep::Ref(rep));
}
return rep;
}
};
// Sizeof helper. Returns the allocated size of `rep`, excluding any child
// elements for substring, concat and ring cord reps.
// The primary template ignores `rep` and returns `sizeof(T)`.
template <typename T>
size_t SizeOf(const T* rep) {
return sizeof(T);
}
// Flats: the allocated size as reported by the flat itself.
template <>
size_t SizeOf(const CordRepFlat* rep) {
return rep->AllocatedSize();
}
// Externals: the size of an external impl node plus the external data length.
template <>
size_t SizeOf(const CordRepExternal* rep) {
// See cord.cc
return sizeof(CordRepExternalImpl<intptr_t>) + rep->length;
}
// Rings: the allocation size for the ring's capacity.
template <>
size_t SizeOf(const CordRepRing* rep) {
return CordRepRing::AllocSize(rep->capacity());
}
// Computes fair share memory used in a naive 'we dare to recurse' way.
// `ref` is the cumulative reference count of the path leading to `rep`; the
// node's own size is divided by `ref` multiplied by the node's own refcount.
double FairShareImpl(CordRep* rep, size_t ref) {
  ref *= rep->refcount.Get();

  double own = 0.0;     // Memory attributed to this node, before division.
  double nested = 0.0;  // Fair share of child nodes, already divided.
  if (rep->tag >= FLAT) {
    own = SizeOf(rep->flat());
  } else if (rep->tag == EXTERNAL) {
    own = SizeOf(rep->external());
  } else if (rep->tag == SUBSTRING) {
    own = SizeOf(rep->substring());
    nested = FairShareImpl(rep->substring()->child, ref);
  } else if (rep->tag == BTREE) {
    own = SizeOf(rep->btree());
    for (CordRep* child : rep->btree()->Edges()) {
      nested += FairShareImpl(child, ref);
    }
  } else if (rep->tag == RING) {
    own = SizeOf(rep->ring());
    // Ring children are computed with a base refcount of 1 and folded into
    // the ring's own amount, so they get divided by `ref` below.
    rep->ring()->ForEach([&](CordRepRing::index_type pos) {
      own += FairShareImpl(rep->ring()->entry_child(pos), 1);
    });
  } else if (rep->tag == CONCAT) {
    own = SizeOf(rep->concat());
    nested = FairShareImpl(rep->concat()->left, ref) +
             FairShareImpl(rep->concat()->right, ref);
  } else {
    assert(false);
  }
  return own / ref + nested;
}
// Returns the fair share memory size from `FairShareImpl()` as a size_t.
// The floating point result is converted with a static_cast.
size_t FairShare(CordRep* rep, size_t ref = 1) {
  const double share = FairShareImpl(rep, ref);
  return static_cast<size_t>(share);
}
// Samples the cord and returns CordzInfo::GetStatistics()
//
// Wraps `rep` in a temporary InlineData, tracks it, captures its statistics
// and untracks it again before returning.
CordzStatistics SampleCord(CordRep* rep) {
  InlineData cord(rep);
  CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown);
  CordzInfo* const info = cord.cordz_info();
  CordzStatistics stats = info->GetCordzStatistics();
  info->Untrack();
  return stats;
}
// Matcher comparing a CordzStatistics value (`arg`, reported as "actual")
// field by field against the matcher parameter `stats` (reported as
// "expected"). Every mismatching field is written to the result listener, so a
// single failure lists all differing members rather than only the first one.
MATCHER_P(EqStatistics, stats, "Statistics equal expected values") {
bool ok = true;
// Compares one member; on mismatch logs both values and marks the match as
// failed without returning early.
#define STATS_MATCHER_EXPECT_EQ(member) \
if (stats.member != arg.member) { \
*result_listener << "\n stats." << #member \
<< ": actual = " << arg.member << ", expected " \
<< stats.member; \
ok = false; \
}
STATS_MATCHER_EXPECT_EQ(size);
STATS_MATCHER_EXPECT_EQ(node_count);
STATS_MATCHER_EXPECT_EQ(node_counts.flat);
STATS_MATCHER_EXPECT_EQ(node_counts.flat_64);
STATS_MATCHER_EXPECT_EQ(node_counts.flat_128);
STATS_MATCHER_EXPECT_EQ(node_counts.flat_256);
STATS_MATCHER_EXPECT_EQ(node_counts.flat_512);
STATS_MATCHER_EXPECT_EQ(node_counts.flat_1k);
STATS_MATCHER_EXPECT_EQ(node_counts.external);
STATS_MATCHER_EXPECT_EQ(node_counts.concat);
STATS_MATCHER_EXPECT_EQ(node_counts.substring);
STATS_MATCHER_EXPECT_EQ(node_counts.ring);
STATS_MATCHER_EXPECT_EQ(node_counts.btree);
STATS_MATCHER_EXPECT_EQ(estimated_memory_usage);
STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage);
// The helper macro is local to this matcher.
#undef STATS_MATCHER_EXPECT_EQ
return ok;
}
// A single unshared flat: fair share usage equals the full memory usage.
TEST(CordzInfoStatisticsTest, Flat) {
  RefHelper ref;
  auto* flat = ref.NeedsUnref(Flat(512));

  CordzStatistics want;
  want.node_count = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_512 = 1;
  want.size = flat->length;
  want.estimated_memory_usage = SizeOf(flat);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(flat), EqStatistics(want));
}
// A flat holding one extra reference: the expected fair share is half of the
// flat's size.
TEST(CordzInfoStatisticsTest, SharedFlat) {
  RefHelper ref;
  auto* flat = ref.NeedsUnref(Flat(64));
  ref.Ref(flat);  // Adds the second reference.

  CordzStatistics want;
  want.node_count = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_64 = 1;
  want.size = flat->length;
  want.estimated_memory_usage = SizeOf(flat);
  want.estimated_fair_share_memory_usage = SizeOf(flat) / 2;

  EXPECT_THAT(SampleCord(flat), EqStatistics(want));
}
// A single unshared external rep: fair share equals the full memory usage.
TEST(CordzInfoStatisticsTest, External) {
  RefHelper ref;
  auto* external = ref.NeedsUnref(External());

  CordzStatistics want;
  want.node_count = 1;
  want.node_counts.external = 1;
  want.size = external->length;
  want.estimated_memory_usage = SizeOf(external);
  want.estimated_fair_share_memory_usage = SizeOf(external);

  EXPECT_THAT(SampleCord(external), EqStatistics(want));
}
// An external rep holding one extra reference: the expected fair share is
// half of its size.
TEST(CordzInfoStatisticsTest, SharedExternal) {
  RefHelper ref;
  auto* external = ref.NeedsUnref(External());
  ref.Ref(external);  // Adds the second reference.

  CordzStatistics want;
  want.node_count = 1;
  want.node_counts.external = 1;
  want.size = external->length;
  want.estimated_memory_usage = SizeOf(external);
  want.estimated_fair_share_memory_usage = SizeOf(external) / 2;

  EXPECT_THAT(SampleCord(external), EqStatistics(want));
}
// Unshared substring over a flat: both nodes are counted in full.
TEST(CordzInfoStatisticsTest, Substring) {
  RefHelper ref;
  auto* flat = Flat(1024);
  auto* substring = ref.NeedsUnref(Substring(flat));

  CordzStatistics want;
  want.node_count = 2;
  want.node_counts.substring = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_1k = 1;
  want.size = substring->length;
  want.estimated_memory_usage = SizeOf(substring) + SizeOf(flat);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(substring), EqStatistics(want));
}
// Shared substring over a shared flat. The flat receives two extra references
// and the substring one, so the expected shares are 1/2 of the substring and
// 1/6 of the flat.
TEST(CordzInfoStatisticsTest, SharedSubstring) {
  RefHelper ref;
  auto* flat = ref.Ref(Flat(511), 2);
  auto* substring = ref.NeedsUnref(Substring(flat));
  ref.Ref(substring);

  CordzStatistics want;
  want.node_count = 2;
  want.node_counts.substring = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_512 = 1;
  want.size = substring->length;
  want.estimated_memory_usage = SizeOf(flat) + SizeOf(substring);
  want.estimated_fair_share_memory_usage =
      SizeOf(substring) / 2 + SizeOf(flat) / 6;

  EXPECT_THAT(SampleCord(substring), EqStatistics(want));
}
// Unshared concat of two flats. Only the 300 byte flat is expected in a size
// bucket (flat_512).
TEST(CordzInfoStatisticsTest, Concat) {
  RefHelper ref;
  auto* flat1 = Flat(300);
  auto* flat2 = Flat(2000);
  auto* concat = ref.NeedsUnref(Concat(flat1, flat2));

  CordzStatistics want;
  want.node_count = 3;
  want.node_counts.concat = 1;
  want.node_counts.flat = 2;
  want.node_counts.flat_512 = 1;
  want.size = concat->length;
  want.estimated_memory_usage =
      SizeOf(concat) + SizeOf(flat1) + SizeOf(flat2);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(concat), EqStatistics(want));
}
// Two-level concat tree without sharing: three concat nodes, three flats and
// a substring over an external rep.
TEST(CordzInfoStatisticsTest, DeepConcat) {
  RefHelper ref;
  auto* flat1 = Flat(300);
  auto* flat2 = Flat(2000);
  auto* flat3 = Flat(400);
  auto* external = External(3000);
  auto* substring = Substring(external);
  auto* concat1 = Concat(flat1, flat2);
  auto* concat2 = Concat(flat3, substring);
  auto* concat = ref.NeedsUnref(Concat(concat1, concat2));

  CordzStatistics want;
  want.node_count = 8;
  want.node_counts.concat = 3;
  want.node_counts.substring = 1;
  want.node_counts.external = 1;
  want.node_counts.flat = 3;
  want.node_counts.flat_512 = 2;
  want.size = concat->length;
  want.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) +
                                SizeOf(flat2) + SizeOf(flat3) +
                                SizeOf(external) + SizeOf(substring);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(concat), EqStatistics(want));
}
// Two-level concat tree with extra references on several nodes. The expected
// fair share comes from the local FairShare() reference implementation.
TEST(CordzInfoStatisticsTest, DeepSharedConcat) {
  RefHelper ref;
  auto* flat1 = Flat(40);
  auto* flat2 = ref.Ref(Flat(2000), 4);
  auto* flat3 = Flat(70);
  auto* external = ref.Ref(External(3000));
  auto* substring = ref.Ref(Substring(external), 3);
  auto* concat1 = Concat(flat1, flat2);
  auto* concat2 = Concat(flat3, substring);
  auto* concat = ref.NeedsUnref(Concat(concat1, concat2));
  ref.Ref(concat);

  CordzStatistics want;
  want.node_count = 8;
  want.node_counts.concat = 3;
  want.node_counts.substring = 1;
  want.node_counts.external = 1;
  want.node_counts.flat = 3;
  want.node_counts.flat_64 = 1;
  want.node_counts.flat_128 = 1;
  want.size = concat->length;
  want.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) +
                                SizeOf(flat2) + SizeOf(flat3) +
                                SizeOf(external) + SizeOf(substring);
  want.estimated_fair_share_memory_usage = FairShare(concat);

  EXPECT_THAT(SampleCord(concat), EqStatistics(want));
}
// Unshared ring holding three flats and one external rep.
TEST(CordzInfoStatisticsTest, Ring) {
  RefHelper ref;
  auto* flat1 = Flat(240);
  auto* flat2 = Flat(2000);
  auto* flat3 = Flat(70);
  auto* external = External(3000);
  CordRepRing* ring = CordRepRing::Create(flat1);
  ring = CordRepRing::Append(ring, flat2);
  ring = CordRepRing::Append(ring, flat3);
  ring = ref.NeedsUnref(CordRepRing::Append(ring, external));

  CordzStatistics want;
  want.node_count = 5;
  want.node_counts.ring = 1;
  want.node_counts.external = 1;
  want.node_counts.flat = 3;
  want.node_counts.flat_128 = 1;
  want.node_counts.flat_256 = 1;
  want.size = ring->length;
  want.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) +
                                SizeOf(flat2) + SizeOf(flat3) +
                                SizeOf(external);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(ring), EqStatistics(want));
}
// Shared substring over a shared ring whose children are partly shared. The
// expected fair share comes from the local FairShare() reference
// implementation.
TEST(CordzInfoStatisticsTest, SharedSubstringRing) {
  RefHelper ref;
  auto* flat1 = ref.Ref(Flat(240));
  auto* flat2 = Flat(200);
  auto* flat3 = Flat(70);
  auto* external = ref.Ref(External(3000), 5);
  CordRepRing* ring = CordRepRing::Create(flat1);
  ring = CordRepRing::Append(ring, flat2);
  ring = CordRepRing::Append(ring, flat3);
  ring = ref.Ref(CordRepRing::Append(ring, external), 4);
  auto* substring = ref.NeedsUnref(Substring(ring));
  ref.Ref(substring);

  CordzStatistics want;
  want.node_count = 6;
  want.node_counts.substring = 1;
  want.node_counts.ring = 1;
  want.node_counts.external = 1;
  want.node_counts.flat = 3;
  want.node_counts.flat_128 = 1;
  want.node_counts.flat_256 = 2;
  want.size = substring->length;
  want.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) +
                                SizeOf(flat2) + SizeOf(flat3) +
                                SizeOf(external) + SizeOf(substring);
  want.estimated_fair_share_memory_usage = FairShare(substring);

  EXPECT_THAT(SampleCord(substring), EqStatistics(want));
}
// Single btree node holding a flat, a substring over a flat, an external rep
// and as many 70 byte flats as still fit (`kMaxCapacity - 3`).
TEST(CordzInfoStatisticsTest, BtreeLeaf) {
  // The subtraction below requires room for the three fixed edges.
  ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3));
  RefHelper ref;
  auto* flat1 = Flat(2000);
  auto* flat2 = Flat(200);
  auto* substr = Substring(flat2);
  auto* external = External(3000);
  CordRepBtree* tree = CordRepBtree::Create(flat1);
  tree = CordRepBtree::Append(tree, substr);
  tree = CordRepBtree::Append(tree, external);

  size_t flat3_count = CordRepBtree::kMaxCapacity - 3;
  size_t flat3_size = 0;
  for (size_t added = 0; added < flat3_count; ++added) {
    auto* flat3 = Flat(70);
    flat3_size += SizeOf(flat3);
    tree = CordRepBtree::Append(tree, flat3);
  }
  ref.NeedsUnref(tree);

  CordzStatistics want;
  want.node_count = 1 + 3 + 1 + flat3_count;
  want.node_counts.btree = 1;
  want.node_counts.substring = 1;
  want.node_counts.external = 1;
  want.node_counts.flat = 2 + flat3_count;
  want.node_counts.flat_128 = flat3_count;
  want.node_counts.flat_256 = 1;
  want.size = tree->length;
  want.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) +
                                SizeOf(flat2) + SizeOf(substr) +
                                flat3_size + SizeOf(external);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(tree), EqStatistics(want));
}
// Btree filled with `leaf_count` full batches of edges, with extra references
// on the top node, on its first edge and on the large flats. The expected fair
// share comes from the local FairShare() reference implementation.
TEST(CordzInfoStatisticsTest, BtreeNodeShared) {
  // `flat3_count` is an unsigned difference: verify up front that it cannot
  // wrap around. (Checking `flat3_count >= 0` afterwards would always pass.)
  ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3));
  RefHelper ref;
  static constexpr int leaf_count = 3;
  const size_t flat3_count = CordRepBtree::kMaxCapacity - 3;
  CordRepBtree* tree = nullptr;
  size_t mem_size = 0;
  for (int i = 0; i < leaf_count; ++i) {
    // Large flat with 9 extra references.
    auto* flat1 = ref.Ref(Flat(2000), 9);
    mem_size += SizeOf(flat1);
    if (i == 0) {
      tree = CordRepBtree::Create(flat1);
    } else {
      tree = CordRepBtree::Append(tree, flat1);
    }
    auto* flat2 = Flat(200);
    auto* substr = Substring(flat2);
    mem_size += SizeOf(flat2) + SizeOf(substr);
    tree = CordRepBtree::Append(tree, substr);
    auto* external = External(30);
    mem_size += SizeOf(external);
    tree = CordRepBtree::Append(tree, external);
    // Fill the remaining capacity with small flats.
    for (size_t j = 0; j < flat3_count; ++j) {
      auto* flat3 = Flat(70);
      mem_size += SizeOf(flat3);
      tree = CordRepBtree::Append(tree, flat3);
    }
    // Account for the btree node that received this batch: on the first pass
    // that is `tree` itself, afterwards it is the last edge of `tree`.
    if (i == 0) {
      mem_size += SizeOf(tree);
    } else {
      mem_size += SizeOf(tree->Edges().back()->btree());
    }
  }
  ref.NeedsUnref(tree);
  // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4).
  ref.Ref(tree, 1);
  ref.Ref(tree->Edges().front(), 4);
  CordzStatistics expected;
  expected.size = tree->length;
  expected.estimated_memory_usage = SizeOf(tree) + mem_size;
  expected.estimated_fair_share_memory_usage = FairShare(tree);
  expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count);
  expected.node_counts.flat = leaf_count * (2 + flat3_count);
  expected.node_counts.flat_128 = leaf_count * flat3_count;
  expected.node_counts.flat_256 = leaf_count;
  expected.node_counts.external = leaf_count;
  expected.node_counts.substring = leaf_count;
  expected.node_counts.btree = 1 + leaf_count;
  EXPECT_THAT(SampleCord(tree), EqStatistics(expected));
}
// Stress test: one analyzer task repeatedly collects statistics for all
// sampled cords while `kNumThreads` worker tasks concurrently create, track,
// clear and untrack cords. The test makes no assertions of its own; it exists
// so that memory and thread safety analyzers can observe the interleavings.
TEST(CordzInfoStatisticsTest, ThreadSafety) {
Notification stop;
static constexpr int kNumThreads = 8;
// Only written by the analyzer task; read after the pool has been destroyed.
int64_t sampled_node_count = 0;
{
absl::synchronization_internal::ThreadPool pool(kNumThreads);
// Run analyzer thread emulating a CordzHandler collection.
pool.Schedule([&]() {
while (!stop.HasBeenNotified()) {
// Run every 10us (about 100K total collections).
absl::SleepFor(absl::Microseconds(10));
CordzSampleToken token;
for (const CordzInfo& cord_info : token) {
CordzStatistics stats = cord_info.GetCordzStatistics();
sampled_node_count += stats.node_count;
}
}
});
// Run 'application threads'
for (int i = 0; i < kNumThreads; ++i) {
pool.Schedule([&]() {
// Track 0 - 2 cordz infos at a time, providing permutations of 0, 1
// and 2 CordzHandle and CordzInfo queues being active, with plenty of
// 'empty to non empty' transitions.
InlineData cords[2];
// Default-constructed engine: every worker draws the same sequence.
std::minstd_rand gen;
std::uniform_int_distribution<int> coin_toss(0, 1);
while (!stop.HasBeenNotified()) {
for (InlineData& cord : cords) {
// 50/50 flip the state of the cord
if (coin_toss(gen) != 0) {
if (cord.is_tree()) {
// 50/50 simulate delete (untrack) or 'edit to empty'
if (coin_toss(gen) != 0) {
CordzInfo::MaybeUntrackCord(cord.cordz_info());
} else {
CordzUpdateScope scope(cord.cordz_info(),
CordzUpdateTracker::kUnknown);
scope.SetCordRep(nullptr);
}
// In both cases the rep is released and the cord reset to empty.
CordRep::Unref(cord.as_tree());
cord.set_inline_size(0);
} else {
// Coin toss to 25% ring, 25% btree, and 50% flat.
CordRep* rep = Flat(256);
if (coin_toss(gen) != 0) {
if (coin_toss(gen) != 0) {
rep = CordRepRing::Create(rep);
} else {
rep = CordRepBtree::Create(rep);
}
}
cord.make_tree(rep);
// 50/50 sample
if (coin_toss(gen) != 0) {
CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown);
}
}
}
}
}
// Release whatever is still held once the stop notification arrives.
for (InlineData& cord : cords) {
if (cord.is_tree()) {
CordzInfo::MaybeUntrackCord(cord.cordz_info());
CordRep::Unref(cord.as_tree());
}
}
});
}
// Run for 1 second to give memory and thread safety analyzers plenty of
// time to detect any mishaps or undefined behaviors.
absl::SleepFor(absl::Seconds(1));
stop.Notify();
}
// Informational only; the value is not checked.
std::cout << "Sampled " << sampled_node_count << " nodes\n";
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,341 @@
// Copyright 2019 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_info.h"
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/symbolize.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::Ne;
using ::testing::SizeIs;
// Update tracker method values used by the tests in this file.
// Expected as the parent method of a cord tracked without a parent.
auto constexpr kUnknownMethod = CordzUpdateTracker::kUnknown;
// Method passed when tracking a cord directly.
auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString;
// Method passed when tracking a child cord derived from a parent.
auto constexpr kChildMethod = CordzUpdateTracker::kConstructorCord;
// Method passed to CordzInfo::Lock() when counting updates.
auto constexpr kUpdateMethod = CordzUpdateTracker::kAppendString;
// Local shorthand for CordzHandle::DiagnosticsGetDeleteQueue().
std::vector<const CordzHandle*> DeleteQueue() {
  std::vector<const CordzHandle*> queue =
      CordzHandle::DiagnosticsGetDeleteQueue();
  return queue;
}
// Symbolizes every frame in `raw_stack` and returns the symbol names, one per
// line. Frames for which absl::Symbolize() fails are left out.
std::string FormatStack(absl::Span<void* const> raw_stack) {
  static constexpr size_t kSymbolBufferSize = 1 << 14;
  std::unique_ptr<char[]> scratch(new char[kSymbolBufferSize]);
  std::string formatted;
  for (size_t frame = 0; frame < raw_stack.size(); ++frame) {
    const bool symbolized =
        absl::Symbolize(raw_stack[frame], scratch.get(), kSymbolBufferSize);
    if (!symbolized) continue;
    absl::StrAppend(&formatted, " ", scratch.get(), "\n");
  }
  return formatted;
}
// Tracking a cord is expected to attach a non-snapshot CordzInfo that is the
// head of the CordzInfo list and refers to the cord's rep.
TEST(CordzInfoTest, TrackCord) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
// Abort here if tracking failed; the checks below dereference `info`.
ASSERT_THAT(info, Ne(nullptr));
EXPECT_FALSE(info->is_snapshot());
EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info));
EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep));
info->Untrack();
}
// Neither parent nor child is tracked and the `sample_none` helper is active:
// MaybeTrackCord() is expected to leave the child untracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSampling) {
CordzSamplingIntervalHelper sample_none(99999);
TestCordData parent, child;
CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Same as above but with the `sample_all` helper active: since the parent is
// never tracked here, the child is still expected to remain untracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) {
CordzSamplingIntervalHelper sample_all(1);
TestCordData parent, child;
CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// With a tracked parent, MaybeTrackCord() is expected to track the child even
// though the `sample_none` helper is active, and to record the parent's stack
// as the child's parent stack.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) {
CordzSamplingIntervalHelper sample_none(99999);
TestCordData parent, child;
CordzInfo::TrackCord(parent.data, kTrackCordMethod);
CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
CordzInfo* parent_info = parent.data.cordz_info();
CordzInfo* child_info = child.data.cordz_info();
// Abort here if the child was not tracked; it is dereferenced below.
ASSERT_THAT(child_info, Ne(nullptr));
EXPECT_THAT(child_info->GetCordRepForTesting(), Eq(child.rep.rep));
EXPECT_THAT(child_info->GetParentStack(), parent_info->GetStack());
parent_info->Untrack();
child_info->Untrack();
}
// The child is already tracked but the parent is not: after MaybeTrackCord()
// the child is expected to no longer carry a CordzInfo.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) {
CordzSamplingIntervalHelper sample_none(99999);
TestCordData parent, child;
CordzInfo::TrackCord(child.data, kTrackCordMethod);
CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Same scenario with the `sample_all` helper active: the already tracked
// child is still expected to end up without a CordzInfo.
TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) {
CordzSamplingIntervalHelper sample_all(1);
TestCordData parent, child;
CordzInfo::TrackCord(child.data, kTrackCordMethod);
CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Without any snapshot alive, Untrack() is expected to leave the delete queue
// empty.
TEST(CordzInfoTest, UntrackCord) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
info->Untrack();
EXPECT_THAT(DeleteQueue(), SizeIs(0));
}
// With a snapshot alive, Untrack() is expected to remove the info from the
// list (Head is null) while the info stays readable and is listed in the
// delete queue together with the snapshot.
TEST(CordzInfoTest, UntrackCordWithSnapshot) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
// Created after tracking and before untracking.
CordzSnapshot snapshot;
info->Untrack();
EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr));
EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep));
EXPECT_THAT(DeleteQueue(), ElementsAre(info, &snapshot));
}
// SetCordRep() between Lock() and Unlock() is expected to replace the rep
// reported by the info.
TEST(CordzInfoTest, SetCordRep) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
TestCordRep rep;
info->Lock(CordzUpdateTracker::kAppendCord);
info->SetCordRep(rep.rep);
info->Unlock();
EXPECT_THAT(info->GetCordRepForTesting(), Eq(rep.rep));
info->Untrack();
}
// Setting a null rep while locked is expected to keep the info listed until
// Unlock(), after which the list is expected to be empty.
TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
info->Lock(CordzUpdateTracker::kAppendString);
info->SetCordRep(nullptr);
EXPECT_THAT(info->GetCordRepForTesting(), Eq(nullptr));
// Still listed while the lock is held.
EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info));
info->Unlock();
// No explicit Untrack() call: the info is expected to be gone after Unlock().
EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr));
}
// RefCordRep() is expected to return the tracked rep and to increase its
// reference count by one.
TEST(CordzInfoTest, RefCordRep) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
size_t refcount = data.rep.rep->refcount.Get();
EXPECT_THAT(info->RefCordRep(), Eq(data.rep.rep));
EXPECT_THAT(data.rep.rep->refcount.Get(), Eq(refcount + 1));
// Release the reference added by RefCordRep().
CordRep::Unref(data.rep.rep);
info->Untrack();
}
#if GTEST_HAS_DEATH_TEST
// Calling SetCordRep() without a preceding Lock() is expected to die in debug
// builds. Any death message is accepted.
TEST(CordzInfoTest, SetCordRepRequiresMutex) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
TestCordRep rep;
EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*");
info->Untrack();
}
#endif // GTEST_HAS_DEATH_TEST
// Tracks two cords and untracks them starting with the most recently tracked
// one (the list head), checking Head()/Next() after every step.
TEST(CordzInfoTest, TrackUntrackHeadFirstV2) {
CordzSnapshot snapshot;
EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info1 = data.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
TestCordData data2;
CordzInfo::TrackCord(data2.data, kTrackCordMethod);
CordzInfo* info2 = data2.data.cordz_info();
// The newer info is expected at the head, followed by the older one.
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Remove the head first.
info2->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
info1->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
}
// Tracks two cords and untracks them starting with the first tracked one (the
// list tail), checking Head()/Next() after every step.
TEST(CordzInfoTest, TrackUntrackTailFirstV2) {
CordzSnapshot snapshot;
EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info1 = data.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
TestCordData data2;
CordzInfo::TrackCord(data2.data, kTrackCordMethod);
CordzInfo* info2 = data2.data.cordz_info();
// The newer info is expected at the head, followed by the older one.
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Remove the tail first.
info1->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(nullptr));
info2->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
}
// Checks that the stack recorded by TrackCord() contains the (symbolized)
// stack captured locally in this test.
TEST(CordzInfoTest, StackV2) {
TestCordData data;
// kMaxStackDepth is intentionally less than 64 (which is the max depth that
// Cordz will record) because if the actual stack depth is over 64
// (which it is on Apple platforms) then the expected_stack will end up
// catching a few frames at the end that the actual_stack didn't get and
// it will no longer be subset. At the time of this writing 58 is the max
// that will allow this test to pass (with a minimum os version of iOS 9), so
// rounded down to 50 to hopefully not run into this in the future if Apple
// makes small modifications to its testing stack. 50 is sufficient to prove
// that we got a decent stack.
static constexpr int kMaxStackDepth = 50;
CordzInfo::TrackCord(data.data, kTrackCordMethod);
CordzInfo* info = data.data.cordz_info();
std::vector<void*> local_stack;
// Make room for the maximum depth; shrunk to the actual depth below.
local_stack.resize(kMaxStackDepth);
// In some environments we don't get stack traces. For example in Android
// absl::GetStackTrace will return 0 indicating it didn't find any stack. The
// resultant formatted stack will be "", but that still equals the stack
// recorded in CordzInfo, which is also empty. The skip_count is 1 so that the
// line number of the current stack isn't included in the HasSubstr check.
local_stack.resize(absl::GetStackTrace(local_stack.data(), kMaxStackDepth,
/*skip_count=*/1));
std::string got_stack = FormatStack(info->GetStack());
std::string expected_stack = FormatStack(local_stack);
// If TrackCord is inlined, got_stack should match expected_stack. If it isn't
// inlined, got_stack should include an additional frame not present in
// expected_stack. Either way, expected_stack should be a substring of
// got_stack.
EXPECT_THAT(got_stack, HasSubstr(expected_stack));
info->Untrack();
}
// Local helper: tracks `data` as a child of `parent` from its own function,
// so that child and parent get different stacks. Returns the child's info.
CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) {
  CordzInfo::TrackCord(data, parent, kChildMethod);
  CordzInfo* const child_info = data.cordz_info();
  return child_info;
}
// Local helper: tracks `data` with kTrackCordMethod from its own function and
// returns the resulting info.
CordzInfo* TrackParentCord(InlineData& data) {
  CordzInfo::TrackCord(data, kTrackCordMethod);
  CordzInfo* const parent_info = data.cordz_info();
  return parent_info;
}
// Statistics of a freshly tracked cord are expected to report the rep length,
// the tracking method, an unknown parent method and one update for the
// tracking method.
TEST(CordzInfoTest, GetStatistics) {
TestCordData data;
CordzInfo* info = TrackParentCord(data.data);
CordzStatistics statistics = info->GetCordzStatistics();
EXPECT_THAT(statistics.size, Eq(data.rep.rep->length));
EXPECT_THAT(statistics.method, Eq(kTrackCordMethod));
EXPECT_THAT(statistics.parent_method, Eq(kUnknownMethod));
EXPECT_THAT(statistics.update_tracker.Value(kTrackCordMethod), Eq(1));
info->Untrack();
}
// Each Lock(method) call is expected to be counted in the update tracker:
// two lock/unlock cycles should yield a count of 2 for kUpdateMethod.
TEST(CordzInfoTest, LockCountsMethod) {
TestCordData data;
CordzInfo* info = TrackParentCord(data.data);
info->Lock(kUpdateMethod);
info->Unlock();
info->Lock(kUpdateMethod);
info->Unlock();
CordzStatistics statistics = info->GetCordzStatistics();
EXPECT_THAT(statistics.update_tracker.Value(kUpdateMethod), Eq(2));
info->Untrack();
}
// A child tracked from a tracked parent is expected to record the parent's
// stack and method alongside its own method.
TEST(CordzInfoTest, FromParent) {
TestCordData parent;
TestCordData child;
CordzInfo* info_parent = TrackParentCord(parent.data);
CordzInfo* info_child = TrackChildCord(child.data, parent.data);
// The parent's own stack must equal the parent stack stored in the child.
std::string stack = FormatStack(info_parent->GetStack());
std::string parent_stack = FormatStack(info_child->GetParentStack());
EXPECT_THAT(stack, Eq(parent_stack));
CordzStatistics statistics = info_child->GetCordzStatistics();
EXPECT_THAT(statistics.size, Eq(child.rep.rep->length));
EXPECT_THAT(statistics.method, Eq(kChildMethod));
EXPECT_THAT(statistics.parent_method, Eq(kTrackCordMethod));
EXPECT_THAT(statistics.update_tracker.Value(kChildMethod), Eq(1));
info_parent->Untrack();
info_child->Untrack();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,64 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Pre-increment: moves to the CordzInfo following the current one, as seen
// through this iterator's token. An iterator whose `current_` is null is left
// unchanged.
CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() {
  if (current_ == nullptr) return *this;
  current_ = current_->Next(*token_);
  return *this;
}
// Post-increment: advances this iterator and returns a copy of its state from
// before the advance.
CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) {
  Iterator previous = *this;
  ++*this;
  return previous;
}
// Iterators are equal when they point at the same CordzInfo and, unless that
// pointer is null, belong to the same token. Two iterators with a null
// position compare equal regardless of their tokens.
bool operator==(const CordzSampleToken::Iterator& lhs,
                const CordzSampleToken::Iterator& rhs) {
  if (lhs.current_ != rhs.current_) return false;
  if (lhs.current_ == nullptr) return true;
  return lhs.token_ == rhs.token_;
}
// Negation of operator== for the same pair of iterators.
bool operator!=(const CordzSampleToken::Iterator& lhs,
                const CordzSampleToken::Iterator& rhs) {
  const bool equal = (lhs == rhs);
  return !equal;
}
// Returns a reference to the CordzInfo at the current position. `current_`
// is dereferenced without a null check.
CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*()
    const {
  reference info = *current_;
  return info;
}
// Returns the pointer to the CordzInfo at the current position. The pointer
// is null when the iterator has no current element.
CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->()
    const {
  pointer info = current_;
  return info;
}
// Creates an iterator bound to `token`, positioned at the head of the
// CordzInfo list as seen through that token. `token` is dereferenced and must
// not be null.
CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token)
    : token_(token) {
  current_ = CordzInfo::Head(*token_);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,97 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
#ifndef ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_
#define ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// The existence of a CordzSampleToken guarantees that a reader can traverse the
// global_cordz_infos_head linked-list without needing to hold a mutex. When a
// CordzSampleToken exists, all CordzInfo objects that would be destroyed are
// instead appended to a deletion queue. When the CordzSampleToken is destroyed,
// it will also clean up any of these CordzInfo objects.
//
// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects.
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail
//
// This list tracks that CH1 and CH2 were created after ST1, so the thread
// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2
// was created later, so the thread holding the ST2 token cannot have a
// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will
// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread
// will only delete ST2.
//
// If ST1 is cleaned up first, the new list will be:
// ST2 <- CH3 <- global_delete_queue_tail
//
// If ST2 is cleaned up first, the new list will be:
// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail
//
// All new CordzHandle objects are appended to the list, so if a new thread
// comes along before either ST1 or ST2 are cleaned up, the new list will be:
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail
//
// A thread must hold the global_delete_queue_mu mutex whenever it's altering
// this list.
//
// It is safe for a thread that holds a CordzSampleToken to read
// global_cordz_infos at any time since the objects it is able to retrieve will
// not be deleted while the CordzSampleToken exists.
// Snapshot type that additionally exposes begin()/end() iteration over
// CordzInfo instances. Not copyable.
class CordzSampleToken : public CordzSnapshot {
public:
// Iterator over `const CordzInfo` elements, declared with input iterator
// traits. A default-constructed iterator has no token and no current element.
class Iterator {
public:
using iterator_category = std::input_iterator_tag;
// Note that `value_type` is declared as a reference type, identical to
// `reference`.
using value_type = const CordzInfo&;
using difference_type = ptrdiff_t;
using pointer = const CordzInfo*;
using reference = value_type;
Iterator() = default;
// Pre- and post-increment.
Iterator& operator++();
Iterator operator++(int);
friend bool operator==(const Iterator& lhs, const Iterator& rhs);
friend bool operator!=(const Iterator& lhs, const Iterator& rhs);
// Access to the current element.
reference operator*() const;
pointer operator->() const;
private:
// Only CordzSampleToken can create an iterator bound to a token.
friend class CordzSampleToken;
explicit Iterator(const CordzSampleToken* token);
// Token this iterator was created from; null for default-constructed iterators.
const CordzSampleToken* token_ = nullptr;
// Current element; null when there is none.
pointer current_ = nullptr;
};
CordzSampleToken() = default;
CordzSampleToken(const CordzSampleToken&) = delete;
CordzSampleToken& operator=(const CordzSampleToken&) = delete;
// begin() returns an iterator bound to this token; end() returns a
// default-constructed iterator.
Iterator begin() { return Iterator(this); }
Iterator end() { return Iterator(); }
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_

View File

@@ -0,0 +1,208 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_sample_token.h"
#include <memory>
#include <type_traits>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Ne;
// Used test values
// Update tracker method passed to every CordzInfo::TrackCord() call below.
auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString;
// Compile-time checks on CordzSampleToken::Iterator: it must be copyable and
// movable, and std::iterator_traits must report the member types asserted
// below (input iterator category, ptrdiff_t difference type, and
// const CordzInfo pointer / reference types). The test has no runtime work.
TEST(CordzSampleTokenTest, IteratorTraits) {
static_assert(std::is_copy_constructible<CordzSampleToken::Iterator>::value,
"");
static_assert(std::is_copy_assignable<CordzSampleToken::Iterator>::value, "");
static_assert(std::is_move_constructible<CordzSampleToken::Iterator>::value,
"");
static_assert(std::is_move_assignable<CordzSampleToken::Iterator>::value, "");
static_assert(
std::is_same<
std::iterator_traits<CordzSampleToken::Iterator>::iterator_category,
std::input_iterator_tag>::value,
"");
// Note: value_type is asserted to be a reference type here, identical to
// `reference` below.
static_assert(
std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::value_type,
const CordzInfo&>::value,
"");
static_assert(
std::is_same<
std::iterator_traits<CordzSampleToken::Iterator>::difference_type,
ptrdiff_t>::value,
"");
static_assert(
std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::pointer,
const CordzInfo*>::value,
"");
static_assert(
std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::reference,
const CordzInfo&>::value,
"");
}
// A token created while no cord is tracked by this test must yield an empty
// range: begin() compares equal to end().
TEST(CordzSampleTokenTest, IteratorEmpty) {
CordzSampleToken token;
EXPECT_THAT(token.begin(), Eq(token.end()));
}
// Tracks three cords, then iterates a token created afterwards and checks
// that all three CordzInfo instances are visited, most recently tracked first.
TEST(CordzSampleTokenTest, Iterator) {
TestCordData cord1, cord2, cord3;
CordzInfo::TrackCord(cord1.data, kTrackCordMethod);
CordzInfo* info1 = cord1.data.cordz_info();
CordzInfo::TrackCord(cord2.data, kTrackCordMethod);
CordzInfo* info2 = cord2.data.cordz_info();
CordzInfo::TrackCord(cord3.data, kTrackCordMethod);
CordzInfo* info3 = cord3.data.cordz_info();
CordzSampleToken token;
std::vector<const CordzInfo*> found;
for (const CordzInfo& cord_info : token) {
found.push_back(&cord_info);
}
// The expected order is the reverse of the tracking order.
EXPECT_THAT(found, ElementsAre(info3, info2, info1));
// Untrack explicitly so the tracked infos do not outlive this test.
info1->Untrack();
info2->Untrack();
info3->Untrack();
}
// Checks iterator equality semantics: iterators obtained from different
// tokens compare unequal even when positioned on the same CordzInfo, while
// two exhausted iterators compare equal. The interleaving of TrackCord calls
// and token creation below is what gives each token a different head.
TEST(CordzSampleTokenTest, IteratorEquality) {
TestCordData cord1;
TestCordData cord2;
TestCordData cord3;
CordzInfo::TrackCord(cord1.data, kTrackCordMethod);
CordzInfo* info1 = cord1.data.cordz_info();
CordzSampleToken token1;
// lhs starts with the CordzInfo corresponding to cord1 at the head.
CordzSampleToken::Iterator lhs = token1.begin();
CordzInfo::TrackCord(cord2.data, kTrackCordMethod);
CordzInfo* info2 = cord2.data.cordz_info();
CordzSampleToken token2;
// rhs starts with the CordzInfo corresponding to cord2 at the head.
CordzSampleToken::Iterator rhs = token2.begin();
// cord3 is tracked after both tokens were created.
CordzInfo::TrackCord(cord3.data, kTrackCordMethod);
CordzInfo* info3 = cord3.data.cordz_info();
// lhs is on cord1 while rhs is on cord2.
EXPECT_THAT(lhs, Ne(rhs));
rhs++;
// lhs and rhs are both on cord1, but they didn't come from the same
// CordzSampleToken.
EXPECT_THAT(lhs, Ne(rhs));
lhs++;
rhs++;
// Both lhs and rhs are done, so they are on nullptr.
EXPECT_THAT(lhs, Eq(rhs));
info1->Untrack();
info2->Untrack();
info3->Untrack();
}
// Stress test intended to run under a thread sanitizer: several pool threads
// concurrently track/untrack cords and create/iterate/destroy sample tokens
// until the main thread signals `stop`.
TEST(CordzSampleTokenTest, MultiThreaded) {
  Notification stop;
  static constexpr int kNumThreads = 4;
  static constexpr int kNumCords = 3;
  static constexpr int kNumTokens = 3;
  absl::synchronization_internal::ThreadPool pool(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    pool.Schedule([&stop]() {
      absl::BitGen gen;
      // Each worker owns its cords and tokens; only the tracking structures
      // reached through CordzInfo / CordzSampleToken are shared.
      TestCordData cords[kNumCords];
      std::unique_ptr<CordzSampleToken> tokens[kNumTokens];
      while (!stop.HasBeenNotified()) {
        // Randomly perform one of five actions:
        //   1) Untrack
        //   2) Track
        //   3) Iterate over Cords visible to a token.
        //   4) Unsample
        //   5) Sample
        // `index` selects both the cord and the token slot, which is valid
        // because kNumCords == kNumTokens.
        int index = absl::Uniform(gen, 0, kNumCords);
        if (absl::Bernoulli(gen, 0.5)) {
          TestCordData& cord = cords[index];
          // Track/untrack.
          if (cord.data.is_profiled()) {
            // 1) Untrack
            cord.data.cordz_info()->Untrack();
            cord.data.clear_cordz_info();
          } else {
            // 2) Track
            CordzInfo::TrackCord(cord.data, kTrackCordMethod);
          }
        } else {
          std::unique_ptr<CordzSampleToken>& token = tokens[index];
          if (token) {
            if (absl::Bernoulli(gen, 0.5)) {
              // 3) Iterate over Cords visible to a token.
              for (const CordzInfo& info : *token) {
                // This is trivial work to allow us to compile the loop.
                EXPECT_THAT(info.Next(*token), Ne(&info));
              }
            } else {
              // 4) Unsample
              token = nullptr;
            }
          } else {
            // 5) Sample
            token = absl::make_unique<CordzSampleToken>();
          }
        }
      }
      // Release whatever is still tracked before the cords go out of scope.
      for (TestCordData& cord : cords) {
        CordzInfo::MaybeUntrackCord(cord.data.cordz_info());
      }
    });
  }
  // The threads will hammer away. Give it a little bit of time for tsan to
  // spot errors.
  absl::SleepFor(absl::Seconds(3));
  stop.Notify();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,87 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzStatistics captures some meta information about a Cord's shape.
// Plain value type: every field has an in-class default (0 or kUnknown), so a
// default-constructed instance represents "nothing recorded".
// NOTE(review): `size_t` is used below while this header only includes
// <cstdint> directly; presumably it is provided transitively -- confirm.
struct CordzStatistics {
using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;
// Node counts information
struct NodeCounts {
size_t flat = 0; // #flats
size_t flat_64 = 0; // #flats up to 64 bytes
size_t flat_128 = 0; // #flats up to 128 bytes
size_t flat_256 = 0; // #flats up to 256 bytes
size_t flat_512 = 0; // #flats up to 512 bytes
size_t flat_1k = 0; // #flats up to 1K bytes
size_t external = 0; // #external reps
size_t substring = 0; // #substring reps
size_t concat = 0; // #concat reps
size_t ring = 0; // #ring buffer reps
size_t btree = 0; // #btree reps
};
// The size of the cord in bytes. This matches the result of Cord::size().
int64_t size = 0;
// The estimated memory used by the sampled cord. This value matches the
// value as reported by Cord::EstimatedMemoryUsage().
// A value of 0 implies the property has not been recorded.
int64_t estimated_memory_usage = 0;
// The effective memory used by the sampled cord, inversely weighted by the
// effective indegree of each allocated node. This is a representation of the
// fair share of memory usage that should be attributed to the sampled cord.
// This value is more useful for cases where one or more nodes are referenced
// by multiple Cord instances, and for cases where a Cord includes the same
// node multiple times (either directly or indirectly).
// A value of 0 implies the property has not been recorded.
int64_t estimated_fair_share_memory_usage = 0;
// The total number of nodes referenced by this cord.
// For ring buffer Cords, this includes the 'ring buffer' node.
// For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all
// the substring, flat and external nodes referenced by the tree.
// A value of 0 implies the property has not been recorded.
int64_t node_count = 0;
// Detailed node counts per type
NodeCounts node_counts;
// The cord method responsible for sampling the cord.
MethodIdentifier method = MethodIdentifier::kUnknown;
// The cord method responsible for sampling the parent cord if applicable.
MethodIdentifier parent_method = MethodIdentifier::kUnknown;
// Update tracker tracking invocation count per cord method.
CordzUpdateTracker update_tracker;
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_

View File

@@ -0,0 +1,71 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#include "absl/base/thread_annotations.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzUpdateScope is a scoped guard around an update of a (possibly) sampled
// cord. Construction calls `info->Lock(method)` and destruction calls
// `info->Unlock()`, bracketing the cordrep update in between. Every operation
// is a no-op when `info` is null.
// See also the `Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`.
class ABSL_SCOPED_LOCKABLE CordzUpdateScope {
 public:
  // Locks `info` for an update attributed to `method`, unless `info` is null.
  CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method)
      ABSL_EXCLUSIVE_LOCK_FUNCTION(info)
      : info_(info) {
    // The call goes through the `info` parameter (not `info_`) so that it
    // matches the expression named in the lock annotation above.
    if (ABSL_PREDICT_FALSE(info_)) info->Lock(method);
  }

  // Unlocks the CordzInfo locked by the constructor, if any.
  ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() {
    if (ABSL_PREDICT_FALSE(info_)) info_->Unlock();
  }

  // A scope is bound to one lock/unlock pair: neither copyable nor movable.
  CordzUpdateScope(const CordzUpdateScope&) = delete;
  CordzUpdateScope(CordzUpdateScope&& rhs) = delete;
  CordzUpdateScope& operator=(const CordzUpdateScope&) = delete;
  CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete;

  // Forwards `rep` to `CordzInfo::SetCordRep`; does nothing without an info.
  void SetCordRep(CordRep* rep) const {
    if (ABSL_PREDICT_FALSE(info_)) info_->SetCordRep(rep);
  }

  // Returns the guarded CordzInfo, which may be null.
  CordzInfo* info() const { return info_; }

 private:
  CordzInfo* info_;
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_

View File

@@ -0,0 +1,49 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_update_scope.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Used test values
auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString;
// Constructing and destroying a scope around a null CordzInfo must be a
// harmless no-op; the test passes if it does not crash.
TEST(CordzUpdateScopeTest, ScopeNullptr) {
CordzUpdateScope scope(nullptr, kTrackCordMethod);
}
// Opens a scope on a tracked cord and calls SetCordRep on its CordzInfo while
// the scope is alive. There are no explicit expectations; the test passes if
// the sequence runs without tripping an internal check.
TEST(CordzUpdateScopeTest, ScopeSampledCord) {
TestCordData cord;
CordzInfo::TrackCord(cord.data, kTrackCordMethod);
CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod);
cord.data.cordz_info()->SetCordRep(nullptr);
}
} // namespace
ABSL_NAMESPACE_END
} // namespace cord_internal
} // namespace absl

View File

@@ -0,0 +1,119 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_
#include <atomic>
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzUpdateTracker tracks counters for Cord update methods.
//
// The purpose of CordzUpdateTracker is to track the number of calls to methods
// updating Cord data for sampled cords. The class internally uses 'lossy'
// atomic operations: Cord is thread-compatible, so there is no need to
// synchronize updates. However, Cordz collection threads may call 'Value()' at
// any point, so the class needs to provide thread safe access.
//
// This class is thread-safe. But as per above comments, all non-const methods
// should be used single-threaded only: updates are thread-safe but lossy.
class CordzUpdateTracker {
 public:
  // Identifiers of the Cord methods whose invocations are counted. The
  // enumerators double as indices into the counter array.
  enum MethodIdentifier {
    kUnknown,
    kAppendCord,
    kAppendExternalMemory,
    kAppendString,
    kAssignCord,
    kAssignString,
    kClear,
    kConstructorCord,
    kConstructorString,
    kCordReader,
    kFlatten,
    kGetAppendRegion,
    kMakeCordFromExternal,
    kMoveAppendCord,
    kMoveAssignCord,
    kMovePrependCord,
    kPrependCord,
    kPrependString,
    kRemovePrefix,
    kRemoveSuffix,
    kSubCord,

    // Number of tracked methods; this must remain the final enumerator.
    kNumMethods,
  };

  // Creates a tracker with every counter set to zero.
  constexpr CordzUpdateTracker() noexcept : values_{} {}

  // Creates a tracker holding a snapshot of the counters of `rhs`.
  CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; }

  // Overwrites every counter with the corresponding counter of `rhs`. Each
  // counter is copied with an individual relaxed load / store pair.
  CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept {
    for (int index = 0; index != kNumMethods; ++index) {
      const int64_t snapshot =
          rhs.values_[index].load(std::memory_order_relaxed);
      values_[index].store(snapshot, std::memory_order_relaxed);
    }
    return *this;
  }

  // Returns the current count recorded for `method`.
  int64_t Value(MethodIdentifier method) const {
    return values_[method].load(std::memory_order_relaxed);
  }

  // Adds `n` to the count of `method`. The update is a separate relaxed load
  // followed by a relaxed store, not an atomic read-modify-write, so
  // concurrent adds may be lost.
  void LossyAdd(MethodIdentifier method, int64_t n = 1) {
    Counter& counter = values_[method];
    const int64_t current = counter.load(std::memory_order_relaxed);
    counter.store(current + n, std::memory_order_relaxed);
  }

  // Adds every non-zero counter of `src` to the matching counter here.
  void LossyAdd(const CordzUpdateTracker& src) {
    for (int index = 0; index != kNumMethods; ++index) {
      const MethodIdentifier method = static_cast<MethodIdentifier>(index);
      const int64_t delta = src.Value(method);
      if (delta != 0) LossyAdd(method, delta);
    }
  }

 private:
  // std::atomic is not constexpr default-constructible before C++20; this
  // thin wrapper supplies a constexpr zero-initializing default constructor so
  // the tracker itself can be constexpr constructed.
  class Counter : public std::atomic<int64_t> {
   public:
    constexpr Counter() noexcept : std::atomic<int64_t>(0) {}
  };

  Counter values_[kNumMethods];
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_

View File

@@ -0,0 +1,143 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_update_tracker.h"
#include <array>
#include <thread> // NOLINT
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/synchronization/notification.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::AnyOf;
using ::testing::Eq;
using Method = CordzUpdateTracker::MethodIdentifier;
using Methods = std::array<Method, Method::kNumMethods>;
// Returns an array of all methods defined in `MethodIdentifier`
// The enumerators are listed explicitly, in declaration order; the array size
// is fixed to Method::kNumMethods by the `Methods` alias, so this list must be
// kept in sync with `MethodIdentifier` when methods are added.
Methods AllMethods() {
return Methods{Method::kUnknown,
Method::kAppendCord,
Method::kAppendExternalMemory,
Method::kAppendString,
Method::kAssignCord,
Method::kAssignString,
Method::kClear,
Method::kConstructorCord,
Method::kConstructorString,
Method::kCordReader,
Method::kFlatten,
Method::kGetAppendRegion,
Method::kMakeCordFromExternal,
Method::kMoveAppendCord,
Method::kMoveAssignCord,
Method::kMovePrependCord,
Method::kPrependCord,
Method::kPrependString,
Method::kRemovePrefix,
Method::kRemoveSuffix,
Method::kSubCord};
}
// The tracker must be usable in a constant expression (the `constexpr`
// declaration fails to compile otherwise) and start with all counters at 0.
TEST(CordzUpdateTracker, IsConstExprAndInitializesToZero) {
constexpr CordzUpdateTracker tracker;
for (Method method : AllMethods()) {
ASSERT_THAT(tracker.Value(method), Eq(0));
}
}
// Adds a distinct odd amount (1, 3, 5, ...) to each method's counter and
// checks that Value() reports exactly that amount for the same method.
TEST(CordzUpdateTracker, LossyAdd) {
int64_t n = 1;
CordzUpdateTracker tracker;
for (Method method : AllMethods()) {
tracker.LossyAdd(method, n);
EXPECT_THAT(tracker.Value(method), Eq(n));
n += 2;
}
}
// Fills `src` with a distinct value per method, copy-constructs a tracker
// from it and checks that every counter was carried over.
TEST(CordzUpdateTracker, CopyConstructor) {
int64_t n = 1;
CordzUpdateTracker src;
for (Method method : AllMethods()) {
src.LossyAdd(method, n);
n += 2;
}
// Restart the sequence so the expected values match what was stored above.
n = 1;
CordzUpdateTracker tracker(src);
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), Eq(n));
n += 2;
}
}
// Same as the copy-constructor test, but exercises copy assignment into an
// already constructed (all-zero) tracker.
TEST(CordzUpdateTracker, OperatorAssign) {
int64_t n = 1;
CordzUpdateTracker src;
CordzUpdateTracker tracker;
for (Method method : AllMethods()) {
src.LossyAdd(method, n);
n += 2;
}
// Restart the sequence so the expected values match what was stored above.
n = 1;
tracker = src;
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), Eq(n));
n += 2;
}
}
// Runs a reader thread calling Value() while the main thread performs
// LossyAdd() on the same tracker, so a thread sanitizer can flag unsafe
// access. While the writer is in progress each counter may be observed either
// as 0 (not yet updated) or as its final value; after `done` is notified all
// counters must hold their final values.
TEST(CordzUpdateTracker, ThreadSanitizedValueCheck) {
absl::Notification done;
CordzUpdateTracker tracker;
std::thread reader([&done, &tracker] {
while (!done.HasBeenNotified()) {
int n = 1;
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), AnyOf(Eq(n), Eq(0)));
n += 2;
}
}
// The writer has finished: every counter must now have its final value.
int n = 1;
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), Eq(n));
n += 2;
}
});
// Writer: each counter receives exactly one add (1, 3, 5, ...).
int64_t n = 1;
for (Method method : AllMethods()) {
tracker.LossyAdd(method, n);
n += 2;
}
done.Notify();
reader.join();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,180 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/escaping.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The 64-character base64 alphabet ('A'-'Z', 'a'-'z', '0'-'9', '+', '/'),
// indexed by 6-bit value. The array also holds the literal's terminating NUL.
const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Returns the number of characters produced by base64-encoding `input_len`
// bytes, with or without trailing '=' padding.
//
// Base64 maps every complete group of three input bytes onto four output
// characters. The handling of an incomplete final group follows
// https://tools.ietf.org/html/rfc3548: zero bits are appended on the right to
// form an integral number of 6-bit groups, and '=' characters optionally pad
// the final unit to four characters.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t full_groups = input_len / 3;
  const size_t tail_bytes = input_len % 3;

  size_t len = full_groups * 4;
  switch (tail_bytes) {
    case 0:
      // (1) The input is an integral multiple of 24 bits: the output is an
      // integral multiple of 4 characters and needs no "=" padding.
      break;
    case 1:
      // (2) The final quantum is exactly 8 bits: two characters, followed by
      // two "=" padding characters when padding is requested.
      len += do_padding ? 4 : 2;
      break;
    default:
      // (3) The final quantum is exactly 16 bits (tail_bytes == 2): three
      // characters, followed by one "=" padding character when requested.
      len += do_padding ? 4 : 3;
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
// Base64-encodes the `szsrc` bytes at `src` into `dest` (capacity `szdest`),
// looking up output characters in the 64-entry alphabet `base64`. When
// `do_padding` is true, the final group is padded with '=' to four characters.
// No terminating NUL is written.
//
// Returns the number of characters written to `dest`. Returns 0 when `dest`
// is too small (and also, trivially, when `szsrc` is 0).
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64,
bool do_padding) {
static const char kPad64 = '=';
// Quick rejection: the unpadded output needs at least 4/3 of the input size.
if (szsrc * 4 > szdest * 3) return 0;
char* cur_dest = dest;
const unsigned char* cur_src = src;
char* const limit_dest = dest + szdest;
const unsigned char* const limit_src = src + szsrc;
// Three bytes of data encodes to four characters of cyphertext.
// So we can pump through three-byte chunks atomically.
if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
while (cur_src < limit_src - 3) { // While we have >= 32 bits.
// Load four bytes big-endian and drop the lowest one, leaving the 24 bits
// of the current three-byte group.
uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
cur_src += 3;
}
}
// To save time, we didn't update szdest or szsrc in the loop. So do it now.
szdest = limit_dest - cur_dest;
szsrc = limit_src - cur_src;
/* now deal with the tail (<=3 bytes) */
switch (szsrc) {
case 0:
// Nothing left; nothing more to do.
break;
case 1: {
// One byte left: this encodes to two characters, and (optionally)
// two pad characters to round out the four-character cypherblock.
if (szdest < 2) return 0;
uint32_t in = cur_src[0];
cur_dest[0] = base64[in >> 2];
in &= 0x3;
cur_dest[1] = base64[in << 4];
cur_dest += 2;
szdest -= 2;
if (do_padding) {
if (szdest < 2) return 0;
cur_dest[0] = kPad64;
cur_dest[1] = kPad64;
cur_dest += 2;
szdest -= 2;
}
break;
}
case 2: {
// Two bytes left: this encodes to three characters, and (optionally)
// one pad character to round out the four-character cypherblock.
if (szdest < 3) return 0;
uint32_t in = absl::big_endian::Load16(cur_src);
cur_dest[0] = base64[in >> 10];
in &= 0x3FF;
cur_dest[1] = base64[in >> 4];
in &= 0x00F;
cur_dest[2] = base64[in << 2];
cur_dest += 3;
szdest -= 3;
if (do_padding) {
if (szdest < 1) return 0;
cur_dest[0] = kPad64;
cur_dest += 1;
szdest -= 1;
}
break;
}
case 3: {
// Three bytes left: same as in the big loop above. We can't do this in
// the loop because the loop above always reads 4 bytes, and the fourth
// byte is past the end of the input.
if (szdest < 4) return 0;
uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
szdest -= 4;
break;
}
default:
// Should not be reached: blocks of 4 bytes are handled
// in the while loop before this switch statement.
ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
break;
}
return (cur_dest - dest);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,58 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
#include <cassert>
#include "absl/strings/internal/resize_uninitialized.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
ABSL_CONST_INIT extern const char kBase64Chars[];
// Calculates how long a string will be when it is base64 encoded given its
// length and whether or not the result should be padded.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64` and writes the result to `dest`, which has room for `szdest`
// characters. If `do_padding` is true, the output is padded with '=' chars
// until its length is a multiple of 4. Returns the number of characters
// written to `dest`, or 0 if `dest` is too small.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64_chars` and stores the result in `*dest`, replacing its previous
// contents. If `do_padding` is true, the output is padded with '=' chars
// until its length is a multiple of 4.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
                          bool do_padding, const char* base64_chars) {
  // Size the string up front so the encoder can write straight into it.
  const size_t expected_len =
      CalculateBase64EscapedLenInternal(szsrc, do_padding);
  STLStringResizeUninitialized(dest, expected_len);

  char* const out = &(*dest)[0];
  const size_t written = Base64EscapeInternal(src, szsrc, out, dest->size(),
                                              base64_chars, do_padding);
  assert(expected_len == written);

  // Trim to what was actually written.
  dest->erase(written);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_

View File

@@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test contains common things needed by both escaping_test.cc and
// escaping_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#include <array>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// One base64 test vector: an input text and its expected encoding.
struct base64_testcase {
absl::string_view plaintext; // unencoded input
absl::string_view cyphertext; // expected encoded form of `plaintext`
};
// Returns the shared table of base64 test vectors. The table is a
// function-local static, so every call returns a reference to the same array.
// The expected encodings use '-' and '_' (the websafe alphabet) and carry no
// '=' padding.
inline const std::array<base64_testcase, 5>& base64_strings() {
static const std::array<base64_testcase, 5> testcase{{
// Some google quotes
// Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
// (Note that we're testing the websafe encoding, though, so if
// you add messages, be sure to run "tr -- '+/' '-_'" on the output)
{ "I was always good at math and science, and I never realized "
"that was unusual or somehow undesirable. So one of the things "
"I care a lot about is helping to remove that stigma, "
"to show girls that you can be feminine, you can like the things "
"that girls like, but you can also be really good at technology. "
"You can be really good at building things."
" - Marissa Meyer, Newsweek, 2010-12-22" "\n",
"SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
"bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
"ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
"YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
"b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
"a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
"c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
"ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
"ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
{ "Typical first year for a new cluster: "
"~0.5 overheating "
"~1 PDU failure "
"~1 rack-move "
"~1 network rewiring "
"~20 rack failures "
"~5 racks go wonky "
"~8 network maintenances "
"~12 router reloads "
"~3 router failures "
"~dozens of minor 30-second blips for dns "
"~1000 individual machine failures "
"~thousands of hard drive failures "
"slow disks, bad memory, misconfigured machines, flaky machines, etc."
" - Jeff Dean, The Joys of Real Hardware" "\n",
"VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
"ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
"b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
"bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
"cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
"bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
"bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
"ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
"YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
"ZWFsIEhhcmR3YXJlCg" },
{ "I'm the head of the webspam team at Google. "
"That means that if you type your name into Google and get porn back, "
"it's my fault. Unless you're a porn star, in which case porn is a "
"completely reasonable response."
" - Matt Cutts, Google Plus" "\n",
"SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
"VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
"b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
"IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
"Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
"IEdvb2dsZSBQbHVzCg" },
{ "It will still be a long time before machines approach human "
"intelligence. "
"But luckily, machines don't actually have to be intelligent; "
"they just have to fake it. Access to a wealth of information, "
"combined with a rudimentary decision-making capacity, "
"can often be almost as useful. Of course, the results are better yet "
"when coupled with intelligence. A reference librarian with access to "
"a good search engine is a formidable tool."
" - Craig Silverstein, Siemens Pictures of the Future, Spring 2004"
"\n",
"SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
"YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
"aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
"dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
"IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
"ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
"IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
"IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
"bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
"Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
"biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
"NAo" },
// Degenerate edge case
{ "",
"" },
}};
return testcase;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_

View File

@@ -0,0 +1,112 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
int memcasecmp(const char* s1, const char* s2, size_t len) {
const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1);
const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2);
for (size_t i = 0; i < len; i++) {
const int diff =
int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} -
int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))};
if (diff != 0) return diff;
}
return 0;
}
// Returns a malloc()-allocated copy of the `slen` bytes starting at `s`, or
// nullptr if the allocation fails. The copy is not NUL-terminated; the caller
// releases it with free().
char* memdup(const char* s, size_t slen) {
  char* const duplicate = static_cast<char*>(malloc(slen));
  if (duplicate != nullptr) {
    memcpy(duplicate, s, slen);
  }
  return duplicate;
}
// Returns a pointer to the last byte equal to `c` within the `slen` bytes
// starting at `s`, or nullptr if there is no such byte (always the case when
// slen == 0).
//
// The scan counts an index down from `slen` instead of walking a pointer down
// from `s + slen - 1`. The pointer form has to create `s - 1` (immediately
// when slen == 0, and as the loop's terminating value otherwise), and forming
// a pointer before the start of an array is undefined behavior.
char* memrchr(const char* s, int c, size_t slen) {
  for (size_t remaining = slen; remaining > 0; --remaining) {
    const char* e = s + (remaining - 1);
    if (*e == c) return const_cast<char*>(e);
  }
  return nullptr;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// consists only of bytes present in `accept`, a NUL-terminated set of bytes.
//
// A NUL byte inside `s` is never accepted, because the terminator of `accept`
// is not treated as a member of the set.
//
// The bound is checked before each byte of `s` is read, so a buffer that is
// accepted in its entirety is never read past its end.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span = 0;
  for (; span < slen; ++span) {
    const char c = s[span];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) ++candidate;
    if (*candidate == '\0') break;  // `c` is not in the accept set.
  }
  return span;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// contains none of the bytes in `reject`, a NUL-terminated set of bytes.
// Returns `slen` when no byte of `s` is in the set.
size_t memcspn(const char* s, size_t slen, const char* reject) {
  for (size_t index = 0; index < slen; ++index) {
    for (const char* r = reject; *r != '\0'; ++r) {
      if (*r == s[index]) return index;
    }
  }
  return slen;
}
// Returns a pointer to the first of the `slen` bytes at `s` that also appears
// in `accept` (a NUL-terminated set of bytes), or nullptr if none does.
char* mempbrk(const char* s, size_t slen, const char* accept) {
  const char* const end = s + slen;
  for (const char* cursor = s; cursor != end; ++cursor) {
    for (const char* a = accept; *a != '\0'; ++a) {
      if (*a == *cursor) return const_cast<char*>(cursor);
    }
  }
  return nullptr;
}
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
//
// Returns a pointer to the first occurrence of the `neelen`-byte needle inside
// the `haylen`-byte haystack, or nullptr if it does not occur. An empty needle
// matches at `phaystack`, even when the haystack is empty.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
                     size_t neelen) {
  if (neelen == 0) return phaystack;
  if (neelen > haylen) return nullptr;

  // One past the last position at which a full needle still fits.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  const char* cursor = phaystack;
  while (true) {
    // memchr returns void* on Posix-compliant systems and const void* on
    // Windows; both convert to const void*.
    const void* const hit = memchr(cursor, pneedle[0], scan_end - cursor);
    if (hit == nullptr) return nullptr;
    const char* const candidate = static_cast<const char*>(hit);
    if (memcmp(candidate, pneedle, neelen) == 0) return candidate;
    cursor = candidate + 1;
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,148 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// These routines provide mem versions of standard C string routines,
// such as strpbrk. They function exactly the same as the str versions,
// so if you wonder what they are, replace the word "mem" by
// "str" and check out the man page. I could return void*, as the
// strutil.h mem*() routines tend to do, but I return char* instead
// since this is by far the most common way these functions are called.
//
// The difference between the mem and str versions is the mem version
// takes a pointer and a length, rather than a '\0'-terminated string.
// The memcase* routines defined here assume the locale is "C"
// (they use absl::ascii_tolower instead of tolower).
//
// These routines are based on the BSD library.
//
// Here's a list of routines from string.h, and their mem analogues.
// Functions in lowercase are defined in string.h; those in UPPERCASE
// are defined here:
//
// strlen --
// strcat strncat MEMCAT
// strcpy strncpy memcpy
// -- memccpy (very cool function, btw)
// -- memmove
// -- memset
// strcmp strncmp memcmp
// strcasecmp strncasecmp MEMCASECMP
// strchr memchr
// strcoll --
// strxfrm --
// strdup strndup MEMDUP
// strrchr MEMRCHR
// strspn MEMSPN
// strcspn MEMCSPN
// strpbrk MEMPBRK
// strstr MEMSTR MEMMEM
// (g)strcasestr MEMCASESTR MEMCASEMEM
// strtok --
// strprefix MEMPREFIX (strprefix is from strutil.h)
// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
// -- MEMIS
// -- MEMCASEIS
// strcount MEMCOUNT (strcount is from strutil.h)
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#include <cstddef>
#include <cstring>
#include "absl/base/port.h" // disable some warnings on Windows
#include "absl/strings/ascii.h" // for absl::ascii_tolower
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Copies `srclen` bytes from `src` to `dest + destlen`, i.e. appends them to a
// buffer that already holds `destlen` bytes. Returns a pointer to the start of
// the appended bytes. No terminator is written and no bounds are checked; the
// caller must provide at least `destlen + srclen` bytes at `dest`.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;
  memcpy(tail, src, srclen);
  return tail;
}
// Compares the first `len` bytes of `s1` and `s2` after ASCII-lowercasing each
// byte. Returns 0 if all pairs match, otherwise the difference (s1 minus s2)
// of the first mismatching pair of lowercased bytes, taken as unsigned chars.
int memcasecmp(const char* s1, const char* s2, size_t len);
// Returns a malloc()-allocated copy of the `slen` bytes at `s`, or nullptr if
// the allocation fails. Release the result with free().
char* memdup(const char* s, size_t slen);
// Returns a pointer to the last byte equal to `c` among the `slen` bytes at
// `s`, or nullptr if there is none.
char* memrchr(const char* s, int c, size_t slen);
// Returns the length of the leading run of bytes of `s` (at most `slen`) that
// all appear in `accept`, a NUL-terminated set of bytes.
size_t memspn(const char* s, size_t slen, const char* accept);
// Returns the length of the leading run of bytes of `s` (at most `slen`) none
// of which appears in `reject`, a NUL-terminated set of bytes.
size_t memcspn(const char* s, size_t slen, const char* reject);
// Returns a pointer to the first of the `slen` bytes at `s` that appears in
// `accept` (a NUL-terminated set of bytes), or nullptr if none does.
char* mempbrk(const char* s, size_t slen, const char* accept);
// This is for internal use only. Don't call this directly
//
// Finds the first occurrence of the `neelen`-byte needle in the `haylen`-byte
// haystack and returns a pointer to it, or nullptr if there is none. An empty
// needle matches at `haystack`, even if `haylen` is 0. When `case_sensitive`
// is false, bytes are compared after ASCII-lowercasing both sides.
template <bool case_sensitive>
const char* int_memmatch(const char* haystack, size_t haylen,
                         const char* needle, size_t neelen) {
  if (neelen == 0) {
    return haystack;  // even if haylen is 0
  }
  const char* const hay_limit = haystack + haylen;
  size_t matched = 0;  // number of leading needle bytes matched so far
  for (const char* cursor = haystack; cursor < hay_limit; ++cursor) {
    char hay_ch = *cursor;
    char needle_ch = needle[matched];
    if (!case_sensitive) {
      hay_ch = absl::ascii_tolower(static_cast<unsigned char>(hay_ch));
      needle_ch = absl::ascii_tolower(static_cast<unsigned char>(needle_ch));
    }
    if (hay_ch == needle_ch) {
      ++matched;
      if (matched == neelen) {
        return cursor + 1 - neelen;
      }
      continue;
    }
    if (matched != 0) {
      // A prefix matched and then failed (find "aab" in "aaab"): rewind to the
      // byte where the partial match began; the loop increment then moves the
      // scan to the byte right after it.
      cursor -= matched;
      matched = 0;
    }
  }
  return nullptr;
}
// These are the guys you can call directly
//
// Case-sensitive search for the NUL-terminated string `pneedle` within the
// `haylen` bytes at `phaystack`. Returns the first match or nullptr.
inline const char* memstr(const char* phaystack, size_t haylen,
                          const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch</*case_sensitive=*/true>(phaystack, haylen, pneedle,
                                               needle_len);
}
// ASCII case-insensitive search for the NUL-terminated string `pneedle`
// within the `haylen` bytes at `phaystack`. Returns the first match or
// nullptr.
inline const char* memcasestr(const char* phaystack, size_t haylen,
                              const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch</*case_sensitive=*/false>(phaystack, haylen, pneedle,
                                                needle_len);
}
// Case-sensitive search for the `needlelen` bytes at `pneedle` within the
// `haylen` bytes at `phaystack`. Returns the first match or nullptr; an empty
// needle matches at `phaystack`.
inline const char* memmem(const char* phaystack, size_t haylen,
                          const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = true;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// ASCII case-insensitive search for the `needlelen` bytes at `pneedle` within
// the `haylen` bytes at `phaystack`. Returns the first match or nullptr; an
// empty needle matches at `phaystack`.
inline const char* memcasemem(const char* phaystack, size_t haylen,
                              const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = false;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
//
// Case-sensitive search for the `neelen` bytes at `pneedle` within the
// `haylen` bytes at `phaystack`. Returns a pointer to the first match, or
// nullptr if there is none. An empty needle matches at `phaystack`.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
size_t neelen);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_

View File

@@ -0,0 +1,323 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <algorithm>
#include <cstdlib>
#include "benchmark/benchmark.h"
#include "absl/strings/ascii.h"
// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
// That gives us:
// - an easy search: 'b'
// - a medium search: 'ab'. That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack)
// We benchmark case-sensitive and case-insensitive versions of
// three memmem implementations:
// - memmem() from memutil.h
// - search() from STL
// - memmatch(), a custom implementation using memchr and memcmp.
// Here are sample results:
//
// Run on (12 X 3800 MHz CPU s)
// CPU Caches:
// L1 Data 32K (x6)
// L1 Instruction 32K (x6)
// L2 Unified 256K (x6)
// L3 Unified 15360K (x1)
// ----------------------------------------------------------------
// Benchmark Time CPU Iterations
// ----------------------------------------------------------------
// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
// BM_MemmemStartup 5 ns 5 ns 150821500
// BM_SearchStartup 5 ns 5 ns 150644203
// BM_MemmatchStartup 7 ns 7 ns 97068802
//
// Conclusions:
//
// The following recommendations are based on the sample results above. However,
// we have found that the performance of STL search can vary significantly
// depending on compiler and standard library implementation. We recommend you
// run the benchmarks for yourself on relevant platforms.
//
// If you need case-insensitive, STL search is slightly better than memmem for
// all cases.
//
// Case-sensitive is more subtle:
// Custom memmatch is _very_ fast at scanning, so if you have very few possible
// matches in your haystack, that's the way to go. Performance drops
// significantly with more matches.
//
// STL search is slightly faster than memmem in the medium and pathological
// benchmarks. However, the performance of memmem is currently more dependable
// across platforms and build configurations.
namespace {
// Size in bytes of the shared benchmark haystack, as an int and as the 64-bit
// value used for the bytes-processed counters.
constexpr int kHaystackSize = 10000;
constexpr int64_t kHaystackSize64 = kHaystackSize;

// Allocates the haystack: kHaystackSize - 1 copies of 'a' followed by a single
// 'b'. The buffer is not NUL-terminated and is never freed by this file.
const char* MakeHaystack() {
  char* const buffer = new char[kHaystackSize];
  std::fill_n(buffer, kHaystackSize - 1, 'a');
  buffer[kHaystackSize - 1] = 'b';
  return buffer;
}

// The haystack shared by every benchmark below.
const char* const kHaystack = MakeHaystack();
// Easy search: memmem() for the one-byte needle "b", which only matches the
// final byte of the haystack. Reports throughput as haystack bytes per
// iteration.
void BM_Memmem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmem);
// Medium search: memmem() for "ab". Every 'a' in the haystack starts a
// partial match; the only full match is at the very end.
void BM_MemmemMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemMedium);
// Pathological search: memmem() with the second half of the haystack itself
// as the needle (a long run of 'a' ending in 'b'), so every start position
// matches a long prefix before failing.
void BM_MemmemPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemPathological);
// Easy search, case-insensitive: memcasemem() for the one-byte needle "b".
void BM_Memcasemem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasemem);
// Medium search, case-insensitive: memcasemem() for "ab".
void BM_MemcasememMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememMedium);
// Pathological search, case-insensitive: memcasemem() with the second half of
// the haystack as the needle.
void BM_MemcasememPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememPathological);
// Equality predicate for std::search: true when `a` and `b` are the same
// character after ASCII-lowercasing both.
bool case_eq(const char a, const char b) {
  const auto lowered_a = absl::ascii_tolower(a);
  const auto lowered_b = absl::ascii_tolower(b);
  return lowered_a == lowered_b;
}
// Easy search with std::search: the needle is the last haystack byte ('b').
void BM_Search(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Search);
// Medium search with std::search: the needle is the last two haystack bytes
// ("ab").
void BM_SearchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchMedium);
// Pathological search with std::search: the needle is the second half of the
// haystack.
void BM_SearchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchPathological);
// Easy search with std::search and the case-insensitive predicate case_eq:
// the needle is the last haystack byte.
void BM_Searchcase(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Searchcase);
// Medium search with std::search and case_eq: the needle is the last two
// haystack bytes.
void BM_SearchcaseMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcaseMedium);
// Pathological search with std::search and case_eq: the needle is the second
// half of the haystack.
void BM_SearchcasePathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcasePathological);
// Case-insensitive counterpart of memchr: returns a pointer to the first of
// the `slen` bytes at `s` whose ASCII-lowercased value equals the
// ASCII-lowercased `c`, or nullptr if there is none.
char* memcasechr(const char* s, int c, size_t slen) {
  const int target = absl::ascii_tolower(c);
  const char* const end = s + slen;
  for (const char* cursor = s; cursor != end; ++cursor) {
    if (absl::ascii_tolower(*cursor) == target) {
      return const_cast<char*>(cursor);
    }
  }
  return nullptr;
}
// Case-insensitive counterpart of memmatch(), built from memcasechr() and
// memcasecmp(). Returns the first position in the `haylen`-byte haystack where
// the `neelen`-byte needle matches ignoring ASCII case, or nullptr. An empty
// needle matches at `phaystack`.
const char* memcasematch(const char* phaystack, size_t haylen,
                         const char* pneedle, size_t neelen) {
  if (neelen == 0) return phaystack;  // even if haylen is 0
  if (neelen > haylen) return nullptr;

  // One past the last position at which a full needle still fits.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  for (;;) {
    const char* const candidate =
        memcasechr(phaystack, pneedle[0], scan_end - phaystack);
    if (candidate == nullptr) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    phaystack = candidate + 1;
  }
}
// Easy search: memmatch() (memchr + memcmp) for the one-byte needle "b".
void BM_Memmatch(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmatch);
// Medium search: memmatch() for "ab"; every 'a' is a memchr hit that then
// needs a memcmp.
void BM_MemmatchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchMedium);
// Pathological search: memmatch() with the second half of the haystack as the
// needle.
void BM_MemmatchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmatch(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchPathological);
// Easy search: the file-local memcasematch() for the one-byte needle "b".
void BM_Memcasematch(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasematch);
// Medium search: the file-local memcasematch() for "ab".
void BM_MemcasematchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchMedium);
// Pathological search: the file-local memcasematch() with the second half of
// the haystack as the needle.
void BM_MemcasematchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchPathological);
// Startup cost of memmem(): searches only the last 10 haystack bytes for the
// one-byte needle at the end of the haystack. No byte counter is reported.
void BM_MemmemStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmem(
kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
}
}
BENCHMARK(BM_MemmemStartup);
// Startup cost of std::search: searches only the last 10 haystack bytes for
// the final haystack byte. No byte counter is reported.
void BM_SearchStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
}
}
BENCHMARK(BM_SearchStartup);
// Startup cost of memmatch(): searches only the last 10 haystack bytes for
// the one-byte needle at the end of the haystack. No byte counter is reported.
void BM_MemmatchStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmatch(
kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
}
}
BENCHMARK(BM_MemmatchStartup);
} // namespace

View File

@@ -0,0 +1,179 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit test for memutil.cc
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
namespace {
// Test-local helper: returns a pointer to the first of the `slen` bytes at `s`
// that equals `c` when both are ASCII-lowercased, or nullptr if none does.
static char* memcasechr(const char* s, int c, size_t slen) {
  const int wanted = absl::ascii_tolower(c);
  for (size_t offset = 0; offset < slen; ++offset) {
    if (absl::ascii_tolower(s[offset]) == wanted) {
      return const_cast<char*>(s + offset);
    }
  }
  return nullptr;
}
// Test-local helper: ASCII case-insensitive search for the `neelen`-byte
// needle in the `haylen`-byte haystack, using memcasechr() to find candidate
// positions and memcasecmp() to confirm them. Returns the first match or
// nullptr; an empty needle matches at `phaystack`.
static const char* memcasematch(const char* phaystack, size_t haylen,
                                const char* pneedle, size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) {
    return nullptr;
  }
  // One past the last position at which a full needle still fits.
  const char* const limit = phaystack + haylen - neelen + 1;
  const char* hit = memcasechr(phaystack, pneedle[0], limit - phaystack);
  while (hit != nullptr) {
    if (absl::strings_internal::memcasecmp(hit, pneedle, neelen) == 0) {
      return hit;
    }
    phaystack = hit + 1;
    hit = memcasechr(phaystack, pneedle[0], limit - phaystack);
  }
  return nullptr;
}
// Exercises every routine declared in memutil.h against short literal inputs.
TEST(MemUtilTest, AllTests) {
// check memutil functions
// memcat + memcasecmp: build "hello there" in `a`, then compare it
// case-insensitively against equal, greater and truncated inputs.
char a[1000];
absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 1),
-1);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 2),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
// memdup: only checks that the result can be passed to free(); the copied
// contents are not inspected.
char* p = absl::strings_internal::memdup("hello", 5);
free(p);
// memrchr: last 'e' in the full string, last 'e' with the final byte
// excluded, and a byte that does not occur.
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 1);
EXPECT_TRUE(p && p[-1] == 'r');
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 2);
EXPECT_TRUE(p && p[-1] == 'h');
p = absl::strings_internal::memrchr("hello there", 'u',
sizeof("hello there") - 1);
EXPECT_TRUE(p == nullptr);
// memspn: length of the leading run made up only of accepted bytes.
int len = absl::strings_internal::memspn("hello there",
sizeof("hello there") - 1, "hole");
EXPECT_EQ(len, sizeof("hello") - 1);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"u");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 1, "trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 2, "trole h!");
EXPECT_EQ(len, sizeof("hello there!") - 2);
// memcspn: length of the leading run containing no rejected byte.
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "leho");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "u");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, " ");
EXPECT_EQ(len, 5);
// mempbrk: first byte that belongs to the accept set, including a case where
// the only candidate lies just past the given length.
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"leho");
EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"nu");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there!",
sizeof("hello there!") - 2, "!");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
" t ");
EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
// memmem: matches at the start and end of the haystack, needles whose length
// argument is shorter than the literal, and needles that do not occur.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memcasemem: the same cases as memmem, with the needle's letter case
// differing from the haystack's.
{
const char kHaystack[] = "aBcDeFgHiJ";
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memmatch: the same cases as memmem.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
nullptr);
}
}
} // namespace

View File

@@ -0,0 +1,184 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by numbers_test.cc,
// numbers_legacy_test.cc and numbers_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Writes the textual representation of `value` in the given `base` into
// `*destination`, replacing its previous contents.
//
// Digits above 9 are written as uppercase letters ('A' for 10 up to 'Z' for
// 35) and negative values get a leading '-'. Returns false, leaving
// `*destination` empty, if `base` is outside [2, 36]; returns true otherwise.
template <typename IntType>
inline bool Itoa(IntType value, int base, std::string* destination) {
  static const char kDigits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  destination->clear();
  if (base < 2 || base > 36) {
    return false;
  }
  if (value == 0) {
    destination->push_back('0');
    return true;
  }
  const bool is_negative = value < 0;
  do {
    const IntType quotient = value / base;
    const auto rebuilt = quotient * base;
    // Magnitude of the remainder. Can't use std::abs here because of problems
    // when IntType is unsigned.
    int digit;
    if (value > rebuilt) {
      digit = static_cast<int>(value - rebuilt);
    } else {
      digit = static_cast<int>(rebuilt - value);
    }
    // Digits are produced least-significant first, so each goes in front.
    destination->insert(destination->begin(), kDigits[digit]);
    value = quotient;
  } while (value != 0);
  if (is_negative) {
    destination->insert(destination->begin(), '-');
  }
  return true;
}
// One row of the 32-bit unsigned parsing table. Rows are written as positional
// aggregate initializers, so the field order must not change.
struct uint32_test_case {
const char* str;  // input text; nullptr in the table's final row
bool expect_ok;  // NOTE(review): presumably whether parsing should succeed
int base; // base to pass to the conversion function
uint32_t expected;  // NOTE(review): presumably the value the parse should yield
};
// Returns the shared table of 27 uint32_test_case rows. The table is a
// function-local static, so every caller receives a reference to the same
// array. The last row has a null `str`.
inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() {
static const std::array<uint32_test_case, 27> test_cases{{
{"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{"0x34234324", true, 16, 0x34234324},
{"34234324", true, 16, 0x34234324},
{"0", true, 16, 0},
{" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
{" \t\n 72717222", true, 8, 072717222},
{" \t\n 072717222", true, 8, 072717222},
{" \t\n 072717228", false, 8, 07271722},
{"0", true, 0, 0},
// Base-10 version.
{"34234324", true, 0, 34234324},
{"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()},
{"34234324 \n\t", true, 10, 34234324},
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
{"34234324a", false, 0, 34234324},
{"34234.3", false, 0, 34234},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()},
{"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()},
// NOTE(review): null `str` looks like an end-of-table marker - confirm
// against the tests that iterate this table.
{nullptr, false, 0, 0},
}};
return test_cases;
}
// One row of the 64-bit unsigned parsing table. Rows are written as positional
// aggregate initializers, so the field order must not change.
struct uint64_test_case {
const char* str;  // input text; nullptr in the table's final row
bool expect_ok;  // NOTE(review): presumably whether parsing should succeed
int base;  // NOTE(review): presumably the base handed to the parser - confirm
uint64_t expected;  // NOTE(review): presumably the value the parse should yield
};
// Returns the shared table of 34 uint64_test_case rows. The table is a
// function-local static, so every caller receives a reference to the same
// array. The last row has a null `str`.
inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() {
static const std::array<uint64_test_case, 34> test_cases{{
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"0", true, 16, 0},
{"000", true, 0, 0},
{"0", true, 0, 0},
{" \t\n 0xffffffffffffffff", true, 16,
(std::numeric_limits<uint64_t>::max)()},
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12845670123456701234", false, 8, 0},
// Base-10 version.
{"34234324487834466", true, 0, int64_t{34234324487834466}},
{" \t\n 18446744073709551615", true, 0,
(std::numeric_limits<uint64_t>::max)()},
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
{"0", true, 0, 0},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0},
{"34234324487834466a", false, 0, 0},
{"34234487834466.3", false, 0, 0},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"18446744073709551616", false, 10, 0},
{"18446744073709551616", false, 0, 0},
{"0x10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 0,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x1234", true, 16, 0x1234},
// Base-10 string version.
{"1234", true, 0, 1234},
// NOTE(review): null `str` looks like an end-of-table marker - confirm
// against the tests that iterate this table.
{nullptr, false, 0, 0},
}};
return test_cases;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_

View File

@@ -0,0 +1,36 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// streambuf hook for writing a single character: appends `c` to the target
// string unless `c` is the end-of-file marker. Always returns 1, a value
// other than eof, so the stream treats the write as successful. Requires a
// non-null target string.
OStringStream::Buf::int_type OStringStream::overflow(int c) {
  assert(s_);
  const bool is_eof =
      Buf::traits_type::eq_int_type(c, Buf::traits_type::eof());
  if (!is_eof) {
    s_->push_back(static_cast<char>(c));
  }
  return 1;
}
// streambuf hook for bulk writes: appends the `n` characters starting at `s`
// to the target string and reports all `n` as written. Requires a non-null
// target string.
std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) {
  assert(s_);
  std::string& target = *s_;
  target.append(s, n);
  return n;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,89 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#include <cassert>
#include <ostream>
#include <streambuf>
#include <string>
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The same as std::ostringstream but appends to a user-specified std::string,
// and is faster. It is ~70% faster to create, ~50% faster to write to, and
// completely free to extract the result std::string.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42 << ' ' << 3.14; // appends to `s`
//
// The stream object doesn't have to be named. Starting from C++11 operator<<
// works with rvalues of std::ostream.
//
// std::string s;
// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
//
// OStringStream is faster to create than std::ostringstream but it's still
// relatively slow. Avoid creating multiple streams where a single stream will
// do.
//
// Creates unnecessary instances of OStringStream: slow.
//
// std::string s;
// OStringStream(&s) << 42;
// OStringStream(&s) << ' ';
// OStringStream(&s) << 3.14;
//
// Creates a single instance of OStringStream and reuses it: fast.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42;
// strm << ' ';
// strm << 3.14;
//
// Note: flush() has no effect. No reason to call it.
class OStringStream : private std::streambuf, public std::ostream {
 public:
  // Creates a stream that appends to `*s`. A null `s` is permitted, but then
  // str(p) must be called with a non-null string before anything is written.
  //
  // The stream's destructor never touches the target string, so the string
  // may be destroyed before the stream.
  explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}

  // Returns the string currently being appended to (null if none is set).
  std::string* str() { return s_; }
  const std::string* str() const { return s_; }

  // Redirects all subsequent output to `*s`.
  void str(std::string* s) { s_ = s; }

 private:
  using Buf = std::streambuf;

  // Stream-buffer hooks; only declared here, defined outside the class body.
  Buf::int_type overflow(int c) override;
  std::streamsize xsputn(const char* s, std::streamsize n) override;

  std::string* s_;  // target string; never deleted by the stream
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_

View File

@@ -0,0 +1,106 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <sstream>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Selects what a benchmark iteration does with the accumulated output once
// all writes are done.
enum StringType {
  kNone = 0,       // nothing is extracted
  kStdString = 1,  // the output is copied into a separate std::string
};
// Benchmark body for std::ostringstream. Every iteration builds a fresh
// stream, writes a payload of state.range(1) bytes state.range(0) times and,
// when kOutput is kStdString, additionally copies the result out via str().
template <StringType kOutput>
void BM_StdStream(benchmark::State& state) {
  const int num_writes = state.range(0);
  const int bytes_per_write = state.range(1);
  const std::string payload(bytes_per_write, 'x');
  for (auto _ : state) {
    std::ostringstream strm;
    benchmark::DoNotOptimize(strm);
    for (int left = num_writes; left != 0; --left) {
      strm << payload;
    }
    // kNone extracts nothing; only kStdString pays for the copy.
    if (kOutput == kStdString) {
      std::string s = strm.str();
      benchmark::DoNotOptimize(s);
    }
  }
}
// Create the stream, optionally write to it, then destroy it.
// Each ArgPair is (number of writes, bytes per write); BM_StdStream reads the
// two values back through state.range(0) and state.range(1).
BENCHMARK_TEMPLATE(BM_StdStream, kNone)
// Zero writes: only stream construction and destruction are exercised.
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// Same (number of writes, bytes per write) pairs, minus the zero-write case.
BENCHMARK_TEMPLATE(BM_StdStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Benchmark body for OStringStream. Every iteration creates an empty target
// string and a stream bound to it, writes a payload of state.range(1) bytes
// state.range(0) times and, when kOutput is kStdString, additionally copies
// the target string.
template <StringType kOutput>
void BM_CustomStream(benchmark::State& state) {
  const int num_writes = state.range(0);
  const int bytes_per_write = state.range(1);
  const std::string payload(bytes_per_write, 'x');
  for (auto _ : state) {
    std::string out;
    absl::strings_internal::OStringStream strm(&out);
    benchmark::DoNotOptimize(strm);
    for (int left = num_writes; left != 0; --left) {
      strm << payload;
    }
    // kNone extracts nothing; only kStdString pays for the copy.
    if (kOutput == kStdString) {
      std::string s = out;
      benchmark::DoNotOptimize(s);
    }
  }
}
// Create the stream, optionally write to it, then destroy it.
// Each ArgPair is (number of writes, bytes per write); BM_CustomStream reads
// the two values back through state.range(0) and state.range(1).
BENCHMARK_TEMPLATE(BM_CustomStream, kNone)
// Zero writes: only stream construction and destruction are exercised.
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// It's not useful in practice to extract std::string from OStringStream; we
// measure it for completeness.
BENCHMARK_TEMPLATE(BM_CustomStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
} // namespace

View File

@@ -0,0 +1,102 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
#include "gtest/gtest.h"
namespace {
// OStringStream must be usable wherever a std::ostream is expected.
TEST(OStringStream, IsOStream) {
  using absl::strings_internal::OStringStream;
  static_assert(std::is_base_of<std::ostream, OStringStream>::value, "");
}
// Construction and destruction with a null target, a live target, and a
// target that is destroyed before the stream.
TEST(OStringStream, ConstructDestroy) {
  using Stream = absl::strings_internal::OStringStream;

  // No target string at all.
  {
    Stream strm(nullptr);
    EXPECT_EQ(nullptr, strm.str());
  }

  // Creating and destroying a stream leaves the target's contents alone.
  {
    std::string target = "abc";
    {
      Stream strm(&target);
      EXPECT_EQ(&target, strm.str());
    }
    EXPECT_EQ("abc", target);
  }

  // The target may be destroyed while the stream is still alive.
  {
    std::unique_ptr<std::string> owned(new std::string);
    Stream strm(owned.get());
    owned.reset();
  }
}
// str() reports the current target through both const and non-const access,
// and str(p) retargets the stream (including to null).
TEST(OStringStream, Str) {
  using Stream = absl::strings_internal::OStringStream;

  std::string first;
  Stream strm(&first);
  const Stream& c_strm = strm;

  // Constness of the stream carries over to the returned pointer type.
  static_assert(std::is_same<decltype(strm.str()), std::string*>::value, "");
  static_assert(
      std::is_same<decltype(c_strm.str()), const std::string*>::value, "");

  EXPECT_EQ(&first, strm.str());
  EXPECT_EQ(&first, c_strm.str());

  // Re-setting the same target is a no-op.
  strm.str(&first);
  EXPECT_EQ(&first, strm.str());
  EXPECT_EQ(&first, c_strm.str());

  // Switching to a different target.
  std::string second;
  strm.str(&second);
  EXPECT_EQ(&second, strm.str());
  EXPECT_EQ(&second, c_strm.str());

  // Detaching the target.
  strm.str(nullptr);
  EXPECT_EQ(nullptr, strm.str());
  EXPECT_EQ(nullptr, c_strm.str());
}
// Writes through a named stream are appended to the target immediately and
// remain there after the stream is gone.
// NOTE(review): the suite name is spelled "OStreamStream" here, unlike the
// "OStringStream" suites earlier in this file; kept as is so the registered
// test name does not change.
TEST(OStreamStream, WriteToLValue) {
  using Stream = absl::strings_internal::OStringStream;

  std::string target = "abc";
  {
    Stream strm(&target);
    EXPECT_EQ("abc", target);
    strm << "";
    EXPECT_EQ("abc", target);
    strm << 42;
    EXPECT_EQ("abc42", target);
    strm << 'x' << 'y';
    EXPECT_EQ("abc42xy", target);
  }
  EXPECT_EQ("abc42xy", target);
}
// Writes through unnamed temporary streams are appended to the target too.
// NOTE(review): the suite name is spelled "OStreamStream" here, unlike the
// "OStringStream" suites earlier in this file; kept as is so the registered
// test name does not change.
TEST(OStreamStream, WriteToRValue) {
  using Stream = absl::strings_internal::OStringStream;

  std::string target = "abc";
  Stream(&target) << "";
  EXPECT_EQ("abc", target);
  Stream(&target) << 42;
  EXPECT_EQ("abc42", target);
  Stream(&target) << 'x' << 'y';
  EXPECT_EQ("abc42xy", target);
}
} // namespace

View File

@@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// The exact value of 1e23 falls precisely halfway between two representable
// doubles. Furthermore, the rounding rules we prefer (break ties by rounding
// to the nearest even) dictate in this case that the number should be rounded
// down, but this is not completely specified for floating-point literals in
// C++. (It just says to use the default rounding mode of the standard
// library.) We ensure the result we want by using a number that has an
// unambiguous correctly rounded answer.
constexpr double k1e23 = 9999999999999999e7;
// Powers of ten indexed by (exponent + 324): entry 0 stands for 10^-324 and is
// stored as 0.0, entries 1 through 632 hold 1e-323 through 1e+308. The 1e+23
// slot uses k1e23 instead of a plain literal.
constexpr double kPowersOfTen[] = {
0.0, 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307,
1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298,
1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289,
1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262,
1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253,
1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244,
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235,
1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217,
1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208,
1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199,
1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172,
1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163,
1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154,
1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145,
1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127,
1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118,
1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109,
1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91,
1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82,
1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73,
1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64,
1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55,
1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37,
1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19,
1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8,
1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17,
1e+18, 1e+19, 1e+20, 1e+21, 1e+22, k1e23, 1e+24, 1e+25, 1e+26,
1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35,
1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44,
1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53,
1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62,
1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71,
1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89,
1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98,
1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107,
1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116,
1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125,
1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134,
1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143,
1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152,
1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161,
1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170,
1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188,
1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206,
1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215,
1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224,
1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233,
1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242,
1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251,
1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278,
1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287,
1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305,
1e+306, 1e+307, 1e+308,
};
} // namespace
// Looks up 10^exp in kPowersOfTen. Exponents above 308 yield INFINITY and
// exponents below -324 yield 0.0; everything in between is read from the
// table at index exp + 324.
double Pow10(int exp) {
  if (exp > 308) return INFINITY;
  if (exp < -324) return 0.0;
  return kPowersOfTen[exp + 324];
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,40 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test helper library contains a table of powers of 10, to guarantee
// precise values are computed across the full range of doubles. We can't rely
// on the pow() function, because not all standard libraries ship a version
// that is precise.
#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#include <vector>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Computes the precise value of 10^exp. (I.e. the nearest representable
// double to the exact value, rounding to nearest-even in the (single) case of
// being exactly halfway between.)
//
// Out-of-range exponents, as implemented in pow10_helper.cc: exp <= -324
// returns 0.0 and exp > 308 returns INFINITY.
double Pow10(int exp);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_

View File

@@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// One expectation for Pow10(): the test below requires Pow10(power) to equal
// std::ldexp(significand, radix), i.e. significand * 2^radix. Initialized
// positionally, so the member order matters.
struct TestCase {
int power; // Testing Pow10(power)
uint64_t significand; // Integer significand of the expected value
int radix; // Expected value is significand * 2^radix
};
// Checks Pow10() against hand-listed expectations, each stored as an integer
// significand and a power-of-two exponent (see TestCase).
TEST(Pow10HelperTest, Works) {
// The logic in pow10_helper.cc is so simple that theoretically we don't even
// need a test. However, we're paranoid and believe that there may be
// compilers that don't round floating-point literals correctly, even though
// it is specified by the standard. We check various edge cases, just to be
// sure.
constexpr TestCase kTestCases[] = {
// Subnormals
{-323, 0x2, -1074},
{-322, 0x14, -1074},
{-321, 0xca, -1074},
{-320, 0x7e8, -1074},
{-319, 0x4f10, -1074},
{-318, 0x316a2, -1074},
{-317, 0x1ee257, -1074},
{-316, 0x134d761, -1074},
{-315, 0xc1069cd, -1074},
{-314, 0x78a42205, -1074},
{-313, 0x4b6695433, -1074},
{-312, 0x2f201d49fb, -1074},
{-311, 0x1d74124e3d1, -1074},
{-310, 0x12688b70e62b, -1074},
{-309, 0xb8157268fdaf, -1074},
{-308, 0x730d67819e8d2, -1074},
// Values that are very close to rounding the other way.
// Comment shows difference of significand from the true value.
{-307, 0x11fa182c40c60d, -1072}, // -.4588
{-290, 0x18f2b061aea072, -1016}, // .4854
{-276, 0x11BA03F5B21000, -969}, // .4709
{-259, 0x1899C2F6732210, -913}, // .4830
{-252, 0x1D53844EE47DD1, -890}, // -.4743
{-227, 0x1E5297287C2F45, -807}, // -.4708
{-198, 0x1322E220A5B17E, -710}, // -.4714
{-195, 0x12B010D3E1CF56, -700}, // .4928
{-192, 0x123FF06EEA847A, -690}, // .4968
{-163, 0x1708D0F84D3DE7, -594}, // -.4977
{-145, 0x13FAAC3E3FA1F3, -534}, // -.4785
{-111, 0x133D4032C2C7F5, -421}, // .4774
{-106, 0x1D5B561574765B, -405}, // -.4869
{-104, 0x16EF5B40C2FC77, -398}, // -.4741
{-88, 0x197683DF2F268D, -345}, // -.4738
{-86, 0x13E497065CD61F, -338}, // .4736
{-76, 0x17288E1271F513, -305}, // -.4761
{-63, 0x1A53FC9631D10D, -262}, // .4929
{-30, 0x14484BFEEBC2A0, -152}, // .4758
{-21, 0x12E3B40A0E9B4F, -122}, // -.4916
{-5, 0x14F8B588E368F1, -69}, // .4829
{23, 0x152D02C7E14AF6, 24}, // -.5000 (exactly, round-to-even)
{29, 0x1431E0FAE6D721, 44}, // -.4870
{34, 0x1ED09BEAD87C03, 60}, // -.4721
{70, 0x172EBAD6DDC73D, 180}, // .4733
{105, 0x1BE7ABD3781ECA, 296}, // -.4850
{126, 0x17A2ECC414A03F, 366}, // -.4999
{130, 0x1CDA62055B2D9E, 379}, // .4855
{165, 0x115D847AD00087, 496}, // -.4913
{172, 0x14B378469B6732, 519}, // .4818
{187, 0x1262DFEEBBB0F9, 569}, // -.4805
{210, 0x18557F31326BBB, 645}, // -.4992
{212, 0x1302CB5E6F642A, 652}, // -.4838
{215, 0x1290BA9A38C7D1, 662}, // -.4881
{236, 0x1F736F9B3494E9, 731}, // .4707
{244, 0x176EC98994F489, 758}, // .4924
{250, 0x1658E3AB795204, 778}, // -.4963
{252, 0x117571DDF6C814, 785}, // .4873
{254, 0x1B4781EAD1989E, 791}, // -.4887
{260, 0x1A03FDE214CAF1, 811}, // .4784
{284, 0x1585041B2C477F, 891}, // .4798
{304, 0x1D2A1BE4048F90, 957}, // -.4987
// Out-of-range values
// Too-small powers expect 0 (significand 0). Too-large powers expect
// ldexp(1, 2000), which is presumably meant to overflow to +infinity and so
// match the INFINITY that Pow10 returns above 308.
{-324, 0x0, 0},
{-325, 0x0, 0},
{-326, 0x0, 0},
{309, 1, 2000},
{310, 1, 2000},
{311, 1, 2000},
};
// Compare each Pow10 result with the double rebuilt from the table entry; on
// mismatch both values are printed in hexadecimal floating-point (%a) form.
for (const TestCase& test_case : kTestCases) {
EXPECT_EQ(Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix))
<< absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power,
Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix));
}
}
} // namespace
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,96 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#include <algorithm>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h" // for void_t
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Fallback traits, used when no more specialized version applies (i.e. when
// `string_type` offers no usable __resize_default_init member): HasMember is
// std::false_type and Resize() simply forwards to the regular resize().
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
  typedef std::false_type HasMember;

  static void Resize(string_type* s, size_t new_size) {
    (*s).resize(new_size);
  }
};
// Chosen when `s.__resize_default_init(n)` is a well-formed expression for
// `string_type` (libc++ >= 8.0 provides that member): HasMember is
// std::true_type and Resize() forwards to __resize_default_init().
template <typename string_type>
struct ResizeUninitializedTraits<
    string_type, absl::void_t<decltype(std::declval<string_type&>()
                                           .__resize_default_init(237))> > {
  typedef std::true_type HasMember;

  static void Resize(string_type* s, size_t new_size) {
    (*s).__resize_default_init(new_size);
  }
};
// Returns true if the std::string implementation supports a resize where
// the new characters added to the std::string are left untouched.
//
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
// the previous function.)
template <typename string_type>
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
return ResizeUninitializedTraits<string_type>::HasMember::value;
}
// Like str->resize(new_size), except any new characters added to "*str" as a
// result of resizing may be left uninitialized, rather than being filled with
// '0' bytes. Typically used when code is then going to overwrite the backing
// store of the std::string with known data.
template <typename string_type, typename = void>
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
ResizeUninitializedTraits<string_type>::Resize(s, new_size);
}
// Ensures `*s` has capacity for at least `new_size` characters, growing
// geometrically: whenever the capacity has to increase, at least twice the
// current capacity is requested. This keeps the amortized cost of repeatedly
// growing the string by small amounts at O(1), in contrast to O(s->size())
// with exact-fit growth. Does nothing if the capacity already suffices.
template <typename string_type>
void STLStringReserveAmortized(string_type* s, size_t new_size) {
  const size_t current = s->capacity();
  if (new_size <= current) return;  // already large enough

  const size_t doubled = 2 * current;
  s->reserve(doubled < new_size ? new_size : doubled);
}
// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use
// exponential growth so that the amortized complexity of increasing the string
// size by a small amount is O(1), in contrast to O(str->size()) in the case of
// precise growth.
//
// Implemented as two steps on the same string: an amortized reserve followed
// by the uninitialized resize.
template <typename string_type>
void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) {
// Reserve first: STLStringReserveAmortized requests at least double the
// current capacity whenever it has to grow, which is what provides the
// exponential growth; the resize that follows only sets the new size.
STLStringReserveAmortized(s, new_size);
STLStringResizeUninitialized(s, new_size);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_

View File

@@ -0,0 +1,105 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/resize_uninitialized.h"
#include "gtest/gtest.h"
namespace {
// Counts resize() calls made on the mock string types in this file.
int resize_call_count = 0;

// Mock string that does no real work: it always reports zero size and
// capacity, ignores reserve(), and only records resize() calls in
// resize_call_count.
struct resizable_string {
  size_t size() const { return 0; }
  size_t capacity() const { return 0; }

  // Every index refers to the same shared dummy character.
  char& operator[](size_t) {
    static char dummy = '\0';
    return dummy;
  }

  void resize(size_t) { ++resize_call_count; }
  void reserve(size_t) {}
};
// Counts __resize_default_init() calls made on resize_default_init_string.
int resize_default_init_call_count = 0;

// Mock string that additionally offers __resize_default_init(). It does no
// real work: size and capacity are always zero, reserve() is ignored, and
// resize() / __resize_default_init() only bump their respective counters.
struct resize_default_init_string {
  size_t size() const { return 0; }
  size_t capacity() const { return 0; }

  // Every index refers to the same shared dummy character.
  char& operator[](size_t) {
    static char dummy = '\0';
    return dummy;
  }

  void resize(size_t) { ++resize_call_count; }
  void __resize_default_init(size_t) { ++resize_default_init_call_count; }
  void reserve(size_t) {}
};
// Verifies which member gets called for a string type without and with
// __resize_default_init(), using the call counters of the mock strings.
TEST(ResizeUninit, WithAndWithout) {
  namespace si = absl::strings_internal;

  // Without __resize_default_init: every resize goes through resize().
  resize_call_count = 0;
  resize_default_init_call_count = 0;
  {
    resizable_string plain;
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    // Querying support must not touch the string.
    EXPECT_FALSE(si::STLStringSupportsNontrashingResize(&plain));
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    si::STLStringResizeUninitialized(&plain, 237);
    EXPECT_EQ(resize_call_count, 1);
    EXPECT_EQ(resize_default_init_call_count, 0);

    si::STLStringResizeUninitializedAmortized(&plain, 1000);
    EXPECT_EQ(resize_call_count, 2);
    EXPECT_EQ(resize_default_init_call_count, 0);
  }

  // With __resize_default_init: resize() is never used.
  resize_call_count = 0;
  resize_default_init_call_count = 0;
  {
    resize_default_init_string fast;
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    // Querying support must not touch the string.
    EXPECT_TRUE(si::STLStringSupportsNontrashingResize(&fast));
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    si::STLStringResizeUninitialized(&fast, 237);
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 1);

    si::STLStringResizeUninitializedAmortized(&fast, 1000);
    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 2);
  }
}
// Growing a std::string one character at a time through the amortized resize
// must raise its capacity only a few times (fewer than 50 over 1000 steps).
TEST(ResizeUninit, Amortized) {
  std::string str;
  int cap_increase_count = 0;
  size_t last_cap = str.capacity();
  for (int i = 0; i < 1000; ++i) {
    absl::strings_internal::STLStringResizeUninitializedAmortized(&str, i);
    const size_t cap_now = str.capacity();
    if (cap_now > last_cap) {
      ++cap_increase_count;
    }
    last_cap = cap_now;
  }
  EXPECT_LT(cap_increase_count, 50);
}
} // namespace

View File

@@ -0,0 +1,248 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#include <array>
#include <bitset>
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "absl/meta/type_traits.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// IsSpecialization<C, T> derives from std::true_type when C, after
// absl::decay_t, is T<Args...> for some list of type arguments, and from
// std::false_type otherwise. T must be a template taking only type parameters.
//
// Primary template: C is not a specialization of T.
template <typename C, template <typename...> class T>
struct IsSpecializationImpl : std::false_type {};
// Matches exactly the types of the form T<Args...>.
template <template <typename...> class T, typename... Args>
struct IsSpecializationImpl<T<Args...>, T> : std::true_type {};
// Public entry point: strips references/cv-qualifiers from C before matching.
template <typename C, template <typename...> class T>
using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>;
// IsArray<C> is true when C, after absl::decay_t, is exactly std::array<T, N>.
//
// Primary template: types that are not of the form A<T, N>.
template <typename C>
struct IsArrayImpl : std::false_type {};
// Any template instantiated with (type, size_t) matches here; the result is
// true only if that instantiation is the same type as std::array<T, N>.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {};
// Public entry point: decays C before matching.
template <typename C>
using IsArray = IsArrayImpl<absl::decay_t<C>>;
// IsBitset<C> is true when C, after absl::decay_t, is exactly std::bitset<N>.
//
// Primary template: types that are not of the form B<N>.
template <typename C>
struct IsBitsetImpl : std::false_type {};
// Any template instantiated with a single size_t matches here; the result is
// true only if that instantiation is the same type as std::bitset<N>.
template <template <size_t> class B, size_t N>
struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {};
// Public entry point: decays C before matching.
template <typename C>
using IsBitset = IsBitsetImpl<absl::decay_t<C>>;
// True when C (after decay) is itself one of the listed standard containers:
// std::array, std::bitset, or a specialization of std::deque,
// std::forward_list, std::list, std::map, std::multimap, std::set,
// std::multiset, the four std::unordered_* containers, or std::vector.
template <typename C>
struct IsSTLContainer
: absl::disjunction<
IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>,
IsSpecialization<C, std::forward_list>,
IsSpecialization<C, std::list>, IsSpecialization<C, std::map>,
IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>,
IsSpecialization<C, std::multiset>,
IsSpecialization<C, std::unordered_map>,
IsSpecialization<C, std::unordered_multimap>,
IsSpecialization<C, std::unordered_set>,
IsSpecialization<C, std::unordered_multiset>,
IsSpecialization<C, std::vector>> {};
// IsBaseOfSpecialization<C, T> asks whether C (after decay) is a base of the
// specialization of container template T that is built from C's own nested
// types, i.e. std::is_base_of<C, T<...C's member types...>>.
//
// Primary template: used when C lacks the nested types a specialization below
// requires, or when T's parameter count matches none of them.
template <typename C, template <typename...> class T, typename = void>
struct IsBaseOfSpecializationImpl : std::false_type {};
// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE
// on the existence of container dependent types and plug them into the STL
// template.
// Two parameters: T<value_type, allocator_type>.
template <typename C, template <typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::value_type, typename C::allocator_type>> {};
// Three parameters: T<key_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// Four parameters, ordered flavor:
// T<key_type, mapped_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {
};
// Four parameters, hashed flavor:
// T<key_type, hasher, key_equal, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>> {
};
// Five parameters:
// T<key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// Public entry point: decays C before dispatching to the Impl templates.
template <typename C, template <typename...> class T>
using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>;
// IsBaseOfArray<C> is true when C (after decay) has the form A<T, N> and
// std::is_base_of<A<T, N>, std::array<T, N>> holds.
//
// Primary template: types that are not of the form A<T, N>.
template <typename C>
struct IsBaseOfArrayImpl : std::false_type {};
// Any template instantiated with (type, size_t) is compared against the
// std::array with the same arguments.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> {
};
// Public entry point: decays C before matching.
template <typename C>
using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<C>>;
// IsBaseOfBitset<C> is true when C (after decay) has the form B<N> and
// std::is_base_of<B<N>, std::bitset<N>> holds.
//
// Primary template: types that are not of the form B<N>.
template <typename C>
struct IsBaseOfBitsetImpl : std::false_type {};
// Any template instantiated with a single size_t is compared against the
// std::bitset of the same size.
template <template <size_t> class B, size_t N>
struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {};
// Public entry point: decays C before matching.
template <typename C>
using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<C>>;
// True when any of the IsBaseOf* checks succeeds for C against std::array,
// std::bitset, std::deque, std::forward_list, std::list, std::map,
// std::multimap, std::set, std::multiset, the four std::unordered_*
// containers, or std::vector.
template <typename C>
struct IsBaseOfSTLContainer
: absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>,
IsBaseOfSpecialization<C, std::deque>,
IsBaseOfSpecialization<C, std::forward_list>,
IsBaseOfSpecialization<C, std::list>,
IsBaseOfSpecialization<C, std::map>,
IsBaseOfSpecialization<C, std::multimap>,
IsBaseOfSpecialization<C, std::set>,
IsBaseOfSpecialization<C, std::multiset>,
IsBaseOfSpecialization<C, std::unordered_map>,
IsBaseOfSpecialization<C, std::unordered_multimap>,
IsBaseOfSpecialization<C, std::unordered_set>,
IsBaseOfSpecialization<C, std::unordered_multiset>,
IsBaseOfSpecialization<C, std::vector>> {};
// Primary template: used when `Container` lacks the nested types required by
// every partial specialization below.
template <typename Container, template <typename...> class Template,
          typename = void>
struct IsConvertibleToSpecializationImpl : std::false_type {};

// IsConvertibleToSpecializationImpl needs multiple partial specializations to
// SFINAE on the existence of container dependent types and plug them into the
// STL template.

// Two-parameter templates: <value_type, allocator_type>.
template <typename Container, template <typename, typename> class Template>
struct IsConvertibleToSpecializationImpl<
    Container, Template,
    absl::void_t<typename Container::value_type,
                 typename Container::allocator_type>>
    : std::is_convertible<Container,
                          Template<typename Container::value_type,
                                   typename Container::allocator_type>> {};

// Three-parameter templates: <key_type, key_compare, allocator_type>.
template <typename Container,
          template <typename, typename, typename> class Template>
struct IsConvertibleToSpecializationImpl<
    Container, Template,
    absl::void_t<typename Container::key_type,
                 typename Container::key_compare,
                 typename Container::allocator_type>>
    : std::is_convertible<Container,
                          Template<typename Container::key_type,
                                   typename Container::key_compare,
                                   typename Container::allocator_type>> {};

// Four-parameter templates:
// <key_type, mapped_type, key_compare, allocator_type>.
template <typename Container,
          template <typename, typename, typename, typename> class Template>
struct IsConvertibleToSpecializationImpl<
    Container, Template,
    absl::void_t<typename Container::key_type,
                 typename Container::mapped_type,
                 typename Container::key_compare,
                 typename Container::allocator_type>>
    : std::is_convertible<Container,
                          Template<typename Container::key_type,
                                   typename Container::mapped_type,
                                   typename Container::key_compare,
                                   typename Container::allocator_type>> {};

// Four-parameter templates:
// <key_type, hasher, key_equal, allocator_type>.
template <typename Container,
          template <typename, typename, typename, typename> class Template>
struct IsConvertibleToSpecializationImpl<
    Container, Template,
    absl::void_t<typename Container::key_type,
                 typename Container::hasher,
                 typename Container::key_equal,
                 typename Container::allocator_type>>
    : std::is_convertible<Container,
                          Template<typename Container::key_type,
                                   typename Container::hasher,
                                   typename Container::key_equal,
                                   typename Container::allocator_type>> {};

// Five-parameter templates:
// <key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename Container,
          template <typename, typename, typename, typename, typename>
          class Template>
struct IsConvertibleToSpecializationImpl<
    Container, Template,
    absl::void_t<typename Container::key_type,
                 typename Container::mapped_type,
                 typename Container::hasher,
                 typename Container::key_equal,
                 typename Container::allocator_type>>
    : std::is_convertible<Container,
                          Template<typename Container::key_type,
                                   typename Container::mapped_type,
                                   typename Container::hasher,
                                   typename Container::key_equal,
                                   typename Container::allocator_type>> {};
// Convenience alias: evaluates IsConvertibleToSpecializationImpl on the
// decayed form of `Container`.
template <typename Container, template <typename...> class Template>
using IsConvertibleToSpecialization =
    IsConvertibleToSpecializationImpl<absl::decay_t<Container>, Template>;
// Primary template: a type that is not spelled `ArrayLike<Elem, Size>` never
// qualifies.
template <typename Candidate>
struct IsConvertibleToArrayImpl : std::false_type {};

// For a type of the form `ArrayLike<Elem, Size>`, reports whether it is
// implicitly convertible to `std::array<Elem, Size>`.
template <template <typename, size_t> class ArrayLike, typename Elem,
          size_t Size>
struct IsConvertibleToArrayImpl<ArrayLike<Elem, Size>>
    : std::is_convertible<ArrayLike<Elem, Size>, std::array<Elem, Size>> {};
// Convenience alias: evaluates IsConvertibleToArrayImpl on the decayed form
// of `Container`.
template <typename Container>
using IsConvertibleToArray =
    IsConvertibleToArrayImpl<absl::decay_t<Container>>;
// Primary template: a type that is not spelled `BitsetLike<Size>` never
// qualifies.
template <typename Candidate>
struct IsConvertibleToBitsetImpl : std::false_type {};

// For a type of the form `BitsetLike<Size>`, reports whether it is implicitly
// convertible to `std::bitset<Size>`.
template <template <size_t> class BitsetLike, size_t Size>
struct IsConvertibleToBitsetImpl<BitsetLike<Size>>
    : std::is_convertible<BitsetLike<Size>, std::bitset<Size>> {};
// Convenience alias: evaluates IsConvertibleToBitsetImpl on the decayed form
// of `Container`.
template <typename Container>
using IsConvertibleToBitset =
    IsConvertibleToBitsetImpl<absl::decay_t<Container>>;
// True when any of the per-container "is convertible to" checks succeeds for
// `Container`: std::array, std::bitset, or one of the listed standard
// container templates.
template <typename Container>
struct IsConvertibleToSTLContainer
    : absl::disjunction<
          IsConvertibleToArray<Container>,
          IsConvertibleToBitset<Container>,
          IsConvertibleToSpecialization<Container, std::deque>,
          IsConvertibleToSpecialization<Container, std::forward_list>,
          IsConvertibleToSpecialization<Container, std::list>,
          IsConvertibleToSpecialization<Container, std::map>,
          IsConvertibleToSpecialization<Container, std::multimap>,
          IsConvertibleToSpecialization<Container, std::set>,
          IsConvertibleToSpecialization<Container, std::multiset>,
          IsConvertibleToSpecialization<Container, std::unordered_map>,
          IsConvertibleToSpecialization<Container, std::unordered_multimap>,
          IsConvertibleToSpecialization<Container, std::unordered_set>,
          IsConvertibleToSpecialization<Container, std::unordered_multiset>,
          IsConvertibleToSpecialization<Container, std::vector>> {};
// True only when all three hold for `Container`: it is not itself reported as
// an STL container by IsSTLContainer, IsBaseOfSTLContainer succeeds for it,
// and IsConvertibleToSTLContainer succeeds for it.
template <typename Container>
struct IsStrictlyBaseOfAndConvertibleToSTLContainer
    : absl::conjunction<absl::negation<IsSTLContainer<Container>>,
                        IsBaseOfSTLContainer<Container>,
                        IsConvertibleToSTLContainer<Container>> {};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_

View File

@@ -0,0 +1,488 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// POSIX spec:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html
//
#include "absl/strings/internal/str_format/arg.h"
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/float_conversion.h"
#include "absl/strings/numbers.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Shrinks *capacity by the length of `s`; the result is clipped so it never
// drops below zero.
void ReducePadding(string_view s, size_t *capacity) {
  const size_t remaining = Excess(s.size(), *capacity);
  *capacity = remaining;
}
// Shrinks *capacity by `n`; the result is clipped so it never drops below
// zero.
void ReducePadding(size_t n, size_t *capacity) {
  const size_t remaining = Excess(n, *capacity);
  *capacity = remaining;
}
// Like std::make_unsigned, extended to cover the 128-bit integer types, both
// of which map to absl::uint128.
template <typename Int>
struct MakeUnsigned : std::make_unsigned<Int> {};

template <>
struct MakeUnsigned<absl::int128> {
  using type = absl::uint128;
};

template <>
struct MakeUnsigned<absl::uint128> {
  using type = absl::uint128;
};
// Like std::is_signed, extended to cover the 128-bit integer types:
// absl::int128 is signed, absl::uint128 is not.
template <typename Int>
struct IsSigned : std::is_signed<Int> {};

template <>
struct IsSigned<absl::int128> : std::true_type {};

template <>
struct IsSigned<absl::uint128> : std::false_type {};
// Integral digit printer.
// After construction, call exactly one of the PrintAs* routines, then read
// the outcome through with_neg_and_zero/without_neg_or_zero/is_negative.
class IntDigits {
 public:
  // Renders the unsigned value `v` in base 8.
  // Supports unsigned integral types and uint128.
  template <typename T>
  void PrintAsOct(T v) {
    static_assert(!IsSigned<T>::value, "");
    char *const buffer_end = storage_ + sizeof(storage_);
    char *cursor = buffer_end;
    // Digits are produced least-significant first, so fill backwards.
    do {
      --cursor;
      *cursor = static_cast<char>('0' + (static_cast<size_t>(v) & 7));
      v >>= 3;
    } while (v);
    start_ = cursor;
    size_ = buffer_end - cursor;
  }

  // Renders the signed or unsigned value `v` in base 10.
  // Supports all integral types.
  template <typename T>
  void PrintAsDec(T v) {
    static_assert(std::is_integral<T>::value, "");
    char *const text_end = numbers_internal::FastIntToBuffer(v, storage_);
    start_ = storage_;
    size_ = text_end - storage_;
  }

  // Base 10 rendering of a signed 128-bit value: prints the magnitude and
  // asks for a leading '-' when the value is negative.
  void PrintAsDec(int128 v) {
    const bool negative = v < 0;
    auto magnitude = static_cast<uint128>(v);
    if (negative) {
      magnitude = uint128{} - magnitude;
    }
    PrintAsDec(magnitude, negative);
  }

  // Base 10 rendering of an unsigned 128-bit value, with an optional leading
  // '-' when `add_neg` is set.
  void PrintAsDec(uint128 v, bool add_neg = false) {
    // This function can be sped up if needed. We can call FastIntToBuffer
    // twice, or fix FastIntToBuffer to support uint128.
    char *const buffer_end = storage_ + sizeof(storage_);
    char *cursor = buffer_end;
    // Two decimal digits are written per iteration, back to front.
    do {
      cursor -= 2;
      numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), cursor);
      v /= 100;
    } while (v);
    if (cursor[0] == '0') {
      // The final pair had a leading zero: one decimal digit too many.
      ++cursor;
    }
    if (add_neg) {
      --cursor;
      *cursor = '-';
    }
    size_ = buffer_end - cursor;
    start_ = cursor;
  }

  // Renders the unsigned value `v` in base 16 using lowercase letters.
  // Supports unsigned integral types and uint128.
  template <typename T>
  void PrintAsHexLower(T v) {
    static_assert(!IsSigned<T>::value, "");
    constexpr const char *table = numbers_internal::kHexTable;
    char *const buffer_end = storage_ + sizeof(storage_);
    char *cursor = buffer_end;
    // One byte (two hex digits) is copied from the table per iteration.
    do {
      cursor -= 2;
      std::memcpy(cursor, table + 2 * (static_cast<size_t>(v) & 0xFF), 2);
      // A one-byte type is fully printed after a single iteration; stopping
      // here also avoids shifting it by its full width.
      if (sizeof(T) == 1) break;
      v >>= 8;
    } while (v);
    if (cursor[0] == '0') {
      // The final pair had a leading zero: one hex digit too many.
      ++cursor;
    }
    start_ = cursor;
    size_ = buffer_end - cursor;
  }

  // Renders the unsigned value `v` in base 16 using uppercase letters.
  // Supports unsigned integral types and uint128.
  template <typename T>
  void PrintAsHexUpper(T v) {
    static_assert(!IsSigned<T>::value, "");
    char *const buffer_end = storage_ + sizeof(storage_);
    char *cursor = buffer_end;
    // kHexTable is only lowercase, so do it manually for uppercase.
    do {
      --cursor;
      *cursor = "0123456789ABCDEF"[static_cast<size_t>(v) & 15];
      v >>= 4;
    } while (v);
    start_ = cursor;
    size_ = buffer_end - cursor;
  }

  // The printed value including the '-' sign if available.
  // For inputs of value `0`, this will return "0"
  string_view with_neg_and_zero() const { return string_view(start_, size_); }

  // The printed value not including the '-' sign.
  // For inputs of value `0`, this will return "".
  string_view without_neg_or_zero() const {
    static_assert('-' < '0', "The check below verifies both.");
    // A first character of '-' or '0' is dropped; anything else is kept.
    const bool drop_first = start_[0] <= '0';
    const size_t advance = drop_first ? 1 : 0;
    return string_view(start_ + advance, size_ - advance);
  }

  bool is_negative() const { return start_[0] == '-'; }

 private:
  const char *start_;
  size_t size_;
  // Max size: 128 bit value as octal -> 43 digits, plus sign char
  char storage_[128 / 3 + 1 + 1];
};
// Returns the "0x"/"0X" prefix that belongs in front of the digits, or an
// empty view when no prefix applies.
// Note: 'o' conversions do not have a base indicator, it's just that
// the '#' flag is specified to modify the precision for 'o' conversions.
string_view BaseIndicator(const IntDigits &as_digits,
                          const FormatConversionSpecImpl conv) {
  const auto conv_char = conv.conversion_char();
  const bool is_pointer = conv_char == FormatConversionCharInternal::p;
  const bool is_upper_hex = conv_char == FormatConversionCharInternal::X;
  const bool is_hex = conv_char == FormatConversionCharInternal::x ||
                      is_upper_hex || is_pointer;
  // always show 0x for %p.
  const bool wants_prefix = conv.has_alt_flag() || is_pointer;

  if (!wants_prefix || !is_hex) {
    return {};
  }
  // From the POSIX description of '#' flag:
  //   "For x or X conversion specifiers, a non-zero result shall have
  //   0x (or 0X) prefixed to it."
  if (as_digits.without_neg_or_zero().empty()) {
    return {};
  }
  return is_upper_hex ? "0X" : "0x";
}
// Returns the text for the sign position: "-" for negative values, "+" or " "
// when the matching flag is set, and empty otherwise. Only the 'd' and 'i'
// conversions ever produce a sign.
string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) {
  const auto conv_char = conv.conversion_char();
  const bool is_signed_decimal =
      conv_char == FormatConversionCharInternal::d ||
      conv_char == FormatConversionCharInternal::i;
  if (!is_signed_decimal) {
    return {};
  }
  if (neg) {
    return "-";
  }
  if (conv.has_show_pos_flag()) {
    return "+";
  }
  if (conv.has_sign_col_flag()) {
    return " ";
  }
  return {};
}
// Writes the single character `v` to `sink`, padded with spaces up to the
// requested width; padding goes on the right when the left flag is set and on
// the left otherwise. Always returns true.
bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  size_t fill = conv.width() >= 0 ? static_cast<size_t>(conv.width()) : 0;
  // The character itself occupies one column of the field.
  ReducePadding(1, &fill);

  if (conv.has_left_flag()) {
    sink->Append(1, v);
    sink->Append(fill, ' ');
  } else {
    sink->Append(fill, ' ');
    sink->Append(1, v);
  }
  return true;
}
bool ConvertIntImplInnerSlow(const IntDigits &as_digits,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
// Print as a sequence of Substrings:
// [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces]
size_t fill = 0;
if (conv.width() >= 0) fill = conv.width();
string_view formatted = as_digits.without_neg_or_zero();
ReducePadding(formatted, &fill);
string_view sign = SignColumn(as_digits.is_negative(), conv);
ReducePadding(sign, &fill);
string_view base_indicator = BaseIndicator(as_digits, conv);
ReducePadding(base_indicator, &fill);
int precision = conv.precision();
bool precision_specified = precision >= 0;
if (!precision_specified)
precision = 1;
if (conv.has_alt_flag() &&
conv.conversion_char() == FormatConversionCharInternal::o) {
// From POSIX description of the '#' (alt) flag:
// "For o conversion, it increases the precision (if necessary) to
// force the first digit of the result to be zero."
if (formatted.empty() || *formatted.begin() != '0') {
int needed = static_cast<int>(formatted.size()) + 1;
precision = std::max(precision, needed);
}
}
size_t num_zeroes = Excess(formatted.size(), precision);
ReducePadding(num_zeroes, &fill);
size_t num_left_spaces = !conv.has_left_flag() ? fill : 0;
size_t num_right_spaces = conv.has_left_flag() ? fill : 0;
// From POSIX description of the '0' (zero) flag:
// "For d, i, o, u, x, and X conversion specifiers, if a precision
// is specified, the '0' flag is ignored."
if (!precision_specified && conv.has_zero_flag()) {
num_zeroes += num_left_spaces;
num_left_spaces = 0;
}
sink->Append(num_left_spaces, ' ');
sink->Append(sign);
sink->Append(base_indicator);
sink->Append(num_zeroes, '0');
sink->Append(formatted);
sink->Append(num_right_spaces, ' ');
return true;
}
template <typename T>
bool ConvertIntArg(T v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
// FormatConverionChar. Likely this is because at this point
// FormatConversionChar is declared, but not defined.
switch (static_cast<uint8_t>(conv.conversion_char())) {
case static_cast<uint8_t>(FormatConversionCharInternal::c):
return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink);
case static_cast<uint8_t>(FormatConversionCharInternal::o):
as_digits.PrintAsOct(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::x):
as_digits.PrintAsHexLower(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::X):
as_digits.PrintAsHexUpper(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::u):
as_digits.PrintAsDec(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::d):
case static_cast<uint8_t>(FormatConversionCharInternal::i):
as_digits.PrintAsDec(v);
break;
case static_cast<uint8_t>(FormatConversionCharInternal::a):
case static_cast<uint8_t>(FormatConversionCharInternal::e):
case static_cast<uint8_t>(FormatConversionCharInternal::f):
case static_cast<uint8_t>(FormatConversionCharInternal::g):
case static_cast<uint8_t>(FormatConversionCharInternal::A):
case static_cast<uint8_t>(FormatConversionCharInternal::E):
case static_cast<uint8_t>(FormatConversionCharInternal::F):
case static_cast<uint8_t>(FormatConversionCharInternal::G):
return ConvertFloatImpl(static_cast<double>(v), conv, sink);
default:
ABSL_INTERNAL_ASSUME(false);
}
if (conv.is_basic()) {
sink->Append(as_digits.with_neg_and_zero());
return true;
}
return ConvertIntImplInnerSlow(as_digits, conv, sink);
}
// Formats the floating-point value `v`. Returns false without writing when
// the conversion character is not a floating-point conversion; otherwise
// returns the result of ConvertFloatImpl.
template <typename T>
bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  if (!FormatConversionCharIsFloat(conv.conversion_char())) {
    return false;
  }
  return ConvertFloatImpl(v, conv, sink);
}
// Writes the string `v` to `sink`. A basic spec appends the text unchanged;
// otherwise width, precision and the left flag are handed to
// PutPaddedString.
inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
                             FormatSinkImpl *sink) {
  if (!conv.is_basic()) {
    return sink->PutPaddedString(v, conv.width(), conv.precision(),
                                 conv.has_left_flag());
  }
  sink->Append(v);
  return true;
}
} // namespace
// ==================== Strings ====================
// std::string argument: formatted through the shared string path.
StringConvertResult FormatConvertImpl(const std::string &v,
                                      const FormatConversionSpecImpl conv,
                                      FormatSinkImpl *sink) {
  const bool converted = ConvertStringArg(v, conv, sink);
  return {converted};
}
// string_view argument: formatted through the shared string path.
StringConvertResult FormatConvertImpl(string_view v,
                                      const FormatConversionSpecImpl conv,
                                      FormatSinkImpl *sink) {
  const bool converted = ConvertStringArg(v, conv, sink);
  return {converted};
}
// C-string argument. With the 'p' conversion the pointer value itself is
// printed; otherwise the characters are printed as a string. A null pointer
// is treated as an empty string.
ArgConvertResult<FormatConversionCharSetUnion(
    FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
                  FormatSinkImpl *sink) {
  if (conv.conversion_char() == FormatConversionCharInternal::p) {
    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
  }

  size_t len = 0;
  if (v != nullptr) {
    if (conv.precision() < 0) {
      len = std::strlen(v);
    } else {
      // If precision is set, we look for the NUL-terminator on the valid
      // range.
      const char *const limit = v + conv.precision();
      len = std::find(v, limit, '\0') - v;
    }
  }
  return {ConvertStringArg(string_view(v, len), conv, sink)};
}
// ==================== Raw pointers ====================
// Pointer argument: a zero value is written as the literal "(nil)"; any other
// value is printed as lowercase hex through the general integer path.
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
    VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
  if (v.value) {
    IntDigits digits;
    digits.PrintAsHexLower(v.value);
    return {ConvertIntImplInnerSlow(digits, conv, sink)};
  }
  sink->Append("(nil)");
  return {true};
}
// ==================== Floats ====================
// float argument: forwarded to the shared floating-point path.
FloatingConvertResult FormatConvertImpl(float v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}

// double argument: forwarded to the shared floating-point path.
FloatingConvertResult FormatConvertImpl(double v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}

// long double argument: forwarded to the shared floating-point path.
FloatingConvertResult FormatConvertImpl(long double v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}
// ==================== Chars ====================
// char argument: forwarded to the shared integer path.
IntegralConvertResult FormatConvertImpl(char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

// signed char argument: forwarded to the shared integer path.
IntegralConvertResult FormatConvertImpl(signed char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

// unsigned char argument: forwarded to the shared integer path.
IntegralConvertResult FormatConvertImpl(unsigned char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}
// ==================== Ints ====================
// One overload per built-in integer type (plus the 128-bit types); each one
// forwards to the shared integer path.
IntegralConvertResult FormatConvertImpl(short v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned short v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(int v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(long long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned long long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(absl::int128 v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(absl::uint128 v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}
// Expands to explicit instantiations of FormatArgImpl::Dispatch<T> for the
// types listed in the macro's definition. The empty argument list means no
// extra tokens are placed in front of each `template` instantiation.
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_();
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,526 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#include <string.h>
#include <wchar.h>
#include <cstdio>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class FormatCountCapture;
class FormatSink;
template <absl::FormatConversionCharSet C>
struct FormatConvertResult;
class FormatConversionSpec;
namespace str_format_internal {
// Detects whether the expression
//   AbslFormatConvert(const Arg&, const FormatConversionSpec&, FormatSink*)
// is well-formed for `Arg`. The primary template covers the "no" case.
template <typename Arg, typename = void>
struct HasUserDefinedConvert : std::false_type {};

// Selected when the AbslFormatConvert call above compiles.
template <typename Arg>
struct HasUserDefinedConvert<
    Arg, void_t<decltype(AbslFormatConvert(
             std::declval<const Arg&>(),
             std::declval<const FormatConversionSpec&>(),
             std::declval<FormatSink*>()))>> : std::true_type {};
void AbslFormatConvert(); // Stops the lexical name lookup
template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink)
-> decltype(AbslFormatConvert(v,
std::declval<const FormatConversionSpec&>(),
std::declval<FormatSink*>())) {
using FormatConversionSpecT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>;
using FormatSinkT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
auto fcs = conv.Wrap<FormatConversionSpecT>();
auto fs = sink->Wrap<FormatSinkT>();
return AbslFormatConvert(v, fcs, &fs);
}
template <typename T>
class StreamedWrapper;
// If 'v' can be converted (in the printf sense) according to 'conv',
// then convert it, appending to `sink` and return `true`.
// Otherwise fail and return `false`.
// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v'
// as an extension mechanism. These FormatConvertImpl functions are the default
// implementations.
// The ADL search is augmented via the 'Sink*' parameter, which also
// serves as a disambiguator to reject possible unintended 'AbslFormatConvert'
// functions in the namespaces associated with 'v'.
// Raw pointers.
// Holds a pointer's value as an integer so that any pointer type can be
// formatted uniformly. A null pointer is stored as 0.
struct VoidPtr {
  // Leaves `value` uninitialized.
  VoidPtr() = default;

  // Implicit conversion from any pointer that can be reinterpret_cast to
  // uintptr_t.
  template <typename Pointee,
            decltype(reinterpret_cast<uintptr_t>(
                std::declval<Pointee*>())) = 0>
  VoidPtr(Pointee* ptr)  // NOLINT
      : value(ptr == nullptr ? 0 : reinterpret_cast<uintptr_t>(ptr)) {}

  uintptr_t value;
};
// Return type of the FormatConvertImpl overloads. `value` reports whether the
// conversion succeeded; the template argument records, at the type level,
// which conversion characters the overload accepts.
template <FormatConversionCharSet Accepted>
struct ArgConvertResult {
  bool value;
};
// Recovers the conversion character set encoded in a FormatConvertResult
// type. Only the argument's type is used.
template <FormatConversionCharSet Accepted>
constexpr FormatConversionCharSet ExtractCharSet(
    FormatConvertResult<Accepted>) {
  return Accepted;
}
// Recovers the conversion character set encoded in an ArgConvertResult type.
// Only the argument's type is used.
template <FormatConversionCharSet Accepted>
constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<Accepted>) {
  return Accepted;
}
// Result type for the string overloads: they accept only the `s` conversion.
using StringConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::s>;
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Strings.
StringConvertResult FormatConvertImpl(const std::string& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
StringConvertResult FormatConvertImpl(string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW)
// std::string_view argument, compiled only when absl::string_view is a
// separate type: re-wraps the same characters as an absl::string_view and
// forwards to that overload.
inline StringConvertResult FormatConvertImpl(std::string_view v,
                                             FormatConversionSpecImpl conv,
                                             FormatSinkImpl* sink) {
  const absl::string_view as_absl(v.data(), v.size());
  return FormatConvertImpl(as_absl, conv, sink);
}
#endif  // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW
ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// absl::Cord argument. The cord is written chunk by chunk, truncated to the
// precision when one is given and padded with spaces to the width. The
// template indirection restricts this overload to absl::Cord itself.
// Always reports success.
template <class AbslCord, typename std::enable_if<std::is_same<
                              AbslCord, absl::Cord>::value>::type* = nullptr>
StringConvertResult FormatConvertImpl(const AbslCord& value,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* sink) {
  const bool pad_on_right = conv.has_left_flag();
  const int width = conv.width();
  const int precision = conv.precision();

  // Number of characters still to be written from the cord.
  size_t remaining = value.size();
  if (precision >= 0) {
    remaining = (std::min)(remaining, static_cast<size_t>(precision));
  }

  // Columns left over once the text itself is accounted for.
  const size_t padding =
      Excess(remaining, width >= 0 ? static_cast<size_t>(width) : 0);

  if (padding > 0 && !pad_on_right) sink->Append(padding, ' ');

  for (string_view chunk : value.Chunks()) {
    const size_t take = (std::min)(chunk.size(), remaining);
    sink->Append(chunk.substr(0, take));
    remaining -= take;
    if (remaining == 0) {
      break;
    }
  }

  if (padding > 0 && pad_on_right) sink->Append(padding, ' ');
  return {true};
}
// Result type for the integral overloads: the union of the `c`, kNumeric and
// kStar conversion sets.
using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::c,
FormatConversionCharSetInternal::kNumeric,
FormatConversionCharSetInternal::kStar)>;
// Result type for the floating-point overloads: the kFloating conversion set.
using FloatingConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::kFloating>;
// Floats.
FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(long double v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Chars.
IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Ints.
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(uint128 v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// bool argument: formatted as the int 0 or 1. The template constraint keeps
// this overload from being chosen for types that merely convert to bool.
template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0>
IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv,
                                        FormatSinkImpl* sink) {
  const int as_int = static_cast<int>(v);
  return FormatConvertImpl(as_int, conv, sink);
}
// We provide this function to help the checker, but it is never defined.
// FormatArgImpl will use the underlying Convert functions instead.
template <typename T>
typename std::enable_if<std::is_enum<T>::value &&
!HasUserDefinedConvert<T>::value,
IntegralConvertResult>::type
FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// StreamedWrapper argument: streams the wrapped value into a temporary
// std::ostringstream and formats the resulting text as a string. Reports
// failure if the stream is in a failed state after the insertion.
template <typename T>
StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* out) {
  std::ostringstream stream;
  stream << v.v_;
  if (stream.fail()) {
    return {false};
  }
  return str_format_internal::FormatConvertImpl(stream.str(), conv, out);
}
// Use templates and dependent types to delay evaluation of the function
// until after FormatCountCapture is fully defined.
// Implements the `%n` conversion for FormatCountCapture: stores the number of
// characters written to the sink so far through the capture's pointer.
struct FormatCountCaptureHelper {
  template <class T = int>
  static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper(
      const FormatCountCapture& v, FormatConversionSpecImpl conv,
      FormatSinkImpl* sink) {
    // Re-bind through a T-dependent type so that member access is only
    // checked once the template is instantiated.
    const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& capture = v;

    const bool is_count_conversion =
        conv.conversion_char() ==
        str_format_internal::FormatConversionCharInternal::n;
    if (!is_count_conversion) {
      return {false};
    }
    *capture.p_ = static_cast<int>(sink->size());
    return {true};
  }
};
// FormatCountCapture argument: delegates to FormatCountCaptureHelper.
template <class T = int>
ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl(
    const FormatCountCapture& v, FormatConversionSpecImpl conv,
    FormatSinkImpl* sink) {
  auto result = FormatCountCaptureHelper::ConvertHelper(v, conv, sink);
  return result;
}
// Helper friend struct to hide implementation details from the public API of
// FormatArgImpl.
struct FormatArgImplFriend {
template <typename Arg>
static bool ToInt(Arg arg, int* out) {
// A value initialized FormatConversionSpecImpl has a `none` conv, which
// tells the dispatcher to run the `int` conversion.
return arg.dispatcher_(arg.data_, {}, out);
}
template <typename Arg>
static bool Convert(Arg arg, FormatConversionSpecImpl conv,
FormatSinkImpl* out) {
return arg.dispatcher_(arg.data_, conv, out);
}
template <typename Arg>
static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) {
return arg.dispatcher_;
}
};
// Returns the set of conversion characters accepted for arguments of type
// `Arg`, read off the return type of the FormatConvertImpl overload that
// would be selected for a `const Arg&`.
template <typename Arg>
constexpr FormatConversionCharSet ArgumentToConv() {
  using ConvertResultT = decltype(str_format_internal::FormatConvertImpl(
      std::declval<const Arg&>(),
      std::declval<const FormatConversionSpecImpl&>(),
      std::declval<FormatSinkImpl*>()));
  return absl::str_format_internal::ExtractCharSet(ConvertResultT{});
}
// A type-erased handle to a format argument.
// Each instance holds a small Data payload (the value itself or a pointer to
// it) plus a function pointer that knows how to format that payload.
class FormatArgImpl {
private:
// Size in bytes of the inline buffer used for by-value storage.
enum { kInlinedSpace = 8 };
using VoidPtr = str_format_internal::VoidPtr;
// Storage for the erased argument: a pointer to the caller's object, or the
// raw bytes of a small value.
union Data {
const void* ptr;
const volatile void* volatile_ptr;
char buf[kInlinedSpace];
};
// Signature of the per-type formatting function. `out` is an int* for the
// `none` conversion and a FormatSinkImpl* otherwise (see Dispatch below).
using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out);
// True for types copied into Data::buf: they must fit in kInlinedSpace and be
// integral, floating point, a pointer, or VoidPtr.
template <typename T>
struct store_by_value
: std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) &&
(std::is_integral<T>::value ||
std::is_floating_point<T>::value ||
std::is_pointer<T>::value ||
std::is_same<VoidPtr, T>::value)> {};
enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue };
// Chooses the storage policy for T: volatile types go by volatile pointer,
// store_by_value types go by value, everything else goes by pointer.
template <typename T>
struct storage_policy
: std::integral_constant<StoragePolicy,
(std::is_volatile<T>::value
? ByVolatilePointer
: (store_by_value<T>::value ? ByValue
: ByPointer))> {
};
// To reduce the number of vtables we will decay values before hand.
// Anything with a user-defined Convert will get its own vtable.
// For everything else:
// - Decay char* and char arrays into `const char*`
// - Decay any other pointer to `const void*`
// - Decay all enums to their underlying type.
// - Decay function pointers to void*.
template <typename T, typename = void>
struct DecayType {
static constexpr bool kHasUserDefined =
str_format_internal::HasUserDefinedConvert<T>::value;
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
typename std::conditional<!kHasUserDefined &&
std::is_convertible<T, VoidPtr>::value,
VoidPtr, const T&>::type>::type;
};
// Enums without a user-defined convert decay to their underlying type.
template <typename T>
struct DecayType<T,
typename std::enable_if<
!str_format_internal::HasUserDefinedConvert<T>::value &&
std::is_enum<T>::value>::type> {
using type = typename std::underlying_type<T>::type;
};
public:
// Captures `value` after decaying it with DecayType. When the decayed type is
// `const T&` the argument may be stored by pointer to `value`, so `value`
// must stay alive for as long as this object is used.
template <typename T>
explicit FormatArgImpl(const T& value) {
using D = typename DecayType<T>::type;
static_assert(
std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue,
"Decayed types must be stored by value");
Init(static_cast<D>(value));
}
private:
friend struct str_format_internal::FormatArgImplFriend;
// Manager<T> stores a T into Data and reads it back, specialized per storage
// policy.
template <typename T, StoragePolicy = storage_policy<T>::value>
struct Manager;
// ByPointer: Data holds the address of the caller's object.
template <typename T>
struct Manager<T, ByPointer> {
static Data SetValue(const T& value) {
Data data;
data.ptr = std::addressof(value);
return data;
}
static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); }
};
// ByVolatilePointer: same as ByPointer, through the volatile-qualified member.
template <typename T>
struct Manager<T, ByVolatilePointer> {
static Data SetValue(const T& value) {
Data data;
data.volatile_ptr = &value;
return data;
}
static const T& Value(Data arg) {
return *static_cast<const T*>(arg.volatile_ptr);
}
};
// ByValue: the object's bytes are copied into and out of Data::buf.
template <typename T>
struct Manager<T, ByValue> {
static Data SetValue(const T& value) {
Data data;
memcpy(data.buf, &value, sizeof(value));
return data;
}
static T Value(Data arg) {
T value;
memcpy(&value, arg.buf, sizeof(T));
return value;
}
};
// Stores `value` and records the Dispatch instantiation that formats it.
template <typename T>
void Init(const T& value) {
data_ = Manager<T>::SetValue(value);
dispatcher_ = &Dispatch<T>;
}
// Converts `val` to int, clamping to the range of int: values above INT_MAX
// become INT_MAX, and signed values below INT_MIN become INT_MIN. The
// comparison is done in int64_t for signed T and uint64_t for unsigned T.
template <typename T>
static int ToIntVal(const T& val) {
using CommonType = typename std::conditional<std::is_signed<T>::value,
int64_t, uint64_t>::type;
if (static_cast<CommonType>(val) >
static_cast<CommonType>((std::numeric_limits<int>::max)())) {
return (std::numeric_limits<int>::max)();
} else if (std::is_signed<T>::value &&
static_cast<CommonType>(val) <
static_cast<CommonType>((std::numeric_limits<int>::min)())) {
return (std::numeric_limits<int>::min)();
}
return static_cast<int>(val);
}
// ToInt overloads are selected by the two tag arguments (is_integral,
// is_enum). Integral types: clamp the stored value into *out.
template <typename T>
static bool ToInt(Data arg, int* out, std::true_type /* is_integral */,
std::false_type) {
*out = ToIntVal(Manager<T>::Value(arg));
return true;
}
// Enum types: convert to the underlying type first, then clamp.
template <typename T>
static bool ToInt(Data arg, int* out, std::false_type,
std::true_type /* is_enum */) {
*out = ToIntVal(static_cast<typename std::underlying_type<T>::type>(
Manager<T>::Value(arg)));
return true;
}
// Neither integral nor enum: no int conversion exists.
template <typename T>
static bool ToInt(Data, int*, std::false_type, std::false_type) {
return false;
}
// The type-specific entry point stored in dispatcher_. For a `none`
// conversion it writes the argument's int value to `out` (an int*); otherwise
// it checks that T accepts the conversion character and formats the value
// into `out` (a FormatSinkImpl*). Returns false when the conversion is not
// supported or fails.
template <typename T>
static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) {
// A `none` conv indicates that we want the `int` conversion.
if (ABSL_PREDICT_FALSE(spec.conversion_char() ==
FormatConversionCharInternal::kNone)) {
return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(),
std::is_enum<T>());
}
if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(),
spec.conversion_char()))) {
return false;
}
return str_format_internal::FormatConvertImpl(
Manager<T>::Value(arg), spec,
static_cast<FormatSinkImpl*>(out))
.value;
}
// The erased argument payload.
Data data_;
// Formatter for the type stored in data_.
Dispatcher dispatcher_;
};
// Expands to an explicit instantiation of FormatArgImpl::Dispatch<T>,
// prefixed by `E`. Passing `extern` as `E` turns it into an explicit
// instantiation declaration.
#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \
E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \
void*)
// Applies ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_ to each of the argument
// types listed below, forwarding the macro arguments as the prefix `E`.
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__)
// Declares (via `extern template`) the Dispatch<T> instantiations for all of
// the types above, so including translation units do not instantiate them.
// NOTE(review): the matching definitions are presumably produced in a source
// file by expanding the same macro with an empty prefix -- confirm.
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_

View File

@@ -0,0 +1,130 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/arg.h"
#include <ostream>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture providing a few sample argument kinds for the FormatArgImpl tests.
class FormatArgImplTest : public ::testing::Test {
public:
// Unscoped enum used to check that enums convert to int.
enum Color { kRed, kGreen, kBlue };
// Returns a `const char*` (as opposed to a char array literal).
static const char *hi() { return "hi"; }
// User-defined type whose formatting extension is declared just after this
// class.
struct X {};
X x_;
};
// Formatting extension for FormatArgImplTest::X. It advertises an empty
// conversion set (`FormatConversionCharSet{}`) and always reports failure;
// it exists so that `X` can be wrapped in a FormatArgImpl by the tests.
inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert(
const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) {
return {false};
}
// Exercises FormatArgImplFriend::ToInt() across argument types: integral and
// enum arguments convert (saturating at the limits of int), everything else
// is rejected.
TEST_F(FormatArgImplTest, ToInt) {
int out = 0;
// Plain in-range integers, including a char and a wide unsigned type.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out));
EXPECT_EQ(1, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out));
EXPECT_EQ(-1, out);
EXPECT_TRUE(
FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out));
EXPECT_EQ(64, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>(123456)), &out));  // NOLINT
EXPECT_EQ(123456, out);
// Values above INT_MAX saturate to INT_MAX.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>(  // NOLINT
std::numeric_limits<int>::max()) +
1),
&out));
EXPECT_EQ(std::numeric_limits<int>::max(), out);
// Values below INT_MIN saturate to INT_MIN.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<long long>(  // NOLINT
std::numeric_limits<int>::min()) -
10),
&out));
EXPECT_EQ(std::numeric_limits<int>::min(), out);
// bool converts as 0 / 1.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out));
EXPECT_EQ(0, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out));
EXPECT_EQ(1, out);
// Floating point, pointers, strings and user-defined types are rejected.
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<int *>(nullptr)), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out));
// Enums convert through their numeric value (kBlue == 2).
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out));
EXPECT_EQ(2, out);
}
// Declared with unknown bound on purpose; the definition appears further
// down in this file, so the tests below see an array of unknown size.
extern const char kMyArray[];
// Char arrays of any (or unknown) length must use the same dispatch entry as
// a plain `const char*` argument.
TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) {
const char* a = "";
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray)));
}
// Non-char pointers (object, volatile-qualified, and function pointers) must
// all use the same dispatch entry as a `void*` argument.
TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) {
auto expected = FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<void *>(nullptr)));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<int *>(nullptr))),
expected);
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<volatile int *>(nullptr))),
expected);
// A captureless lambda converted to a plain function pointer.
auto p = static_cast<void (*)()>([] {});
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)),
expected);
}
// Formats `kMyArray` (an array whose bound is not yet known at this point)
// with a bare `%s`-style conversion: no flags, width or precision.
TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) {
std::string s;
FormatSinkImpl sink(&s);
FormatConversionSpecImpl conv;
FormatConversionSpecImplFriend::SetConversionChar(
FormatConversionCharInternal::s, &conv);
FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
FormatConversionSpecImplFriend::SetWidth(-1, &conv);
FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
EXPECT_TRUE(
FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink));
// The sink buffers output; flush before inspecting `s`.
sink.Flush();
EXPECT_EQ("ABCDE", s);
}
// Definition of the array declared with unknown bound above. It is placed
// after the tests so that they only ever see the incomplete array type.
const char kMyArray[] = "ABCDE";
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,258 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/bind.h"
#include <cerrno>
#include <limits>
#include <sstream>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Resolves a '*' width/precision that refers to argument number `position`
// (1-based) and stores its integer value in `*value`. Returns false when the
// position lies beyond the pack or the argument has no int conversion.
inline bool BindFromPosition(int position, int* value,
                             absl::Span<const FormatArgImpl> pack) {
  assert(position > 0);
  const size_t one_based = static_cast<size_t>(position);
  if (one_based <= pack.size()) {
    // -1 because positions are 1-based
    return FormatArgImplFriend::ToInt(pack[position - 1], value);
  }
  return false;
}
// Holds a view of the user's argument pack and binds parsed conversions to
// it. Only the span is stored, so the underlying arguments must outlive the
// context.
class ArgContext {
public:
explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {}
// Fill 'bound' with the results of applying the context's argument pack
// to the specified 'unbound'. We synthesize a BoundConversion by
// lining up a UnboundConversion with a user argument. We also
// resolve any '*' specifiers for width and precision, so after
// this call, 'bound' has all the information it needs to be formatted.
// Returns false on failure.
bool Bind(const UnboundConversion* unbound, BoundConversion* bound);
private:
// Non-owning view of the arguments supplied by the caller.
absl::Span<const FormatArgImpl> pack_;
};
// Lines `unbound` up with its argument from the pack and writes the fully
// resolved conversion into `bound`. '*' width and precision are looked up in
// the pack as well. Returns false, without modifying `bound`, when any
// referenced position is out of range or a '*' argument is not convertible
// to int.
inline bool ArgContext::Bind(const UnboundConversion* unbound,
                             BoundConversion* bound) {
  const int arg_position = unbound->arg_position;
  // The unsigned comparison also rejects a position of zero, which wraps
  // around to a huge value.
  if (static_cast<size_t>(arg_position - 1) >= pack_.size()) {
    return false;
  }
  const FormatArgImpl* const arg = &pack_[arg_position - 1];  // 1-based

  if (unbound->flags == Flags::kBasic) {
    // Fast path: no flags, width or precision were specified.
    FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
    FormatConversionSpecImplFriend::SetWidth(-1, bound);
    FormatConversionSpecImplFriend::SetPrecision(-1, bound);
  } else {
    int width = unbound->width.value();
    bool force_left = false;
    if (unbound->width.is_from_arg()) {
      if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) {
        return false;
      }
      if (width < 0) {
        // "A negative field width is taken as a '-' flag followed by a
        // positive field width."
        force_left = true;
        // Make sure we don't overflow the width when negating it.
        width = -std::max(width, -std::numeric_limits<int>::max());
      }
    }

    int precision = unbound->precision.value();
    if (unbound->precision.is_from_arg() &&
        !BindFromPosition(unbound->precision.get_from_arg(), &precision,
                          pack_)) {
      return false;
    }

    FormatConversionSpecImplFriend::SetWidth(width, bound);
    FormatConversionSpecImplFriend::SetPrecision(precision, bound);
    if (force_left) {
      FormatConversionSpecImplFriend::SetFlags(unbound->flags | Flags::kLeft,
                                               bound);
    } else {
      FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
    }
  }

  FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound);
  bound->set_arg(arg);
  return true;
}
// Adapter between the format parser and a `Converter`: literal text is
// forwarded unchanged, while each conversion is first bound to its argument
// and then handed to the converter.
template <typename Converter>
class ConverterConsumer {
 public:
  ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack)
      : converter_(converter), arg_context_(pack) {}

  // Forwards literal text; never fails.
  bool Append(string_view s) {
    converter_.Append(s);
    return true;
  }

  // Binds `conv` against the argument pack and, if that succeeds, lets the
  // converter format it. Returns false when either step fails.
  bool ConvertOne(const UnboundConversion& conv, string_view conv_string) {
    BoundConversion bound;
    return arg_context_.Bind(&conv, &bound) &&
           converter_.ConvertOne(bound, conv_string);
  }

 private:
  Converter converter_;
  ArgContext arg_context_;
};
template <typename Converter>
bool ConvertAll(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args, Converter converter) {
if (format.has_parsed_conversion()) {
return format.parsed_conversion()->ProcessFormat(
ConverterConsumer<Converter>(converter, args));
} else {
return ParseFormatString(format.str(),
ConverterConsumer<Converter>(converter, args));
}
}
class DefaultConverter {
public:
explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {}
void Append(string_view s) const { sink_->Append(s); }
bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_);
}
private:
FormatSinkImpl* sink_;
};
// Converter used by Summarize(): instead of formatting each argument for
// real, it emits a description of the form
//   {<arg formatted with %d>:<flags><width>.<precision><conversion char>}
// where width and precision are omitted when negative (i.e. unspecified).
class SummarizingConverter {
 public:
  explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {}

  void Append(string_view s) const { sink_->Append(s); }

  bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
    UntypedFormatSpecImpl spec("%d");
    std::ostringstream summary;
    summary << "{" << Streamable(spec, {*bound.arg()}) << ":";
    summary << FormatConversionSpecImplFriend::FlagsToString(bound);
    if (bound.width() >= 0) {
      summary << bound.width();
    }
    if (bound.precision() >= 0) {
      summary << "." << bound.precision();
    }
    summary << bound.conversion_char() << "}";
    Append(summary.str());
    return true;
  }

 private:
  FormatSinkImpl* sink_;
};
} // namespace
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack,
BoundConversion* bound) {
return ArgContext(pack).Bind(props, bound);
}
std::string Summarize(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
typedef SummarizingConverter Converter;
std::string out;
{
// inner block to destroy sink before returning out. It ensures a last
// flush.
FormatSinkImpl sink(&out);
if (!ConvertAll(format, args, Converter(&sink))) {
return "";
}
}
return out;
}
// Formats `args` according to `format` into `raw_sink`. Returns false when
// the format is invalid or a conversion fails.
bool FormatUntyped(FormatRawSinkImpl raw_sink,
                   const UntypedFormatSpecImpl format,
                   absl::Span<const FormatArgImpl> args) {
  FormatSinkImpl sink(raw_sink);
  return ConvertAll(format, args, DefaultConverter(&sink));
}
std::ostream& Streamable::Print(std::ostream& os) const {
if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit);
return os;
}
std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
size_t orig = out->size();
if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) {
out->erase(orig);
}
return *out;
}
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
std::string out;
if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) {
out.clear();
}
return out;
}
int FprintF(std::FILE* output, const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
FILERawSink sink(output);
if (!FormatUntyped(&sink, format, args)) {
errno = EINVAL;
return -1;
}
if (sink.error()) {
errno = sink.error();
return -1;
}
if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) {
errno = EFBIG;
return -1;
}
return static_cast<int>(sink.count());
}
// Formats into the caller-provided buffer `output` of `size` bytes, always
// NUL-terminating it when `size` is non-zero.
//
// Returns the total number of bytes the formatted output requires (excluding
// the terminating NUL), which may exceed `size - 1` when the output was
// truncated. Returns -1 with `errno` set on failure:
//   EINVAL - the format could not be processed,
//   EFBIG  - the total length does not fit in an int (same convention as
//            FprintF in this file). The buffer still holds the truncated,
//            NUL-terminated output in that case.
int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format,
             absl::Span<const FormatArgImpl> args) {
  // Reserve one byte for the terminating NUL.
  BufferRawSink sink(output, size ? size - 1 : 0);
  if (!FormatUntyped(&sink, format, args)) {
    errno = EINVAL;
    return -1;
  }
  size_t total = sink.total_written();
  // `total` can be larger than what fit in the buffer, hence the clamp.
  if (size) output[std::min(total, size - 1)] = 0;
  // Converting a value above INT_MAX to int would silently yield a wrong
  // (possibly negative) length; report the overflow explicitly instead.
  if (total > static_cast<size_t>(std::numeric_limits<int>::max())) {
    errno = EFBIG;
    return -1;
  }
  return static_cast<int>(total);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,217 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#include <array>
#include <cstdio>
#include <sstream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/parser.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class UntypedFormatSpec;
namespace str_format_internal {
// A conversion specification together with the argument it applies to.
// The argument is referenced, not owned. `arg_` has no initializer here, so
// arg() is only meaningful after set_arg() has been called.
class BoundConversion : public FormatConversionSpecImpl {
public:
const FormatArgImpl* arg() const { return arg_; }
void set_arg(const FormatArgImpl* a) { arg_ = a; }
private:
const FormatArgImpl* arg_;
};
// This is the type-erased class that the implementation uses.
//
// It refers either to a raw format string or to a pre-parsed format. Both
// cases share `data_`; `size_` discriminates between them: the all-ones value
// `~size_t{}` marks a pre-parsed format, any other value is the length of the
// string that `data_` points to. Neither is owned by this object.
class UntypedFormatSpecImpl {
public:
UntypedFormatSpecImpl() = delete;
// Refers to the characters of `s`; they are not copied.
explicit UntypedFormatSpecImpl(string_view s)
: data_(s.data()), size_(s.size()) {}
// Refers to the pre-parsed format `pc`.
explicit UntypedFormatSpecImpl(
const str_format_internal::ParsedFormatBase* pc)
: data_(pc), size_(~size_t{}) {}
// True if this object was built from a ParsedFormatBase.
bool has_parsed_conversion() const { return size_ == ~size_t{}; }
// The raw format string. Only valid when !has_parsed_conversion().
string_view str() const {
assert(!has_parsed_conversion());
return string_view(static_cast<const char*>(data_), size_);
}
// The pre-parsed format. Only valid when has_parsed_conversion().
const str_format_internal::ParsedFormatBase* parsed_conversion() const {
assert(has_parsed_conversion());
return static_cast<const str_format_internal::ParsedFormatBase*>(data_);
}
// Returns the `spec_` member of `s`.
template <typename T>
static const UntypedFormatSpecImpl& Extract(const T& s) {
return s.spec_;
}
private:
// Points at either character data or a ParsedFormatBase; see `size_`.
const void* data_;
// String length, or ~size_t{} when `data_` is a ParsedFormatBase.
size_t size_;
};
// Identity metafunction: `type` is simply `T`. The otherwise unused
// FormatConversionCharSet pack makes the result depend on template
// parameters of the user of this trait.
template <typename T, FormatConversionCharSet...>
struct MakeDependent {
using type = T;
};
// Implicitly convertible from `const char*`, `string_view`, and the
// `ExtendedParsedFormat` type. This abstraction allows all format functions to
// operate on any without providing too many overloads.
//
// `Args...` are the conversion sets accepted by the arguments that will be
// formatted. When ABSL_INTERNAL_ENABLE_FORMAT_CHECKER is defined, the
// string constructors are guarded by `enable_if` attributes so that only
// formats matching `Args...` select the "good" overloads; otherwise the
// string constructors accept anything.
template <FormatConversionCharSet... Args>
class FormatSpecTemplate
: public MakeDependent<UntypedFormatSpec, Args...>::type {
using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type;
public:
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Honeypot overload for when the string is not constexpr.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
FormatSpecTemplate(...)  // NOLINT
__attribute__((unavailable("Format string is not constexpr.")));
// Honeypot overload for when the format is constexpr and invalid.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
// To avoid checking the format twice, we just check that the format is
// constexpr. If is it valid, then the overload below will kick in.
// We add the template here to make this overload have lower priority.
template <typename = void>
FormatSpecTemplate(const char* s)  // NOLINT
__attribute__((
enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"),
unavailable(
"Format specified does not match the arguments passed.")));
// Same trap for string_view arguments; here the diagnostic comes from a
// static_assert that can never hold (sizeof(T*) is never 0).
template <typename T = void>
FormatSpecTemplate(string_view s)  // NOLINT
__attribute__((enable_if(str_format_internal::EnsureConstexpr(s),
"constexpr trap"))) {
static_assert(sizeof(T*) == 0,
"Format specified does not match the arguments passed.");
}
// Good format overload.
FormatSpecTemplate(const char* s)  // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
FormatSpecTemplate(string_view s)  // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
#else  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// No compile-time checking available: accept any string.
FormatSpecTemplate(const char* s) : Base(s) {}  // NOLINT
FormatSpecTemplate(string_view s) : Base(s) {}  // NOLINT
#endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Accepts a pre-parsed format whose conversion sets `C...` have the same
// count as `Args...` and for which Contains(Args, C) holds pairwise.
template <
FormatConversionCharSet... C,
typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type,
typename = typename std::enable_if<AllOf(Contains(Args,
C)...)>::type>
FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc)  // NOLINT
: Base(&pc) {}
};
// Pairs a format with a copy of its arguments so the combination can be
// written to a std::ostream with operator<<.
//
// The FormatArgImpl objects are copied into this object (inline storage for
// up to four, a vector beyond that). The format, however, is held by
// reference, so the UntypedFormatSpecImpl passed to the constructor must
// outlive the Streamable.
class Streamable {
public:
Streamable(const UntypedFormatSpecImpl& format,
absl::Span<const FormatArgImpl> args)
: format_(format) {
if (args.size() <= ABSL_ARRAYSIZE(few_args_)) {
// Small case: copy into the inline array and view only the used prefix.
for (size_t i = 0; i < args.size(); ++i) {
few_args_[i] = args[i];
}
args_ = absl::MakeSpan(few_args_, args.size());
} else {
// Large case: copy into the heap-allocated vector.
many_args_.assign(args.begin(), args.end());
args_ = many_args_;
}
}
// Formats to `os`; defined out of line.
std::ostream& Print(std::ostream& os) const;
friend std::ostream& operator<<(std::ostream& os, const Streamable& l) {
return l.Print(os);
}
private:
// Not owned; see the class comment for the lifetime requirement.
const UntypedFormatSpecImpl& format_;
// View over whichever of the two storages below holds the arguments.
absl::Span<const FormatArgImpl> args_;
// if args_.size() is 4 or less:
FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0),
FormatArgImpl(0), FormatArgImpl(0)};
// if args_.size() is more than 4:
std::vector<FormatArgImpl> many_args_;
};
// for testing
// Returns a summary of how `format` binds to `args`, or an empty string if
// the format cannot be processed.
std::string Summarize(UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Binds the single conversion `props` against `pack`, filling `bound`.
// Returns false on failure.
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack, BoundConversion* bound);
// Formats `args` according to `format` into `raw_sink`. Returns false on
// failure.
bool FormatUntyped(FormatRawSinkImpl raw_sink,
UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Appends the formatted result to `*out` and returns `*out`. On failure,
// `*out` is restored to its original length.
std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Returns the formatted result, or an empty string on failure.
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Formats to a C stream. Returns the number of bytes written, or -1 with
// `errno` set on failure.
int FprintF(std::FILE* output, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Formats into `output` (at most `size` bytes including the terminating
// NUL). Returns -1 with `errno` set to EINVAL if the format cannot be
// processed; otherwise returns the sink's total written length.
int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Returned by Streamed(v). Converts via '%s' to the std::string created
// by std::ostream << v.
//
// The wrapped object is held by reference, so it must outlive the wrapper.
template <typename T>
class StreamedWrapper {
public:
explicit StreamedWrapper(const T& v) : v_(v) { }
private:
// The conversion function needs access to `v_`; it is befriended for every
// StreamedWrapper<S>.
template <typename S>
friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl(
const StreamedWrapper<S>& v, FormatConversionSpecImpl conv,
FormatSinkImpl* out);
// Not owned.
const T& v_;
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_

View File

@@ -0,0 +1,157 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/bind.h"
#include <string.h>
#include <limits>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture with a helper for parsing a single conversion specification.
class FormatBindTest : public ::testing::Test {
public:
// Parses `s` (the text after '%') into `props`. Returns true only if the
// parser consumed the entire string.
bool Extract(const char *s, UnboundConversion *props, int *next) const {
return ConsumeUnboundConversion(s, s + strlen(s), props, next) ==
s + strlen(s);
}
};
// Table-driven test: each row parses one conversion and binds it against a
// pack of four int arguments (10, 20, 30, 40).
TEST_F(FormatBindTest, BindSingle) {
struct Expectation {
int line;
const char *fmt;
// Number of phases expected to succeed: 0 = parsing fails, 1 = parsing
// succeeds but binding fails, 2 = both succeed.
int ok_phases;
// Expected bound argument, width and precision (only checked when
// ok_phases == 2).
const FormatArgImpl *arg;
int width;
int precision;
// Not compared by the loop below.
int next_arg;
};
// Marker for "width/precision not specified".
const int no = -1;
const int ia[] = { 10, 20, 30, 40};
const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
FormatArgImpl(ia[2]), FormatArgImpl(ia[3])};
// The failure rows below intentionally leave trailing fields unset.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
const Expectation kExpect[] = {
{__LINE__, "d", 2, &args[0], no, no, 2},
{__LINE__, "4d", 2, &args[0], 4, no, 2},
{__LINE__, ".5d", 2, &args[0], no, 5, 2},
{__LINE__, "4.5d", 2, &args[0], 4, 5, 2},
{__LINE__, "*d", 2, &args[1], 10, no, 3},
{__LINE__, ".*d", 2, &args[1], no, 10, 3},
{__LINE__, "*.*d", 2, &args[2], 10, 20, 4},
{__LINE__, "1$d", 2, &args[0], no, no, 0},
{__LINE__, "2$d", 2, &args[1], no, no, 0},
{__LINE__, "3$d", 2, &args[2], no, no, 0},
{__LINE__, "4$d", 2, &args[3], no, no, 0},
{__LINE__, "2$*1$d", 2, &args[1], 10, no, 0},
{__LINE__, "2$*2$d", 2, &args[1], 20, no, 0},
{__LINE__, "2$*3$d", 2, &args[1], 30, no, 0},
{__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0},
{__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0},
{__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0},
{__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "1$*d", 0},  // indexed, then positional
{__LINE__, "*2$d", 0},  // positional, then indexed
{__LINE__, "6$d", 1},  // arg position out of bounds
{__LINE__, "1$6$d", 0},  // width position incorrectly specified
{__LINE__, "1$.6$d", 0},  // precision position incorrectly specified
{__LINE__, "1$*6$d", 1},  // width position out of bounds
{__LINE__, "1$.*6$d", 1},  // precision position out of bounds
};
#pragma GCC diagnostic pop
for (const Expectation &e : kExpect) {
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UnboundConversion props;
BoundConversion bound;
// Count how many of the two phases (parse, bind) succeed.
int ok_phases = 0;
int next = 0;
if (Extract(e.fmt, &props, &next)) {
++ok_phases;
if (BindWithPack(&props, args, &bound)) {
++ok_phases;
}
}
EXPECT_EQ(e.ok_phases, ok_phases);
// `bound` is only meaningful when both phases were expected to succeed.
if (e.ok_phases < 2) continue;
if (e.arg != nullptr) {
EXPECT_EQ(e.arg, bound.arg());
}
EXPECT_EQ(e.width, bound.width());
EXPECT_EQ(e.precision, bound.precision());
}
}
// A '*' width of INT_MIN cannot be negated; binding must clamp it so that the
// resulting width is INT_MAX rather than overflowing.
TEST_F(FormatBindTest, WidthUnderflowRegression) {
UnboundConversion props;
BoundConversion bound;
int next = 0;
// First argument supplies the width, second is the value to format.
const int args_i[] = {std::numeric_limits<int>::min(), 17};
const FormatArgImpl args[] = {FormatArgImpl(args_i[0]),
FormatArgImpl(args_i[1])};
ASSERT_TRUE(Extract("*d", &props, &next));
ASSERT_TRUE(BindWithPack(&props, args, &bound));
EXPECT_EQ(bound.width(), std::numeric_limits<int>::max());
EXPECT_EQ(bound.arg(), args + 1);
}
// Table-driven test of Summarize(): each row gives a complete format string
// and the expected summary when it is bound against the arguments
// (10, 20, 30, 40, -10).
TEST_F(FormatBindTest, FormatPack) {
struct Expectation {
int line;
const char *fmt;
const char *summary;
};
const int ia[] = { 10, 20, 30, 40, -10 };
const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
FormatArgImpl(ia[2]), FormatArgImpl(ia[3]),
FormatArgImpl(ia[4])};
const Expectation kExpect[] = {
{__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"},
{__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"},
{__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"},
{__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"},
{__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"},
{__LINE__, "a%.*fb", "a{20:.10f}b"},
{__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"},
{__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"},
{__LINE__, "a%04ldb", "a{10:04d}b"},
{__LINE__, "a%-#04lldb", "a{10:-#04d}b"},
// Argument 5 is -10: as a width it becomes '-' plus width 10; as a
// precision it is negative and therefore omitted from the summary.
{__LINE__, "a%1$*5$db", "a{10:-10d}b"},
{__LINE__, "a%1$.*5$db", "a{10:d}b"},
};
for (const Expectation &e : kExpect) {
absl::string_view fmt = e.fmt;
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UntypedFormatSpecImpl format(fmt);
EXPECT_EQ(e.summary,
str_format_internal::Summarize(format, absl::MakeSpan(args)))
<< "line:" << e.line;
}
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,333 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#include "absl/base/attributes.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/extension.h"
// Compile time check support for entry points.
#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1
#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Compile-time conjunction: true iff every argument is true. The empty call
// is the recursion's base case and yields true.
constexpr bool AllOf() { return true; }
template <typename... Rest>
constexpr bool AllOf(bool first, Rest... rest) {
  return first ? AllOf(rest...) : false;
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Returns true iff `c` is one of the characters of the NUL-terminated list
// `chars`.
//
// The terminating NUL is not a member of the list, so `c == '\0'` never
// matches. This matters because callers obtain `c` from GetChar(), which
// yields '\0' once the format is exhausted: if the terminator were allowed to
// match, ConsumeAnyOf() would keep "consuming" from an empty format and
// recurse without end.
constexpr bool ContainsChar(const char* chars, char c) {
  return *chars != '\0' && (*chars == c || ContainsChar(chars + 1, c));
}
// A constexpr compatible list of Convs.
//
// Non-owning view over `count` conversion sets starting at `array`.
struct ConvList {
  const FormatConversionCharSet* array;
  int count;

  // We do the bound check here to avoid having to do it on the callers.
  // Returning an empty FormatConversionCharSet has the same effect as
  // short circuiting because it will never match any conversion.
  //
  // Both bounds are checked: callers index with `position - 1`, and an
  // invalid positional argument is represented by position 0, which yields
  // index -1.
  constexpr FormatConversionCharSet operator[](int i) const {
    return (i >= 0 && i < count) ? array[i] : FormatConversionCharSet{};
  }

  // Returns the list without its first element; an empty list is returned
  // unchanged.
  constexpr ConvList without_front() const {
    return count != 0 ? ConvList{array + 1, count - 1} : *this;
  }
};
// Fixed-size storage for `count` conversion sets.
template <size_t count>
struct ConvListT {
// Make sure the array has size > 0.
// (A `count` of zero still gets one element, since zero-length arrays are
// not valid C++.)
FormatConversionCharSet list[count ? count : 1];
};
// Bounds-checked character access: returns `str[index]`, or '\0' when
// `index` is past the end of `str`.
constexpr char GetChar(string_view str, size_t index) {
  return index >= str.size() ? char{} : str[index];
}
// Returns `str` with its first `len` characters removed. If `str` is shorter
// than `len`, a default-constructed (empty) string_view is returned.
constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
  return len > str.size()
             ? string_view()
             : string_view(str.data() + len, str.size() - len);
}
// Strips from the front of `format` every leading character for which
// ContainsChar(chars, ...) holds, and returns the remainder.
constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
  return !ContainsChar(chars, GetChar(format, 0))
             ? format
             : ConsumeAnyOf(ConsumeFront(format), chars);
}
// True iff `c` is one of the ASCII decimal digits '0'..'9'.
constexpr bool IsDigit(char c) { return '0' <= c && c <= '9'; }
// Helper class for the ParseDigits function.
// It encapsulates the two return values we need there: the unparsed rest of
// the format and the number that was read.
struct Integer {
  string_view format;
  int value;

  // If the next character is a '$', consume it.
  // Otherwise, make `this` an invalid positional argument (value 0, format
  // left untouched).
  constexpr Integer ConsumePositionalDollar() const {
    return GetChar(format, 0) != '$' ? Integer{format, 0}
                                     : Integer{ConsumeFront(format), value};
  }
};
// Reads the run of decimal digits at the front of `format`, accumulating it
// onto `value`. Returns the resulting number together with the rest of the
// format; when there is no leading digit, `value` is returned as given.
constexpr Integer ParseDigits(string_view format, int value = 0) {
  return !IsDigit(GetChar(format, 0))
             ? Integer{format, value}
             : ParseDigits(ConsumeFront(format),
                           10 * value + GetChar(format, 0) - '0');
}
// Parse digits for a positional argument.
// The parsing also consumes the '$'.
// If no '$' follows the digits, the result has value 0 (an invalid
// position) and its `format` still starts at the missing '$'.
constexpr Integer ParsePositional(string_view format) {
return ParseDigits(format).ConsumePositionalDollar();
}
// Parses a single conversion specifier.
// See ConvParser::Run() for post conditions.
class ConvParser {
constexpr ConvParser SetFormat(string_view format) const {
return ConvParser(format, args_, error_, arg_position_, is_positional_);
}
constexpr ConvParser SetArgs(ConvList args) const {
return ConvParser(format_, args, error_, arg_position_, is_positional_);
}
constexpr ConvParser SetError(bool error) const {
return ConvParser(format_, args_, error_ || error, arg_position_,
is_positional_);
}
constexpr ConvParser SetArgPosition(int arg_position) const {
return ConvParser(format_, args_, error_, arg_position, is_positional_);
}
// Consumes the next arg and verifies that it matches `conv`.
// `error_` is set if there is no next arg or if it doesn't match `conv`.
constexpr ConvParser ConsumeNextArg(char conv) const {
return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
}
// Verify that positional argument `i.value` matches `conv`.
// `error_` is set if `i.value` is not a valid argument or if it doesn't
// match.
constexpr ConvParser VerifyPositional(Integer i, char conv) const {
return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
}
// Parse the position of the arg and store it in `arg_position_`.
constexpr ConvParser ParseArgPosition(Integer arg) const {
return SetFormat(arg.format).SetArgPosition(arg.value);
}
// Consume the flags.
constexpr ConvParser ParseFlags() const {
return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
}
// Consume the width.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
constexpr ConvParser ParseWidth() const {
return IsDigit(GetChar(format_, 0))
? SetFormat(ParseDigits(format_).format)
: GetChar(format_, 0) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_)), '*')
: SetFormat(ConsumeFront(format_))
.ConsumeNextArg('*')
: *this;
}
// Consume the precision.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
constexpr ConvParser ParsePrecision() const {
return GetChar(format_, 0) != '.'
? *this
: GetChar(format_, 1) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_, 2)), '*')
: SetFormat(ConsumeFront(format_, 2))
.ConsumeNextArg('*')
: SetFormat(ParseDigits(ConsumeFront(format_)).format);
}
// Consume the length characters.
constexpr ConvParser ParseLength() const {
return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
}
// Consume the conversion character and verify that it matches `args_`.
// `error_` is set if it doesn't match.
// The conversion character is the first character of `format_`.
//   * Positional mode: it is checked against the argument at the stored
//     `arg_position_`, and the format advances past the character.
//   * Sequential mode: it is checked against (and consumes) the next
//     argument, then the format advances past the character.
constexpr ConvParser ParseConversion() const {
return is_positional_
? VerifyPositional({ConsumeFront(format_), arg_position_},
GetChar(format_, 0))
: ConsumeNextArg(GetChar(format_, 0))
.SetFormat(ConsumeFront(format_));
}
// Private constructor that sets every field explicitly. Used to build a
// modified copy of an existing parser (see SetArgPosition).
constexpr ConvParser(string_view format, ConvList args, bool error,
int arg_position, bool is_positional)
: format_(format),
args_(args),
error_(error),
arg_position_(arg_position),
is_positional_(is_positional) {}
public:
// Creates a parser for one conversion specifier.
// `format` is the text to parse, `args` the argument list to check against
// and `is_positional` selects positional mode. The parser starts with no
// error and an argument position of 0.
constexpr ConvParser(string_view format, ConvList args, bool is_positional)
: format_(format),
args_(args),
error_(false),
arg_position_(0),
is_positional_(is_positional) {}
// Consume the whole conversion specifier.
// `format()` will be set to the character after the conversion character.
// `error()` will be set if any of the arguments do not match.
// Stages run in this order: argument position (positional mode only), flags,
// width, precision, length modifiers, conversion character.
constexpr ConvParser Run() const {
return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this)
.ParseFlags()
.ParseWidth()
.ParsePrecision()
.ParseLength()
.ParseConversion();
}
// Read-only accessors for the parser state.
// Remaining, not yet consumed, format text.
constexpr string_view format() const { return format_; }
// Current argument list.
constexpr ConvList args() const { return args_; }
// Whether a mismatch has been recorded.
constexpr bool error() const { return error_; }
// Whether the parser is in positional mode.
constexpr bool is_positional() const { return is_positional_; }
private:
string_view format_;
// Current list of arguments. If we are not in positional mode we will consume
// from the front.
ConvList args_;
bool error_;
// Holds the argument position of the conversion character, if we are in
// positional mode. Otherwise, it is unspecified.
int arg_position_;
// Whether we are in positional mode.
// It changes the behavior of '*' and where to find the converted argument.
bool is_positional_;
};
// Parses a whole format expression.
// See FormatParser::Run().
// Every function is a single return expression and loops are expressed as
// recursion, so the whole check can be evaluated in a constant expression.
class FormatParser {
// Returns true when `format` is empty or starts with a '%' that begins a
// conversion specifier, i.e. a '%' that is not the first half of "%%".
static constexpr bool FoundPercent(string_view format) {
return format.empty() ||
(GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
}
// We use an inner function to increase the recursion limit.
// The inner function consumes up to `limit` characters on every run.
// This increases the limit from 512 to ~512*limit.
// Each step drops one character, or two when the text starts with "%%".
static constexpr string_view ConsumeNonPercentInner(string_view format,
int limit = 20) {
return FoundPercent(format) || !limit
? format
: ConsumeNonPercentInner(
ConsumeFront(format, GetChar(format, 0) == '%' &&
GetChar(format, 1) == '%'
? 2
: 1),
limit - 1);
}
// Consume characters until the next conversion spec %.
// It skips %%.
// The result is either empty or starts at the '%' of a conversion specifier.
static constexpr string_view ConsumeNonPercent(string_view format) {
return FoundPercent(format)
? format
: ConsumeNonPercent(ConsumeNonPercentInner(format));
}
// Skips any leading digits and returns true if the next character is '$'.
static constexpr bool IsPositional(string_view format) {
return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format))
: GetChar(format, 0) == '$';
}
// Validates the rest of the format. `format_` is either empty or positioned
// at the '%' of the next conversion; that '%' is dropped before the
// specifier is handed to ConvParser.
constexpr bool RunImpl(bool is_positional) const {
// In non-positional mode we require all arguments to be consumed.
// In positional mode just reaching the end of the format without errors is
// enough.
return (format_.empty() && (is_positional || args_.count == 0)) ||
(!format_.empty() &&
ValidateArg(
ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
}
// Fails if the conversion recorded an error; otherwise continues with the
// text and arguments left over by `conv`, keeping its positional mode.
constexpr bool ValidateArg(ConvParser conv) const {
return !conv.error() && FormatParser(conv.format(), conv.args())
.RunImpl(conv.is_positional());
}
public:
// Stores `args` and positions `format_` at the first conversion specifier
// (or at the end of `format` when there is none).
constexpr FormatParser(string_view format, ConvList args)
: format_(ConsumeNonPercent(format)), args_(args) {}
// Runs the parser for `format` and `args`.
// It verifies that the format is valid and that all conversion specifiers
// match the arguments passed.
// In non-positional mode it also verifies that all arguments are consumed.
// Positional mode is decided once, from the first conversion specifier, and
// then applied to every specifier in the format.
constexpr bool Run() const {
return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
}
private:
string_view format_;
// Current list of arguments.
// If we are not in positional mode we will consume from the front and will
// have to be empty in the end.
ConvList args_;
};
// Returns whether `format` is a valid format string for arguments whose
// accepted conversion sets are `C...`, in argument order.
// The pack is materialized as a ConvListT array and handed to FormatParser
// together with its length.
template <FormatConversionCharSet... C>
constexpr bool ValidFormatImpl(string_view format) {
return FormatParser(format,
{ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
.Run();
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_

View File

@@ -0,0 +1,170 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Test helper: renders `conv` as text. For every conversion character listed
// by ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_ (in macro order) that `conv`
// contains, the character is appended; a trailing "*" is appended when the
// set also contains kStar.
std::string ConvToString(FormatConversionCharSet conv) {
std::string out;
// Expanded once per conversion character `c`; `#c` is the character's name.
#define CONV_SET_CASE(c) \
if (Contains(conv, FormatConversionCharSetInternal::c)) { \
out += #c; \
}
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, )
#undef CONV_SET_CASE
if (Contains(conv, FormatConversionCharSetInternal::kStar)) {
out += "*";
}
return out;
}
// Checks which conversion characters ArgumentToConv reports for a handful of
// argument types, using the textual rendering produced by ConvToString.
TEST(StrFormatChecker, ArgumentToConv) {
  // Strings accept only %s.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string>()), "s");
  // C strings can also be printed as pointers.
  EXPECT_EQ(ConvToString(ArgumentToConv<const char*>()), "sp");
  // Floating point values accept the floating conversions only.
  EXPECT_EQ(ConvToString(ArgumentToConv<double>()), "fFeEgGaA");
  // Integers accept every numeric conversion and may feed a '*'.
  EXPECT_EQ(ConvToString(ArgumentToConv<int>()), "cdiouxXfFeEgGaA*");
  // Other object pointers accept only %p.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string*>()), "p");
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// One checker test case: the validation outcome paired with the format string
// it was computed for (the format is used in failure messages).
struct Case {
bool result;
const char* format;
};
// Runs the compile-time checker on `format` for arguments of types `Args...`
// and returns the outcome together with the format string.
template <typename... Args>
constexpr Case ValidFormat(const char* format) {
return {ValidFormatImpl<ArgumentToConv<Args>()...>(format), format};
}
// Table-driven test of the checker: `trues` lists format/argument
// combinations that must be accepted, `falses` those that must be rejected.
// Both tables are `constexpr`, so the checker is evaluated at compile time.
TEST(StrFormatChecker, ValidFormat) {
// We want to make sure these expressions are constexpr and they have the
// expected value.
// If they are not constexpr the attribute will just ignore them and not give
// a compile time error.
enum e {};
enum class e2 {};
// Accepted formats: sequential-argument cases first, then the positional
// ("%N$") counterparts.
constexpr Case trues[] = {
ValidFormat<>("abc"), //
ValidFormat<e>("%d"), //
ValidFormat<e2>("%d"), //
ValidFormat<int>("%% %d"), //
ValidFormat<int>("%ld"), //
ValidFormat<int>("%lld"), //
ValidFormat<std::string>("%s"), //
ValidFormat<std::string>("%10s"), //
ValidFormat<int>("%.10x"), //
ValidFormat<int, int>("%*.3x"), //
ValidFormat<int>("%1.d"), //
ValidFormat<int>("%.d"), //
ValidFormat<int, double>("%d %g"), //
ValidFormat<int, std::string>("%*s"), //
ValidFormat<int, double>("%.*f"), //
ValidFormat<void (*)(), volatile int*>("%p %p"), //
ValidFormat<string_view, const char*, double, void*>(
"string_view=%s const char*=%s double=%f void*=%p)"),
ValidFormat<int>("%% %1$d"), //
ValidFormat<int>("%1$ld"), //
ValidFormat<int>("%1$lld"), //
ValidFormat<std::string>("%1$s"), //
ValidFormat<std::string>("%1$10s"), //
ValidFormat<int>("%1$.10x"), //
ValidFormat<int>("%1$*1$.*1$d"), //
ValidFormat<int, int>("%1$*2$.3x"), //
ValidFormat<int>("%1$1.d"), //
ValidFormat<int>("%1$.d"), //
ValidFormat<double, int>("%2$d %1$g"), //
ValidFormat<int, std::string>("%2$*1$s"), //
ValidFormat<int, double>("%2$.*1$f"), //
ValidFormat<void*, string_view, const char*, double>(
"string_view=%2$s const char*=%3$s double=%4$f void*=%1$p "
"repeat=%3$s)")};
for (Case c : trues) {
EXPECT_TRUE(c.result) << c.format;
}
// Rejected formats: unused or missing arguments, type mismatches, malformed
// specifiers, and mixing sequential with positional arguments.
constexpr Case falses[] = {
ValidFormat<int>(""), //
ValidFormat<e>("%s"), //
ValidFormat<e2>("%s"), //
ValidFormat<>("%s"), //
ValidFormat<>("%r"), //
ValidFormat<int>("%s"), //
ValidFormat<int>("%.1.d"), //
ValidFormat<int>("%*1d"), //
ValidFormat<int>("%1-d"), //
ValidFormat<std::string, int>("%*s"), //
ValidFormat<int>("%*d"), //
ValidFormat<std::string>("%p"), //
ValidFormat<int (*)(int)>("%d"), //
ValidFormat<>("%3$d"), //
ValidFormat<>("%1$r"), //
ValidFormat<int>("%1$s"), //
ValidFormat<int>("%1$.1.d"), //
ValidFormat<int>("%1$*2$1d"), //
ValidFormat<int>("%1$1-d"), //
ValidFormat<std::string, int>("%2$*1$s"), //
ValidFormat<std::string>("%1$p"),
ValidFormat<int, int>("%d %2$d"), //
};
for (Case c : falses) {
EXPECT_FALSE(c.result) << c.format;
}
}
// Validates, at compile time, a format made of three 4000-character literal
// runs separated by "%d" and "%s".
// NOTE(review): presumably this guards the checker's constexpr recursion
// depth on long inputs - confirm against the checker implementation.
TEST(StrFormatChecker, LongFormat) {
// Literal text is built by repeating a 40-character run 10x, then 10x again.
#define CHARS_X_40 "1234567890123456789012345678901234567890"
#define CHARS_X_400 \
CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \
CHARS_X_40 CHARS_X_40 CHARS_X_40
#define CHARS_X_4000 \
CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \
CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400
constexpr char long_format[] =
CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000;
// `constexpr` forces the check to be evaluated by the compiler.
constexpr bool is_valid = ValidFormat<int, std::string>(long_format).result;
EXPECT_TRUE(is_valid);
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/extension.h"
#include <errno.h>
#include <algorithm>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Renders the flags contained in `v` as their printf flag characters.
// Characters always appear in the fixed order '-', '+', ' ', '#', '0'; an
// empty string is returned when none of these flags is set.
std::string FlagsToString(Flags v) {
  std::string text;
  if (FlagsContains(v, Flags::kLeft)) text.push_back('-');
  if (FlagsContains(v, Flags::kShowPos)) text.push_back('+');
  if (FlagsContains(v, Flags::kSignCol)) text.push_back(' ');
  if (FlagsContains(v, Flags::kAlt)) text.push_back('#');
  if (FlagsContains(v, Flags::kZero)) text.push_back('0');
  return text;
}
// Out-of-class definitions for the `static constexpr` data members declared
// in extension.h. Before C++17 a static constexpr data member that is
// odr-used needs a definition at namespace scope; since C++17 such members
// are implicitly inline and these definitions are redundant, hence the
// NOLINT(readability-redundant-declaration) markers.

// One definition per conversion character of FormatConversionCharInternal.
#define ABSL_INTERNAL_X_VAL(id) \
constexpr absl::FormatConversionChar FormatConversionCharInternal::id;
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone;
// One definition per conversion character of FormatConversionCharSetInternal.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
constexpr FormatConversionCharSet FormatConversionCharSetInternal::c;
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar;
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral;
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating;
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric;
// NOLINTNEXTLINE(readability-redundant-declaration)
constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer;
// Writes `value` to the sink, truncated and padded like printf's "%s".
//
// `precision`, when non-negative, caps the number of characters taken from
// `value`. `width`, when non-negative, is the minimum field width; missing
// columns are filled with spaces, placed after the text when `left` is true
// and before it otherwise. Always returns true.
bool FormatSinkImpl::PutPaddedString(string_view value, int width,
                                     int precision, bool left) {
  // Negative widths request no padding at all.
  const size_t min_width = width >= 0 ? static_cast<size_t>(width) : 0;

  size_t shown_len = value.size();
  if (precision >= 0 && static_cast<size_t>(precision) < shown_len) {
    shown_len = static_cast<size_t>(precision);
  }
  const string_view shown(value.data(), shown_len);

  const size_t padding = Excess(shown.size(), min_width);
  if (left) {
    Append(shown);
    Append(padding, ' ');
  } else {
    Append(padding, ' ');
    Append(shown);
  }
  return true;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,445 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#include <limits.h>
#include <cstddef>
#include <cstring>
#include <ostream>
#include "absl/base/config.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/internal/str_format/output.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
enum class FormatConversionChar : uint8_t;
enum class FormatConversionCharSet : uint64_t;
namespace str_format_internal {
// Type-erased, non-owning handle to an output sink.
// Stores the sink as a `void*` together with a function pointer that knows
// the sink's real type and forwards writes to
// `str_format_internal::InvokeFlush`. The pointed-to sink must outlive this
// object.
class FormatRawSinkImpl {
public:
// Implicitly convert from any type that provides the hook function as
// described above.
// The constructor participates in overload resolution only when
// `InvokeFlush(T*, string_view)` is well-formed for `T`.
template <typename T, decltype(str_format_internal::InvokeFlush(
std::declval<T*>(), string_view()))* = nullptr>
FormatRawSinkImpl(T* raw) // NOLINT
: sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {}
// Forwards `s` to the wrapped sink.
void Write(string_view s) { write_(sink_, s); }
// Returns the FormatRawSinkImpl stored in the `sink_` member of `s`.
template <typename T>
static FormatRawSinkImpl Extract(T s) {
return s.sink_;
}
private:
// Trampoline: restores the concrete sink type and invokes its flush hook.
template <typename T>
static void Flush(void* r, string_view s) {
str_format_internal::InvokeFlush(static_cast<T*>(r), s);
}
void* sink_;
void (*write_)(void*, string_view);
};
// An abstraction to which conversions write their string data.
//
// Characters are staged in a fixed 1024-byte buffer and handed to the
// underlying FormatRawSinkImpl when the buffer fills up, when Flush() is
// called, and when the object is destroyed.
class FormatSinkImpl {
 public:
  explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {}

  // Not copyable. `pos_` points into this object's own `buf_`, so a
  // member-wise copy would keep writing into the source object's buffer, and
  // both destructors would flush.
  FormatSinkImpl(const FormatSinkImpl&) = delete;
  FormatSinkImpl& operator=(const FormatSinkImpl&) = delete;

  ~FormatSinkImpl() { Flush(); }

  // Hands the buffered characters to the raw sink and empties the buffer.
  void Flush() {
    raw_.Write(string_view(buf_, pos_ - buf_));
    pos_ = buf_;
  }

  // Appends `n` copies of the character `c`.
  void Append(size_t n, char c) {
    if (n == 0) return;
    size_ += n;
    auto raw_append = [&](size_t count) {
      memset(pos_, c, count);
      pos_ += count;
    };
    // Fill the remaining buffer space and flush, until the rest fits.
    while (n > Avail()) {
      n -= Avail();
      if (Avail() > 0) {
        raw_append(Avail());
      }
      Flush();
    }
    raw_append(n);
  }

  // Appends the characters of `v`.
  void Append(string_view v) {
    size_t n = v.size();
    if (n == 0) return;
    size_ += n;
    if (n >= Avail()) {
      // Does not fit: flush what is buffered, then bypass the buffer and
      // write `v` to the raw sink directly.
      Flush();
      raw_.Write(v);
      return;
    }
    memcpy(pos_, v.data(), n);
    pos_ += n;
  }

  // Total number of characters appended so far, flushed or not.
  size_t size() const { return size_; }

  // Put 'v' to 'sink' with specified width, precision, and left flag.
  bool PutPaddedString(string_view v, int width, int precision, bool left);

  // Constructs a `T` from a pointer to this sink.
  template <typename T>
  T Wrap() {
    return T(this);
  }

  // Returns the FormatSinkImpl pointer stored in the `sink_` member of `*s`.
  template <typename T>
  static FormatSinkImpl* Extract(T* s) {
    return s->sink_;
  }

 private:
  // Number of free bytes left in `buf_`.
  size_t Avail() const { return buf_ + sizeof(buf_) - pos_; }

  FormatRawSinkImpl raw_;
  size_t size_ = 0;
  // Next write position inside `buf_`.
  char* pos_ = buf_;
  char buf_[1024];
};
// Bit set of printf-style conversion flags. Each enumerator other than
// kBasic occupies its own bit so values can be combined with operator|.
// FlagsToString renders kLeft, kShowPos, kSignCol, kAlt and kZero as '-',
// '+', ' ', '#' and '0' respectively.
enum class Flags : uint8_t {
kBasic = 0,
kLeft = 1 << 0,
kShowPos = 1 << 1,
kSignCol = 1 << 2,
kAlt = 1 << 3,
kZero = 1 << 4,
// This is not a real flag. It just exists to turn off kBasic when no other
// flags are set. This is for when width/precision are specified.
kNonBasic = 1 << 5,
};
// Returns the union of the two flag sets (bitwise OR of the underlying
// values), keeping the result typed as `Flags`.
constexpr Flags operator|(Flags a, Flags b) {
return static_cast<Flags>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
}
// Returns true when every bit set in `needle` is also set in `haystack`.
// An empty `needle` (Flags::kBasic) is contained in every set.
constexpr bool FlagsContains(Flags haystack, Flags needle) {
  // No bit of `needle` may fall outside of `haystack`.
  return (static_cast<uint8_t>(needle) & ~static_cast<uint8_t>(haystack)) == 0;
}
std::string FlagsToString(Flags v);
// Streams the textual form of `v`, as produced by FlagsToString.
inline std::ostream& operator<<(std::ostream& os, Flags v) {
  os << FlagsToString(v);
  return os;
}
// clang-format off
// X-macro listing every supported conversion character.
// Expands to `X_VAL(ch)` for each character, with `X_SEP` placed between
// consecutive entries (there is none after the last entry, `p`).
// The order is: c s d i o u x X f F e E g G a A n p.
#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \
/* text */ \
X_VAL(c) X_SEP X_VAL(s) X_SEP \
/* ints */ \
X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \
X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \
/* floats */ \
X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \
X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \
/* misc */ \
X_VAL(n) X_SEP X_VAL(p)
// clang-format on
// This type should not be referenced, it exists only to provide labels
// internally that match the values declared in FormatConversionChar in
// str_format.h. This is meant to allow internal libraries to use the same
// declared interface type as the public interface
// (absl::FormatConversionChar) while keeping the definition in a public
// header.
// Internal libraries should use the form
// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for
// comparisons. Use in switch statements is not recommended due to a bug in how
// gcc 4.9 -Wswitch handles declared but undefined enums.
struct FormatConversionCharInternal {
// Pure namespace-like holder of constants; cannot be instantiated.
FormatConversionCharInternal() = delete;
private:
// Private mirror enum that supplies the numeric values. The enumerator order
// matches ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_, with kNone last.
// clang-format off
enum class Enum : uint8_t {
c, s, // text
d, i, o, u, x, X, // int
f, F, e, E, g, G, a, A, // float
n, p, // misc
kNone
};
// clang-format on
public:
// One `FormatConversionChar` constant per conversion character, carrying the
// value of the private enumerator with the same name.
#define ABSL_INTERNAL_X_VAL(id) \
static constexpr FormatConversionChar id = \
static_cast<FormatConversionChar>(Enum::id);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
// Sentinel for "no conversion character".
static constexpr FormatConversionChar kNone =
static_cast<FormatConversionChar>(Enum::kNone);
};
// clang-format on
// Maps a conversion character such as 'd' or 's' to its
// FormatConversionChar value. Returns FormatConversionCharInternal::kNone
// for any character that is not a supported conversion.
inline FormatConversionChar FormatConversionCharFromChar(char c) {
switch (c) {
// Generates one `case` per conversion character; `#id[0]` is the character
// itself (first char of the stringized identifier).
#define ABSL_INTERNAL_X_VAL(id) \
case #id[0]: \
return FormatConversionCharInternal::id;
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
}
return FormatConversionCharInternal::kNone;
}
// Returns true for the conversions spelled with an uppercase letter:
// X, F, E, G and A.
inline bool FormatConversionCharIsUpper(FormatConversionChar c) {
  return c == FormatConversionCharInternal::X ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::G ||
         c == FormatConversionCharInternal::A;
}
// Returns true for the floating point conversions, in either case:
// a/A, e/E, f/F and g/G.
inline bool FormatConversionCharIsFloat(FormatConversionChar c) {
  return c == FormatConversionCharInternal::a ||
         c == FormatConversionCharInternal::A ||
         c == FormatConversionCharInternal::e ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::f ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::g ||
         c == FormatConversionCharInternal::G;
}
// Maps a FormatConversionChar back to its printf character.
// Returns '\0' for kNone and for any value that is not one of the listed
// conversion characters.
inline char FormatConversionCharToChar(FormatConversionChar c) {
if (c == FormatConversionCharInternal::kNone) {
return '\0';
// Each expansion closes the previous branch and opens an `else if` for one
// conversion character, so the macro list becomes a single if/else chain.
#define ABSL_INTERNAL_X_VAL(e) \
} else if (c == FormatConversionCharInternal::e) { \
return #e[0];
#define ABSL_INTERNAL_X_SEP
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL,
ABSL_INTERNAL_X_SEP)
} else {
return '\0';
}
#undef ABSL_INTERNAL_X_VAL
#undef ABSL_INTERNAL_X_SEP
}
// Streams the character associated with `v`, or '?' when `v` has no
// associated character.
inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) {
  const char ch = FormatConversionCharToChar(v);
  return os << (ch != '\0' ? ch : '?');
}
struct FormatConversionSpecImplFriend;
// Parsed form of a single conversion specifier: the conversion character,
// its flags, and the optional width and precision.
//
// A default-constructed object describes "no conversion": kNone, no flags,
// and unspecified (negative) width and precision. Fields are written through
// FormatConversionSpecImplFriend.
class FormatConversionSpecImpl {
 public:
  // Width and precision are not specified, no flags are set.
  bool is_basic() const { return flags_ == Flags::kBasic; }
  bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); }
  bool has_show_pos_flag() const {
    return FlagsContains(flags_, Flags::kShowPos);
  }
  bool has_sign_col_flag() const {
    return FlagsContains(flags_, Flags::kSignCol);
  }
  bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); }
  bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); }

  FormatConversionChar conversion_char() const {
    // Keep this field first in the struct. It generates better code when
    // accessing it when ConversionSpec is passed by value in registers.
    static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, "");
    return conv_;
  }

  // Returns the specified width. If width is unspecified, it returns a
  // negative value.
  int width() const { return width_; }
  // Returns the specified precision. If precision is unspecified, it returns a
  // negative value.
  int precision() const { return precision_; }

  // Constructs a `T` from a copy of this spec.
  template <typename T>
  T Wrap() {
    return T(*this);
  }

 private:
  friend struct str_format_internal::FormatConversionSpecImplFriend;
  FormatConversionChar conv_ = FormatConversionCharInternal::kNone;
  // Every member has a default so that the accessors above never read an
  // indeterminate value on an object whose fields were only partially set.
  Flags flags_ = Flags::kBasic;
  int width_ = -1;      // Negative means "unspecified".
  int precision_ = -1;  // Negative means "unspecified".
};
// Friend of FormatConversionSpecImpl that exposes write access to its private
// fields through static helpers, keeping the spec's own public interface
// read-only.
struct FormatConversionSpecImplFriend final {
// Replaces the flags of `*conv` with `f`.
static void SetFlags(Flags f, FormatConversionSpecImpl* conv) {
conv->flags_ = f;
}
// Replaces the conversion character of `*conv` with `c`.
static void SetConversionChar(FormatConversionChar c,
FormatConversionSpecImpl* conv) {
conv->conv_ = c;
}
// Replaces the width of `*conv` with `w`.
static void SetWidth(int w, FormatConversionSpecImpl* conv) {
conv->width_ = w;
}
// Replaces the precision of `*conv` with `p`.
static void SetPrecision(int p, FormatConversionSpecImpl* conv) {
conv->precision_ = p;
}
// Returns the flags of `spec` rendered as printf flag characters.
static std::string FlagsToString(const FormatConversionSpecImpl& spec) {
return str_format_internal::FlagsToString(spec.flags_);
}
};
// Type safe OR operator.
// We need this for two reasons:
// 1. operator| on enums makes them decay to integers and the result is an
// integer. We need the result to stay as an enum.
// 2. We use "enum class" which would not work even if we accepted the decay.
//
// Base case of the recursion: the union of a single set is the set itself.
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a) {
return a;
}
// Recursive case: ORs the bits of `a` with the union of the remaining sets.
template <typename... CharSet>
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a, CharSet... rest) {
return static_cast<FormatConversionCharSet>(
static_cast<uint64_t>(a) |
static_cast<uint64_t>(FormatConversionCharSetUnion(rest...)));
}
// Returns the single-bit mask that represents `c` inside a
// FormatConversionCharSet: bit number (1 + enum value of `c`). Bit 0 is left
// free; the `char` overload assigns it to '*'.
constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) {
return uint64_t{1} << (1 + static_cast<uint8_t>(c));
}
// Returns the bit mask for the conversion written as the character `conv`:
// the mask of the matching FormatConversionChar, 1 for '*', and 0 for any
// other character.
constexpr uint64_t FormatConversionCharToConvInt(char conv) {
return
// Expands to a chain of `conv == 'x' ? mask(x) :` tests, one per
// conversion character, that falls through to the '*' test below.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
conv == #c[0] \
? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \
:
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
conv == '*'
? 1
: 0;
}
// Same as FormatConversionCharToConvInt(char), with the mask typed as a
// FormatConversionCharSet. Unknown characters yield the empty set.
constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) {
return static_cast<FormatConversionCharSet>(
FormatConversionCharToConvInt(conv));
}
// Named FormatConversionCharSet constants: one single-character set per
// conversion character, plus a few commonly used unions.
struct FormatConversionCharSetInternal {
// Generates `static constexpr FormatConversionCharSet <ch>` for every
// conversion character, holding the one-bit set for that character.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
static constexpr FormatConversionCharSet c = \
FormatConversionCharToConvValue(#c[0]);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
// Used for width/precision '*' specification.
static constexpr FormatConversionCharSet kStar =
FormatConversionCharToConvValue('*');
// d, i, u, o, x, X.
static constexpr FormatConversionCharSet kIntegral =
FormatConversionCharSetUnion(d, i, u, o, x, X);
// a, e, f, g and their uppercase forms.
static constexpr FormatConversionCharSet kFloating =
FormatConversionCharSetUnion(a, e, f, g, A, E, F, G);
// Union of kIntegral and kFloating.
static constexpr FormatConversionCharSet kNumeric =
FormatConversionCharSetUnion(kIntegral, kFloating);
// Alias for the single-character set `p`.
static constexpr FormatConversionCharSet kPointer = p;
};
// Union of two conversion character sets.
// `enum class` values have no built-in operator|, and the result has to stay
// a FormatConversionCharSet instead of decaying to an integer, so the bits
// are combined on the underlying type and converted back.
constexpr FormatConversionCharSet operator|(FormatConversionCharSet a,
                                            FormatConversionCharSet b) {
  return static_cast<FormatConversionCharSet>(static_cast<uint64_t>(a) |
                                              static_cast<uint64_t>(b));
}
// Overloaded conversion functions to support absl::ParsedFormat.

// Character form: returns the set that holds exactly the conversion `c`
// (the empty set when `c` is not a conversion character).
constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) {
  return FormatConversionCharToConvValue(c);
}

// Set form: already a set, returned unchanged.
constexpr FormatConversionCharSet ToFormatConversionCharSet(
    FormatConversionCharSet c) {
  return c;
}

// Any other argument type is rejected at compile time.
template <typename T>
void ToFormatConversionCharSet(T) = delete;
// Checks whether the conversion written as the character `c` exists in
// `set`. Characters that are not conversions are never contained.
constexpr bool Contains(FormatConversionCharSet set, char c) {
  return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0;
}

// Checks whether all the characters in `c` are contained in `set`, i.e. that
// `c` has no bit outside of `set`.
constexpr bool Contains(FormatConversionCharSet set,
                        FormatConversionCharSet c) {
  return (static_cast<uint64_t>(c) & ~static_cast<uint64_t>(set)) == 0;
}

// Checks whether the single conversion `c` exists in `set`.
constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) {
  return (FormatConversionCharToConvInt(c) & static_cast<uint64_t>(set)) != 0;
}
// Returns how far `used` falls short of `capacity`, or 0 when `used` already
// meets or exceeds `capacity`.
inline size_t Excess(size_t used, size_t capacity) {
  if (capacity <= used) return 0;
  return capacity - used;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_

View File

@@ -0,0 +1,98 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "absl/strings/internal/str_format/extension.h"
#include <random>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
namespace my_namespace {
// A user-defined sink type: collects everything flushed into it in a string.
// The friend `AbslFormatFlush` overload is the hook that lets pointers to
// this type be used as a format output.
class UserDefinedType {
 public:
  UserDefinedType() = default;

  // Appends the characters of `str` to the collected value.
  void Append(absl::string_view str) { value_.append(str.begin(), str.end()); }

  // Everything appended so far.
  const std::string& Value() const { return value_; }

  // Flush hook: forwards the flushed text to Append().
  friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) {
    x->Append(str);
  }

 private:
  std::string value_;
};
} // namespace my_namespace
namespace {
// Builds a string of `len` characters, each drawn uniformly from 'a'..'z' by
// a std::mt19937 engine seeded from std::random_device.
std::string MakeRandomString(size_t len) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<> letter('a', 'z');
  std::string result(len, '0');
  for (size_t i = 0; i < result.size(); ++i) {
    result[i] = letter(engine);
  }
  return result;
}
// Appends ten random strings of each chunk size through a FormatSinkImpl and
// checks that the backing std::string receives exactly the concatenation.
// The sizes cover chunks both below and above the sink's internal buffer.
TEST(FormatExtensionTest, SinkAppendSubstring) {
  for (size_t chunk_size : {1, 10, 100, 1000, 10000}) {
    std::string expected;
    std::string actual;
    absl::str_format_internal::FormatSinkImpl sink(&actual);
    for (int round = 0; round != 10; ++round) {
      const std::string piece = MakeRandomString(chunk_size);
      expected.append(piece);
      sink.Append(piece);
    }
    // Push out whatever is still buffered before comparing.
    sink.Flush();
    EXPECT_EQ(actual, expected);
  }
}
// Appends ten runs of a repeated random character, for each run length,
// through a FormatSinkImpl and checks that the backing std::string receives
// exactly those runs.
TEST(FormatExtensionTest, SinkAppendChars) {
  for (size_t chunk_size : {1, 10, 100, 1000, 10000}) {
    std::string expected;
    std::string actual;
    absl::str_format_internal::FormatSinkImpl sink(&actual);
    for (int round = 0; round != 10; ++round) {
      const char fill = MakeRandomString(1)[0];
      expected.append(chunk_size, fill);
      sink.Append(chunk_size, fill);
    }
    // Push out whatever is still buffered before comparing.
    sink.Flush();
    EXPECT_EQ(actual, expected);
  }
}
// Checks that the internal constants agree with the public enums: for every
// conversion character, the public enumerator must equal the internal
// constant of the same name.
TEST(FormatExtensionTest, VerifyEnumEquality) {
// absl::FormatConversionChar vs. FormatConversionCharInternal.
#define X_VAL(id) \
EXPECT_EQ(absl::FormatConversionChar::id, \
absl::str_format_internal::FormatConversionCharInternal::id);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, );
#undef X_VAL
// absl::FormatConversionCharSet vs. FormatConversionCharSetInternal.
#define X_VAL(id) \
EXPECT_EQ(absl::FormatConversionCharSet::id, \
absl::str_format_internal::FormatConversionCharSetInternal::id);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, );
#undef X_VAL
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Floating point conversion entry points, one overload per floating point
// type. Each takes the value `v`, the parsed conversion spec `conv` and the
// destination `sink`. Only declared here; the definitions live elsewhere.
// NOTE(review): the bool result presumably reports whether the conversion
// was performed - confirm against the definitions.
bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_

View File

@@ -0,0 +1,72 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <errno.h>
#include <cstring>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Scope guard for `errno`: saves the current value and zeroes `errno` on
// construction. On destruction the saved value is put back, unless `errno`
// was set to something non-zero in the meantime, in which case the new value
// is kept.
struct ClearErrnoGuard {
  ClearErrnoGuard() {
    old_value = errno;
    errno = 0;
  }
  ~ClearErrnoGuard() {
    if (errno != 0) return;  // Keep an error raised inside the guarded scope.
    errno = old_value;
  }
  int old_value;
};
} // namespace
// Copies as much of `v` as still fits into the remaining buffer space and
// advances the write position past it. `total_written_` always grows by the
// full `v.size()`, so it keeps counting characters that did not fit.
void BufferRawSink::Write(string_view v) {
  const size_t to_write = v.size() < size_ ? v.size() : size_;
  // Skip the copy when there is nothing to copy: passing a null pointer to
  // memcpy is undefined behavior even for a length of zero, and either
  // `buffer_` (zero-sized destination) or `v.data()` (empty view) may be null.
  if (to_write != 0) {
    std::memcpy(buffer_, v.data(), to_write);
    buffer_ += to_write;
    size_ -= to_write;
  }
  total_written_ += v.size();
}
// Writes all of `v` to `output_` with fwrite, retrying after partial writes.
// Stops early once an error has been recorded in `error_`; if `error_` is
// already set on entry nothing is written. `count_` accumulates the number of
// bytes fwrite reported as written.
void FILERawSink::Write(string_view v) {
while (!v.empty() && !error_) {
// Reset errno to zero in case the libc implementation doesn't set errno
// when a failure occurs.
// The guard puts the previous errno back at the end of each iteration
// unless this iteration left errno set.
ClearErrnoGuard guard;
if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) {
// Some progress was made.
count_ += result;
v.remove_prefix(result);
} else {
// Nothing was written: decide between retrying and recording an error.
if (errno == EINTR) {
continue;
} else if (errno) {
error_ = errno;
} else if (std::ferror(output_)) {
// Non-POSIX compliant libc implementations may not set errno, so we
// have to check the stream's error indicator.
error_ = EBADF;
} else {
// We're likely on a non-POSIX system that encountered EINTR but had no
// way of reporting it.
// NOTE(review): this branch retries without any bound; a stream that
// keeps returning 0 with neither errno nor the error indicator set
// would make this loop spin - confirm that cannot happen.
continue;
}
}
}
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,96 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Output extension hooks for the Format library.
// `internal::InvokeFlush` calls the appropriate flush function for the
// specified output argument.
// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF.
// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#include <cstdio>
#include <ostream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// RawSink implementation that writes into a char* buffer.
// It will not overflow the buffer, but will keep the total count of chars
// that would have been written.
// The buffer is not owned and no terminating NUL is written by this class.
class BufferRawSink {
public:
// `buffer` is the destination and `size` the number of bytes that may be
// written to it.
BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {}
// Number of characters passed to Write() so far, including any that did not
// fit into the buffer.
size_t total_written() const { return total_written_; }
// Appends `v`, truncated to the space that is left.
void Write(string_view v);
private:
// Next write position.
char* buffer_;
// Bytes still available starting at `buffer_`.
size_t size_;
size_t total_written_ = 0;
};
// A RawSink that targets a std::FILE*.
// It records the total number of bytes written and any error encountered
// while writing.
class FILERawSink {
 public:
  explicit FILERawSink(std::FILE* output) : output_{output} {}

  // Defined out of line.
  void Write(string_view v);

  // Total byte count accumulated by Write().
  size_t count() const { return count_; }

  // Error code stored by Write(); 0 until an error has been recorded.
  int error() const { return error_; }

 private:
  std::FILE* output_;
  int error_{0};
  size_t count_{0};
};
// Provide RawSink integration with common types from the STL.

// std::string sink: appends the bytes of `s` to `*out`.
inline void AbslFormatFlush(std::string* out, string_view s) {
  const char* const bytes = s.data();
  const auto length = s.size();
  out->append(bytes, length);
}
// std::ostream sink: writes the bytes of `s` to `*out` unformatted.
inline void AbslFormatFlush(std::ostream* out, string_view s) {
  std::ostream& stream = *out;
  stream.write(s.data(), static_cast<std::streamsize>(s.size()));
}
// FILERawSink sink: forwards `v` to FILERawSink::Write.
inline void AbslFormatFlush(FILERawSink* sink, string_view v) {
  FILERawSink& target = *sink;
  target.Write(v);
}
// BufferRawSink sink: forwards `v` to BufferRawSink::Write.
inline void AbslFormatFlush(BufferRawSink* sink, string_view v) {
  BufferRawSink& target = *sink;
  target.Write(v);
}
// Calls the `AbslFormatFlush` overload that accepts `T*`.
//
// The trailing return type is expression SFINAE: when no matching
// `AbslFormatFlush(out, s)` exists, this template drops out of overload
// resolution, which gives a better compiler error message for unsupported
// sink types.
//
// The result of the selected overload is forwarded. For the overloads in this
// file that result is `void`, so this is a plain call; forwarding keeps the
// function well-defined should an overload ever return a value, since the
// declared return type is whatever that overload returns.
template <typename T>
auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) {
  return AbslFormatFlush(out, s);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_

View File

@@ -0,0 +1,79 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <sstream>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/cord.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Flushing into a std::string appends after its existing contents.
TEST(InvokeFlush, String) {
  std::string sink{"ABC"};
  str_format_internal::InvokeFlush(&sink, "DEF");
  EXPECT_EQ(sink, "ABCDEF");
}
// Flushing into a stream writes after whatever was streamed before.
TEST(InvokeFlush, Stream) {
  std::stringstream stream;
  stream << "ABC";
  str_format_internal::InvokeFlush(&stream, "DEF");
  EXPECT_EQ(stream.str(), "ABCDEF");
}
// Flushing into an absl::Cord appends after its existing contents.
TEST(InvokeFlush, Cord) {
  absl::Cord cord("ABC");
  str_format_internal::InvokeFlush(&cord, "DEF");
  EXPECT_EQ(cord, "ABCDEF");
}
// BufferRawSink must stop at the capacity it was given (one less than the
// array here), leaving the remaining 'x' fill bytes untouched, whether the
// text arrives in one flush or two.
TEST(BufferRawSink, Limits) {
  char buf[16];
  // Restores the buffer to all 'x' so untouched bytes are recognizable.
  const auto refill = [&buf] {
    std::fill(std::begin(buf), std::end(buf), 'x');
  };
  // Snapshot of the entire array, including bytes past the sink's capacity.
  const auto snapshot = [&buf] { return std::string(buf, sizeof(buf)); };

  // Single flush that fits.
  {
    refill();
    str_format_internal::BufferRawSink sink(buf, sizeof(buf) - 1);
    str_format_internal::InvokeFlush(&sink, "Hello World237");
    EXPECT_EQ(snapshot(), "Hello World237xx");
  }
  // Single flush that is too long: truncated at capacity.
  {
    refill();
    str_format_internal::BufferRawSink sink(buf, sizeof(buf) - 1);
    str_format_internal::InvokeFlush(&sink, "Hello World237237");
    EXPECT_EQ(snapshot(), "Hello World2372x");
  }
  // Two flushes that fit together.
  {
    refill();
    str_format_internal::BufferRawSink sink(buf, sizeof(buf) - 1);
    str_format_internal::InvokeFlush(&sink, "Hello World");
    str_format_internal::InvokeFlush(&sink, "237");
    EXPECT_EQ(snapshot(), "Hello World237xx");
  }
  // Two flushes where the second overflows: truncated at capacity.
  {
    refill();
    str_format_internal::BufferRawSink sink(buf, sizeof(buf) - 1);
    str_format_internal::InvokeFlush(&sink, "Hello World");
    str_format_internal::InvokeFlush(&sink, "237237");
    EXPECT_EQ(snapshot(), "Hello World2372x");
  }
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,339 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/parser.h"
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <cctype>
#include <cstdint>
#include <algorithm>
#include <initializer_list>
#include <limits>
#include <ostream>
#include <string>
#include <unordered_set>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Short aliases so that each row of the table below fits on one line.
using CC = FormatConversionCharInternal;
using LM = LengthMod;
// Abbreviations to fit in the table below.
constexpr auto f_sign = Flags::kSignCol;
constexpr auto f_alt = Flags::kAlt;
constexpr auto f_pos = Flags::kShowPos;
constexpr auto f_left = Flags::kLeft;
constexpr auto f_zero = Flags::kZero;
// Lookup table indexed by the unsigned value of a format-string character.
// Each entry is a ConvTag holding a conversion character (CC::), a length
// modifier (LM::), a flag (f_*), or a default-constructed tag (`{}`) for
// characters that are none of those. The table is positional: eight entries
// per row, and the trailing comment on each row names the character codes the
// row covers.
ABSL_CONST_INIT const ConvTag kTags[256] = {
{}, {}, {}, {}, {}, {}, {}, {}, // 00-07
{}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
{}, {}, {}, {}, {}, {}, {}, {}, // 10-17
{}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
f_sign, {}, {}, f_alt, {}, {}, {}, {}, // (space)!"#$%&'
{}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./
f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567
{}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
{}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
{}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
{}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
{}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}~ and 7f (DEL)
{}, {}, {}, {}, {}, {}, {}, {}, // 80-87
{}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
{}, {}, {}, {}, {}, {}, {}, {}, // 90-97
{}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
{}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
{}, {}, {}, {}, {}, {}, {}, {}, // a8-af
{}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
{}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
{}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
{}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
{}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
{}, {}, {}, {}, {}, {}, {}, {}, // d8-df
{}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
{}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
{}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
{}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
};
namespace {
// Sanity check used from an assert(): a conversion that carries a literal
// width or precision must not still be flagged as Flags::kBasic. Returns true
// when `conv` is consistent; otherwise dumps the flags, width and precision to
// stderr and returns false.
bool CheckFastPathSetting(const UnboundConversion& conv) {
  // Nothing to verify unless the conversion claims to be basic.
  if (conv.flags != Flags::kBasic) return true;
  // Basic conversions are fine as long as neither value is set (>= 0).
  if (conv.width.value() < 0 && conv.precision.value() < 0) return true;

  fprintf(stderr,
          "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
          "width=%d precision=%d\n",
          conv.flags == Flags::kBasic ? 1 : 0,
          FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
          FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
          FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
          FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
          FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
          conv.precision.value());
  return false;
}
// Parses a single conversion specification (the part after '%') from
// [pos, end) and stores the result in `*conv`.
//
// `is_positional` selects the grammar: when true, the spec must start with
// "N$" and every '*' must be followed by "N$"; when false, arguments are taken
// in order by pre-incrementing `*next_arg`. A non-positional parse that runs
// into "N$" where a width could appear switches to positional mode by setting
// `*next_arg` to -1 and re-parsing from the start; this is only permitted when
// no argument has been consumed yet (`*next_arg == 0`).
//
// Returns a pointer just past the conversion character on success, or nullptr
// if the input is exhausted or malformed.
//
// Digits are recognized with explicit '0'..'9' range checks instead of
// std::isdigit(): `c` is a plain `char`, and passing a negative value (any
// byte >= 0x80 where char is signed) to std::isdigit() is undefined behavior.
template <bool is_positional>
const char *ConsumeConversion(const char *pos, const char *const end,
                              UnboundConversion *conv, int *next_arg) {
  const char* const original_pos = pos;
  char c;
  // Read the next char into `c` and update `pos`. Makes the enclosing function
  // return nullptr if there are no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
  do {                                                  \
    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
    c = *pos++;                                         \
  } while (0)

  // Parses a run of decimal digits whose first digit is already in `c`.
  // On return `c` holds the first char that was not consumed as part of the
  // number (or the last digit, if the input ended).
  const auto parse_digits = [&] {
    int digits = c - '0';
    // We do not want to overflow `digits` so we consume at most digits10
    // digits. If there are more digits the parsing will fail later on when the
    // digit doesn't match the expected characters.
    int num_digits = std::numeric_limits<int>::digits10;
    for (;;) {
      if (ABSL_PREDICT_FALSE(pos == end)) break;
      c = *pos++;
      if (c < '0' || c > '9') break;
      --num_digits;
      if (ABSL_PREDICT_FALSE(!num_digits)) break;
      digits = 10 * digits + c - '0';
    }
    return digits;
  };

  if (is_positional) {
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    // Positions are 1-based decimals, so a leading '0' is rejected.
    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
    conv->arg_position = parse_digits();
    assert(conv->arg_position > 0);
    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  }

  ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();

  // We should start with the basic flag on.
  assert(conv->flags == Flags::kBasic);

  // Any non alpha character makes this conversion not basic.
  // This includes flags (-+ #0), width (1-9, *) or precision (.).
  // All conversion characters and length modifiers are alpha characters.
  if (c < 'A') {
    // Flags: every flag character sorts at or below '0'.
    while (c <= '0') {
      auto tag = GetTagForChar(c);
      if (tag.is_flags()) {
        conv->flags = conv->flags | tag.as_flags();
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      } else {
        break;
      }
    }

    // Width: a literal number or '*'.
    if (c <= '9') {
      if (c >= '0') {
        int maybe_width = parse_digits();
        if (!is_positional && c == '$') {
          // The number was an argument position, not a width.
          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
          // Positional conversion.
          *next_arg = -1;
          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
        }
        conv->flags = conv->flags | Flags::kNonBasic;
        conv->width.set_value(maybe_width);
      } else if (c == '*') {
        conv->flags = conv->flags | Flags::kNonBasic;
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->width.set_from_arg(parse_digits());
          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->width.set_from_arg(++*next_arg);
        }
      }
    }

    // Precision: '.' followed by a literal number, '*', or nothing (== 0).
    if (c == '.') {
      conv->flags = conv->flags | Flags::kNonBasic;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      if (c >= '0' && c <= '9') {
        conv->precision.set_value(parse_digits());
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->precision.set_from_arg(parse_digits());
          if (c != '$') return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->precision.set_from_arg(++*next_arg);
        }
      } else {
        conv->precision.set_value(0);
      }
    }
  }

  auto tag = GetTagForChar(c);

  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;

    // It is a length modifier.
    using str_format_internal::LengthMod;
    LengthMod length_mod = tag.as_length();
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    // "hh" and "ll" are the only two-character length modifiers.
    if (c == 'h' && length_mod == LengthMod::h) {
      conv->length_mod = LengthMod::hh;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else if (c == 'l' && length_mod == LengthMod::l) {
      conv->length_mod = LengthMod::ll;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else {
      conv->length_mod = length_mod;
    }
    tag = GetTagForChar(c);
    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
  }

  assert(CheckFastPathSetting(*conv));
  // Keeps CheckFastPathSetting referenced when assert() compiles to nothing.
  (void)(&CheckFastPathSetting);

  conv->conv = tag.as_conv();
  if (!is_positional) conv->arg_position = ++*next_arg;
  return pos;
}
#undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
} // namespace
// Returns the textual spelling of `v` as it appears in a format string.
// LengthMod::none, and any value outside the enumerators, yields "".
std::string LengthModToString(LengthMod v) {
  const char* spelling = "";
  switch (v) {
    case LengthMod::h:
      spelling = "h";
      break;
    case LengthMod::hh:
      spelling = "hh";
      break;
    case LengthMod::l:
      spelling = "l";
      break;
    case LengthMod::ll:
      spelling = "ll";
      break;
    case LengthMod::L:
      spelling = "L";
      break;
    case LengthMod::j:
      spelling = "j";
      break;
    case LengthMod::z:
      spelling = "z";
      break;
    case LengthMod::t:
      spelling = "t";
      break;
    case LengthMod::q:
      spelling = "q";
      break;
    case LengthMod::none:
      break;
  }
  return spelling;
}
// Entry point for parsing one conversion spec. A negative `*next_arg` means
// the format is already in positional mode, so the positional parser is used;
// otherwise parsing starts in sequential mode.
const char *ConsumeUnboundConversion(const char *p, const char *end,
                                     UnboundConversion *conv, int *next_arg) {
  const bool positional = *next_arg < 0;
  return positional ? ConsumeConversion<true>(p, end, conv, next_arg)
                    : ConsumeConversion<false>(p, end, conv, next_arg);
}
// Consumer passed to ParseFormatString() by the ParsedFormatBase constructor.
// Every piece of text it receives is copied to the write cursor inside
// `parsed->data_`, and each text run or conversion is recorded in
// `parsed->items_` together with the offset at which its text ends.
struct ParsedFormatBase::ParsedFormatConsumer {
  explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
      : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}

  // Records literal text. Adjacent text pieces are merged into one item.
  bool Append(string_view s) {
    if (s.empty()) return true;

    const size_t text_end = AppendText(s);
    auto &items = parsed->items_;
    const bool extends_text_run =
        !items.empty() && !items.back().is_conversion;
    if (extends_text_run) {
      // Let's extend the existing text run.
      items.back().text_end = text_end;
    } else {
      // Let's make a new text run.
      items.push_back({false, text_end, {}});
    }
    return true;
  }

  // Records a conversion along with the text of its specification `s`.
  bool ConvertOne(const UnboundConversion &conv, string_view s) {
    const size_t text_end = AppendText(s);
    parsed->items_.push_back({true, text_end, conv});
    return true;
  }

  // Copies `s` to the cursor, advances the cursor, and returns the new end
  // offset measured from the start of `parsed->data_`.
  size_t AppendText(string_view s) {
    const size_t length = s.size();
    memcpy(data_pos, s.data(), length);
    data_pos += length;
    char *const base = parsed->data_.get();
    return static_cast<size_t>(data_pos - base);
  }

  ParsedFormatBase *parsed;
  char* data_pos;
};
// Parses `format` into this object. The text buffer is sized to the format
// string (no buffer at all for an empty format). `has_error_` is set when
// parsing fails or when the parsed conversions do not match `convs`; the
// conversion check is skipped if parsing already failed.
ParsedFormatBase::ParsedFormatBase(
    string_view format, bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs)
    : data_(format.empty() ? nullptr : new char[format.size()]) {
  const bool parsed_ok = ParseFormatString(format, ParsedFormatConsumer(this));
  has_error_ = !(parsed_ok && MatchesConversions(allow_ignored, convs));
}
// Checks every parsed conversion against the allowed conversion sets `convs`
// (`convs` entry N-1 describes argument position N). '*' widths and precisions
// are checked as a '*' conversion on the argument they read from.
// Returns false on the first mismatch or out-of-range position. Otherwise
// returns true if `allow_ignored` is set or every entry of `convs` was
// referenced by at least one conversion.
bool ParsedFormatBase::MatchesConversions(
    bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs) const {
  std::unordered_set<int> seen_positions;
  const size_t num_convs = convs.size();

  // Marks `pos` as used if it is in range and its set permits `c`.
  const auto record_if_allowed = [&](int pos, char c) -> bool {
    const bool in_range = static_cast<size_t>(pos) <= num_convs;
    if (in_range && Contains(convs.begin()[pos - 1], c)) {
      seen_positions.insert(pos);
      return true;
    }
    return false;
  };

  for (const ConversionItem &item : items_) {
    if (!item.is_conversion) continue;
    const UnboundConversion &conv = item.conv;

    const bool precision_ok =
        !conv.precision.is_from_arg() ||
        record_if_allowed(conv.precision.get_from_arg(), '*');
    if (!precision_ok) return false;

    const bool width_ok = !conv.width.is_from_arg() ||
                          record_if_allowed(conv.width.get_from_arg(), '*');
    if (!width_ok) return false;

    const bool value_ok = record_if_allowed(
        conv.arg_position, FormatConversionCharToChar(conv.conv));
    if (!value_ok) return false;
  }
  return allow_ignored || seen_positions.size() == num_convs;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,357 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Length modifiers that may precede the conversion character in a format
// specification. `none` means no modifier was given. Stored in one byte.
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none
};

// Returns the spelling of `v` as written in a format string ("" for `none`).
std::string LengthModToString(LengthMod v);
// The analyzed properties of a single specified conversion.
struct UnboundConversion {
// User-provided empty constructor: members with default initializers below get
// those defaults, while `arg_position` is left uninitialized.
UnboundConversion() {}
// Holds a width or precision. One int encodes three states:
//   -1    : not specified (the default),
//   >= 0  : a literal value from the format string,
//   <= -2 : "from arg" ('*'); argument position N is stored as -N - 1.
class InputValue {
public:
// Stores a literal value. Requires `value >= 0`.
void set_value(int value) {
assert(value >= 0);
value_ = value;
}
// Returns the raw stored value; negative when no literal value is set.
int value() const { return value_; }
// Marks the value as "from arg". aka the '*' format.
// Requires `value >= 1`.
// When set, is_from_arg() return true and get_from_arg() returns the
// original value.
// `value()`'s return value is unspecified in this state.
void set_from_arg(int value) {
assert(value > 0);
value_ = -value - 1;
}
// True only in the "from arg" state (-1 means "not specified").
bool is_from_arg() const { return value_ < -1; }
// Returns the argument position given to set_from_arg().
// Requires is_from_arg().
int get_from_arg() const {
assert(is_from_arg());
return -value_ - 1;
}
private:
int value_ = -1;
};
// No need to initialize. It will always be set in the parser.
int arg_position;
InputValue width;
InputValue precision;
Flags flags = Flags::kBasic;
LengthMod length_mod = LengthMod::none;
FormatConversionChar conv = FormatConversionCharInternal::kNone;
};
// Consume conversion spec prefix (not including '%') of [p, end) if valid.
// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
// If valid, it returns the first character following the conversion spec,
// and the spec part is broken down and returned in 'conv'.
// If invalid, returns nullptr.
// `*next_arg` carries the argument-numbering state between calls: a negative
// value selects positional ("N$") parsing, otherwise it is advanced for each
// argument the spec consumes in sequential mode.
const char* ConsumeUnboundConversion(const char* p, const char* end,
UnboundConversion* conv, int* next_arg);
// Helper tag class for the table below.
// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
// conversions.
//
// The tag is a single byte whose top bits select the kind:
//   0xxxxxxx : conversion character (value stored as-is),
//   10xxxxxx : length modifier (value in the low 6 bits),
//   110xxxxx : flags (value in the low 5 bits),
//   0xFF     : default-constructed; matches none of the is_*() predicates.
class ConvTag {
public:
// The converting constructors are intentionally implicit so that table entries
// can be written as bare enumerators.
constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
: tag_(static_cast<uint8_t>(conversion_char)) {}
constexpr ConvTag(LengthMod length_mod) // NOLINT
: tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
constexpr ConvTag(Flags flags) // NOLINT
: tag_(0xc0 | static_cast<uint8_t>(flags)) {}
constexpr ConvTag() : tag_(0xFF) {}
// Kind predicates; at most one is true for any tag.
bool is_conv() const { return (tag_ & 0x80) == 0; }
bool is_length() const { return (tag_ & 0xC0) == 0x80; }
bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
// Accessors. Each requires the matching predicate to be true.
FormatConversionChar as_conv() const {
assert(is_conv());
assert(!is_length());
assert(!is_flags());
return static_cast<FormatConversionChar>(tag_);
}
LengthMod as_length() const {
assert(!is_conv());
assert(is_length());
assert(!is_flags());
// Strip the two kind bits.
return static_cast<LengthMod>(tag_ & 0x3F);
}
Flags as_flags() const {
assert(!is_conv());
assert(!is_length());
assert(is_flags());
// Strip the three kind bits.
return static_cast<Flags>(tag_ & 0x1F);
}
private:
uint8_t tag_;
};
// Keep a single table for all the conversion chars and length modifiers.
// It has one entry per possible unsigned char value.
extern const ConvTag kTags[256];

// Looks up the tag for `c`. The char is converted to unsigned char first so
// that negative values index the upper half of the table.
inline ConvTag GetTagForChar(char c) {
  const auto index = static_cast<unsigned char>(c);
  return kTags[index];
}
// Splits the format string `src` into text runs and conversions and feeds them
// to `consumer` in order:
//   * literal text is delivered through Consumer::Append(string_view);
//     "%%" is delivered as the text "%".
//   * each conversion is delivered through
//     Consumer::ConvertOne(UnboundConversion, string_view), where the
//     string_view covers the conversion's spec in `src` without the leading
//     '%'.
// Returns true once the whole string has been consumed. Returns false as soon
// as the format is malformed or the consumer returns false.
template <typename Consumer>
bool ParseFormatString(string_view src, Consumer consumer) {
  int next_arg = 0;
  const char* cursor = src.data();
  const char* const end = cursor + src.size();

  while (cursor != end) {
    const char* const percent =
        static_cast<const char*>(memchr(cursor, '%', end - cursor));
    if (percent == nullptr) {
      // No more conversions: the rest is one final text run.
      return consumer.Append(string_view(cursor, end - cursor));
    }

    // Emit the text preceding the '%', then deal with the '%' itself.
    if (ABSL_PREDICT_FALSE(
            !consumer.Append(string_view(cursor, percent - cursor)))) {
      return false;
    }

    const char* const spec = percent + 1;
    // A lone '%' at the very end of the string is an error.
    if (ABSL_PREDICT_FALSE(spec >= end)) return false;

    if (*spec == '%') {
      // Escaped percent sign.
      if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
      cursor = spec + 1;
      continue;
    }

    const auto tag = GetTagForChar(*spec);
    if (tag.is_conv()) {
      // Single-character spec such as "%d".
      // This case is kept separate from the general one below: ConvertOne is
      // more efficient when the compiler can see that the `basic` flag is set.
      if (ABSL_PREDICT_FALSE(next_arg < 0)) {
        // `next_arg < 0` means an earlier conversion was positional, and
        // mixing positional with non-positional conversions is an error.
        return false;
      }
      cursor = spec + 1;
      UnboundConversion conv;
      conv.conv = tag.as_conv();
      conv.arg_position = ++next_arg;
      if (ABSL_PREDICT_FALSE(
              !consumer.ConvertOne(conv, string_view(spec, 1)))) {
        return false;
      }
    } else {
      // General spec: flags, width, precision, length modifier, positions.
      UnboundConversion conv;
      cursor = ConsumeUnboundConversion(spec, end, &conv, &next_arg);
      if (ABSL_PREDICT_FALSE(cursor == nullptr)) return false;
      if (ABSL_PREDICT_FALSE(
              !consumer.ConvertOne(conv, string_view(spec, cursor - spec)))) {
        return false;
      }
    }
  }
  return true;
}
// Always evaluates to true. The point of the function is the read of `s[0]`:
// in a constexpr context that read fails to compile unless `s` points to a
// constexpr char array.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
// Storage and replay machinery shared by all preparsed formats.
//
// The parsed format is kept as:
//   * `data_`  - the text of every item, stored back to back, and
//   * `items_` - one entry per text run or conversion, each holding the offset
//                in `data_` at which its text ends.
// `has_error_` records whether parsing failed or the conversions did not match
// the ones requested at construction.
class ParsedFormatBase {
 public:
  // Parses `format` and checks its conversions against `convs`; see
  // MatchesConversions() for the meaning of `allow_ignored`.
  explicit ParsedFormatBase(
      string_view format, bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs);

  ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }

  // Moving only transfers a pointer and a vector, so it cannot throw; marking
  // it noexcept lets containers move rather than copy these objects.
  ParsedFormatBase(ParsedFormatBase&& other) noexcept {
    *this = std::move(other);
  }

  ParsedFormatBase& operator=(const ParsedFormatBase& other) {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    items_ = other.items_;
    // The last item's end offset is the total amount of text in use.
    size_t text_size = items_.empty() ? 0 : items_.back().text_end;
    data_.reset(new char[text_size]);
    // `other.data_` is null when `other` was built from an empty format;
    // memcpy must not be handed a null pointer, even for a zero-byte copy.
    if (text_size != 0) {
      memcpy(data_.get(), other.data_.get(), text_size);
    }
    return *this;
  }

  ParsedFormatBase& operator=(ParsedFormatBase&& other) noexcept {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    data_ = std::move(other.data_);
    items_ = std::move(other.items_);
    // Reset the vector to make sure the invariants hold.
    other.items_.clear();
    return *this;
  }

  // Replays the parsed format into `consumer`: text runs go to
  // consumer.Append(text) and conversions to consumer.ConvertOne(conv, text).
  // Stops and returns false as soon as the consumer returns false; otherwise
  // returns whether the format is error-free.
  template <typename Consumer>
  bool ProcessFormat(Consumer consumer) const {
    const char* const base = data_.get();
    string_view text(base, 0);
    for (const auto& item : items_) {
      // Each item's text starts where the previous one ended.
      const char* const end = text.data() + text.size();
      text = string_view(end, (base + item.text_end) - end);
      if (item.is_conversion) {
        if (!consumer.ConvertOne(item.conv, text)) return false;
      } else {
        if (!consumer.Append(text)) return false;
      }
    }
    return !has_error_;
  }

  bool has_error() const { return has_error_; }

 private:
  // Returns whether the conversions match and if !allow_ignored it verifies
  // that all conversions are used by the format.
  bool MatchesConversions(
      bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs) const;

  struct ParsedFormatConsumer;

  struct ConversionItem {
    bool is_conversion;
    // Points to the past-the-end location of this element in the data_ array.
    size_t text_end;
    UnboundConversion conv;
  };

  bool has_error_;
  std::unique_ptr<char[]> data_;
  std::vector<ConversionItem> items_;
};
// A value type representing a preparsed format. These can be created, copied
// around, and reused to speed up formatting loops.
// The user must specify through the template arguments the conversion
// characters used in the format. This will be checked at compile time.
//
// This class uses Conv enum values to specify each argument.
// This allows for more flexibility as you can specify multiple possible
// conversion characters for each argument.
// ParsedFormat<char...> is a simplified alias for when the user only
// needs to specify a single conversion character for each argument.
//
// Example:
// // Extended format supports multiple characters per argument:
// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
// MyFormat GetFormat(bool use_hex) {
// if (use_hex) return MyFormat("foo %x bar");
// return MyFormat("foo %d bar");
// }
// // 'format' can be used with any value that supports 'd' and 'x',
// // like `int`.
// auto format = GetFormat(use_hex);
// value = StringF(format, i);
//
// This class also supports runtime format checking with the ::New() and
// ::NewAllowIgnored() factory functions.
// This is the only API that allows the user to pass a runtime specified format
// string. These factory functions will return NULL if the format does not match
// the conversions requested by the user.
// Preparsed format whose permitted conversions are given by the template
// arguments `C...`, one conversion set per argument.
template <FormatConversionCharSet... C>
class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
 public:
  // Builds the format from `format`, requiring every argument to be used.
  // When the format checker is enabled, the attributes below restrict this
  // constructor to constexpr format strings that match `C...`.
  explicit ExtendedParsedFormat(string_view format)
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
      __attribute__((
          enable_if(str_format_internal::EnsureConstexpr(format),
                    "Format string is not constexpr."),
          enable_if(str_format_internal::ValidFormatImpl<C...>(format),
                    "Format specified does not match the template arguments.")))
#endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
      : ExtendedParsedFormat(format, /*allow_ignored=*/false) {
  }

  // ExtendedParsedFormat factory functions.
  // The conversion characters still have to be specified through `C...`, but
  // they are verified at runtime rather than at compile time, which makes it
  // possible to pass dynamically sourced formats.
  // Both functions return a null pointer if the format does not match the
  // conversion characters; the caller is responsible for checking the result
  // before using it.
  //
  // `New` additionally requires that every specified argument is consumed by
  // the format and returns null if any argument is ignored.
  // `NewAllowIgnored` accepts formats that ignore arguments.
  static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
    return New(format, /*allow_ignored=*/false);
  }
  static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
      string_view format) {
    return New(format, /*allow_ignored=*/true);
  }

 private:
  // Shared implementation of the two public factories.
  static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
                                                   bool allow_ignored) {
    std::unique_ptr<ExtendedParsedFormat> parsed(
        new ExtendedParsedFormat(format, allow_ignored));
    // A format that failed to parse or match is reported as null.
    if (parsed->has_error()) parsed.reset();
    return parsed;
  }

  ExtendedParsedFormat(string_view s, bool allow_ignored)
      : ParsedFormatBase(s, allow_ignored, {C...}) {}
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_

View File

@@ -0,0 +1,434 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/parser.h"
#include <string.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
using testing::Pair;
// Every LengthMod enumerator must map to its format-string spelling.
TEST(LengthModTest, Names) {
  struct Expectation {
    int line;
    LengthMod mod;
    const char *name;
  };
  // One case per enumerator; `line` identifies the failing row in the trace.
  const Expectation kCases[] = {
    {__LINE__, LengthMod::none, ""  },
    {__LINE__, LengthMod::h,    "h" },
    {__LINE__, LengthMod::hh,   "hh"},
    {__LINE__, LengthMod::l,    "l" },
    {__LINE__, LengthMod::ll,   "ll"},
    {__LINE__, LengthMod::L,    "L" },
    {__LINE__, LengthMod::j,    "j" },
    {__LINE__, LengthMod::z,    "z" },
    {__LINE__, LengthMod::t,    "t" },
    {__LINE__, LengthMod::q,    "q" },
  };
  EXPECT_EQ(ABSL_ARRAYSIZE(kCases), 10);
  for (const Expectation &tc : kCases) {
    SCOPED_TRACE(tc.line);
    EXPECT_EQ(tc.name, LengthModToString(tc.mod));
  }
}
// Every conversion character must convert back to the char it is named after,
// and kNone must convert to NUL.
TEST(ConversionCharTest, Names) {
  struct Expectation {
    FormatConversionChar id;
    char name;
  };
  // clang-format off
  const Expectation kCases[] = {
    // X(c) pairs the enumerator `c` with the first character of its name.
#define X(c) {FormatConversionCharInternal::c, #c[0]}
    X(c), X(s),                                      // text
    X(d), X(i), X(o), X(u), X(x), X(X),              // int
    X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A),  // float
    X(n), X(p),                                      // misc
#undef X
    {FormatConversionCharInternal::kNone, '\0'},
  };
  // clang-format on
  for (const Expectation &tc : kCases) {
    SCOPED_TRACE(tc.name);
    const FormatConversionChar conv_char = tc.id;
    EXPECT_EQ(tc.name, FormatConversionCharToChar(conv_char));
  }
}
// Fixture around ConsumeUnboundConversion(). The most recent parse result is
// left in `o` for the test to inspect.
class ConsumeUnboundConversionTest : public ::testing::Test {
 public:
  // Parses `src` in sequential mode and returns {consumed prefix, remainder}.
  // On failure the prefix is empty and the remainder is all of `src`.
  std::pair<string_view, string_view> Consume(string_view src) {
    int next = 0;
    o = UnboundConversion();  // refresh
    const char* const begin = src.data();
    const char* const end = begin + src.size();
    const char* const p = ConsumeUnboundConversion(begin, end, &o, &next);
    if (p == nullptr) return {{}, src};
    return {string_view(begin, p - begin), string_view(p, end - p)};
  }

  // Returns true if `fmt` parses and is consumed entirely. With
  // `force_positional` the parse starts in positional mode.
  bool Run(const char *fmt, bool force_positional = false) {
    int next = force_positional ? -1 : 0;
    o = UnboundConversion();  // refresh
    const char* const end = fmt + strlen(fmt);
    return ConsumeUnboundConversion(fmt, end, &o, &next) == end;
  }

  UnboundConversion o;
};
// Checks how much of the input is consumed as a conversion spec and what is
// left over, for both valid and invalid inputs.
TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
  struct Expectation {
    int line;
    string_view src;       // input
    string_view out;       // expected consumed prefix
    string_view src_post;  // expected remainder
  };
  const Expectation kCases[] = {
    {__LINE__, "",             "",             ""  },
    {__LINE__, "b",            "",             "b" },  // 'b' is invalid
    {__LINE__, "ba",           "",             "ba"},  // 'b' is invalid
    {__LINE__, "l",            "",             "l" },  // just length mod isn't okay
    {__LINE__, "d",            "d",            ""  },  // basic
    {__LINE__, "d ",           "d",            " " },  // leave suffix
    {__LINE__, "dd",           "d",            "d" },  // don't be greedy
    {__LINE__, "d9",           "d",            "9" },  // leave non-space suffix
    {__LINE__, "dzz",          "d",            "zz"},  // length mod as suffix
    {__LINE__, "1$*2$d",       "1$*2$d",       ""  },  // arg indexing and * allowed.
    {__LINE__, "0-14.3hhd",    "0-14.3hhd",    ""  },  // precision, width
    {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""  },  // flags
  };
  for (const Expectation &tc : kCases) {
    SCOPED_TRACE(tc.line);
    EXPECT_THAT(Consume(tc.src), Pair(tc.out, tc.src_post));
  }
}
// A bare conversion character parses with no width, no precision, and the
// first argument position. Each Run() call resets and refills the fixture's
// `o`, so the checks below describe the last successful Run("d").
TEST_F(ConsumeUnboundConversionTest, BasicConversion) {
// Inputs that must not be accepted as one complete conversion.
EXPECT_FALSE(Run(""));
EXPECT_FALSE(Run("z"));
EXPECT_FALSE(Run("dd")); // no excess allowed
EXPECT_TRUE(Run("d"));
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
// Width and precision stay unset (negative) and are not taken from args.
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(1, o.arg_position);
}
// Argument positions: implicit (sequential) and explicit ("N$"), with and
// without the parser being forced into positional mode up front. Each
// EXPECT_EQ inspects the state left in `o` by the Run() call just before it.
TEST_F(ConsumeUnboundConversionTest, ArgPosition) {
// Without "N$" the first conversion gets position 1.
EXPECT_TRUE(Run("d"));
EXPECT_EQ(1, o.arg_position);
// Explicit positions, single- and multi-digit.
EXPECT_TRUE(Run("3$d"));
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("1$d"));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("1$d", true));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("123$d"));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("123$d", true));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("10$d"));
EXPECT_EQ(10, o.arg_position);
EXPECT_TRUE(Run("10$d", true));
EXPECT_EQ(10, o.arg_position);
// Position can't be zero.
EXPECT_FALSE(Run("0$d"));
EXPECT_FALSE(Run("0$d", true));
EXPECT_FALSE(Run("1$*0$d"));
EXPECT_FALSE(Run("1$.*0$d"));
// Position can't start with a zero digit at all. That is not a 'decimal'.
EXPECT_FALSE(Run("01$p"));
EXPECT_FALSE(Run("01$p", true));
EXPECT_FALSE(Run("1$*01$p"));
EXPECT_FALSE(Run("1$.*01$p"));
}
// Width and precision parsing: literal values, '*' (taken from an argument),
// positional '*N$', and the limit on the number of digits. Each group of
// expectations inspects the state left in `o` by the Run() call that opens
// the group, so the statement order matters.
TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) {
// Literal width only.
EXPECT_TRUE(Run("14d"));
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
// A '.' with no digits means precision 0.
EXPECT_TRUE(Run("14.d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(0, o.precision.value());
EXPECT_TRUE(Run(".d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal precision only.
EXPECT_TRUE(Run(".5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(5, o.precision.value());
EXPECT_TRUE(Run(".0d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal width and precision together.
EXPECT_TRUE(Run("14.5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(5, o.precision.value());
// Sequential '*': width, precision and value consume arguments in order.
EXPECT_TRUE(Run("*.*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(2, o.precision.get_from_arg());
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(Run(".*d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(1, o.precision.get_from_arg());
EXPECT_EQ(2, o.arg_position);
// mixed implicit and explicit: didn't specify arg position.
EXPECT_FALSE(Run("*23$.*34$d"));
// Fully positional: value, width and precision each name their argument.
EXPECT_TRUE(Run("12$*23$.*34$d"));
EXPECT_EQ(12, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(23, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(34, o.precision.get_from_arg());
EXPECT_TRUE(Run("2$*5$.*9$d"));
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(5, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(9, o.precision.get_from_arg());
EXPECT_FALSE(Run(".*0$d")) << "no arg 0";
// Large values
// Nine digits are accepted; ten or more are rejected.
EXPECT_TRUE(Run("999999999.999999999d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(999999999, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(999999999, o.precision.value());
EXPECT_FALSE(Run("1000000000.999999999d"));
EXPECT_FALSE(Run("999999999.1000000000d"));
EXPECT_FALSE(Run("9999999999d"));
EXPECT_FALSE(Run(".9999999999d"));
}
// Every subset of the flag characters, written in both forward and reversed
// order, must parse and must set exactly those flag bits whose characters
// appear in the format string.
TEST_F(ConsumeUnboundConversionTest, Flags) {
  static const char kAllFlags[] = "-+ #0";
  static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1;
  for (int pass = 0; pass < 2; ++pass) {
    const bool reversed = (pass == 1);
    for (int mask = 0; mask < (1 << kNumFlags); ++mask) {
      // Bit `bit` of `mask` decides whether kAllFlags[bit] is included.
      std::string fmt;
      for (int bit = 0; bit < kNumFlags; ++bit) {
        if (((mask >> bit) & 1) != 0) fmt += kAllFlags[bit];
      }
      // flag order shouldn't matter
      if (reversed) std::reverse(fmt.begin(), fmt.end());
      fmt += 'd';
      SCOPED_TRACE(fmt);
      EXPECT_TRUE(Run(fmt.c_str()));

      // True when `flag` does not occur anywhere in the format under test.
      const auto absent = [&fmt](char flag) {
        return fmt.find(flag) == std::string::npos;
      };
      EXPECT_EQ(absent('-'), !FlagsContains(o.flags, Flags::kLeft));
      EXPECT_EQ(absent('+'), !FlagsContains(o.flags, Flags::kShowPos));
      EXPECT_EQ(absent(' '), !FlagsContains(o.flags, Flags::kSignCol));
      EXPECT_EQ(absent('#'), !FlagsContains(o.flags, Flags::kAlt));
      EXPECT_EQ(absent('0'), !FlagsContains(o.flags, Flags::kZero));
    }
  }
}
// Flags::kBasic must be the exact flag value for plain conversions, and must
// not be the flag value once a width, precision, or flag character appears.
TEST_F(ConsumeUnboundConversionTest, BasicFlag) {
  // Flag is on
  const char* const kBasicFormats[] = {"d", "llx", "G", "1$X"};
  for (const char* fmt : kBasicFormats) {
    SCOPED_TRACE(fmt);
    EXPECT_TRUE(Run(fmt));
    EXPECT_EQ(o.flags, Flags::kBasic);
  }

  // Flag is off
  const char* const kNonBasicFormats[] = {"3d", ".llx", "-G", "1$#X"};
  for (const char* fmt : kNonBasicFormats) {
    SCOPED_TRACE(fmt);
    EXPECT_TRUE(Run(fmt));
    EXPECT_NE(o.flags, Flags::kBasic);
  }
}
// Each length modifier prefix must parse and be reported in o.length_mod; a
// conversion without a prefix reports LengthMod::none.
TEST_F(ConsumeUnboundConversionTest, LengthMod) {
  const struct {
    const char* fmt;
    LengthMod expected;
  } kCases[] = {
      {"d", LengthMod::none}, {"hd", LengthMod::h},  {"hhd", LengthMod::hh},
      {"ld", LengthMod::l},   {"lld", LengthMod::ll}, {"Lf", LengthMod::L},
      {"qf", LengthMod::q},   {"jd", LengthMod::j},   {"zd", LengthMod::z},
      {"td", LengthMod::t},
  };
  for (const auto& c : kCases) {
    SCOPED_TRACE(c.fmt);
    EXPECT_TRUE(Run(c.fmt));
    EXPECT_EQ(c.expected, o.length_mod);
  }
}
struct SummarizeConsumer {
std::string* out;
explicit SummarizeConsumer(std::string* out) : out(out) {}
bool Append(string_view s) {
*out += "[" + std::string(s) + "]";
return true;
}
bool ConvertOne(const UnboundConversion& conv, string_view s) {
*out += "{";
*out += std::string(s);
*out += ":";
*out += std::to_string(conv.arg_position) + "$";
if (conv.width.is_from_arg()) {
*out += std::to_string(conv.width.get_from_arg()) + "$*";
}
if (conv.precision.is_from_arg()) {
*out += "." + std::to_string(conv.precision.get_from_arg()) + "$*";
}
*out += FormatConversionCharToChar(conv.conv);
*out += "}";
return true;
}
};
// Runs `pc` through a SummarizeConsumer and returns the resulting summary.
// A trailing "!" is added when ProcessFormat() reports failure.
std::string SummarizeParsedFormat(const ParsedFormatBase& pc) {
  std::string summary;
  const bool ok = pc.ProcessFormat(SummarizeConsumer(&summary));
  if (!ok) summary += "!";
  return summary;
}
// Empty fixture that groups the ParsedFormatBase tests below; it adds no state
// or helpers of its own.
class ParsedFormatTest : public testing::Test {};
// ParsedFormatBase must behave like a regular value type: copy construction,
// copy/move assignment, and swap all preserve the parsed content, which is
// compared through its SummarizeParsedFormat() text. The steps build on each
// other, so their order matters.
TEST_F(ParsedFormatTest, ValueSemantics) {
ParsedFormatBase p1({}, true, {}); // empty format
EXPECT_EQ("", SummarizeParsedFormat(p1));
ParsedFormatBase p2 = p1; // copy construct (empty)
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));
p1 = ParsedFormatBase("hello%s", true,
{FormatConversionCharSetInternal::s}); // move assign
EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1));
ParsedFormatBase p3 = p1; // copy construct (nonempty)
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3));
// Bring std::swap into scope and call unqualified so that an ADL-found
// overload is used if one exists.
using std::swap;
swap(p1, p2);
// After the swap p1 holds the empty format and p2 the "hello%s" one.
EXPECT_EQ("", SummarizeParsedFormat(p1));
EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p2));
swap(p1, p2); // undo
p2 = p1; // copy assign
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));
}
// One row of a table-driven parsing test. Rows are written with aggregate
// initialization, so the field order below is part of the interface.
struct ExpectParse {
// Format string handed to ParsedFormatBase.
const char* in;
// Conversion sets passed as ParsedFormatBase's third constructor argument.
std::initializer_list<FormatConversionCharSet> conv_set;
// Expected SummarizeParsedFormat() text for `in`.
const char* out;
};
// Table-driven check that ParsedFormatBase summarizes each input as expected.
TEST_F(ParsedFormatTest, Parsing) {
  // Parse should be equivalent to that obtained by ConversionParseIterator.
  // No need to retest the parsing edge cases here.
  const ExpectParse kExpect[] = {
      {"", {}, ""},
      {"ab", {}, "[ab]"},
      {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"},
      {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"},
      {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"},
      {"a%b %d", {}, "[a]!"},  // stop after error
  };
  for (const ExpectParse& expectation : kExpect) {
    SCOPED_TRACE(expectation.in);
    const ParsedFormatBase parsed(expectation.in, false, expectation.conv_set);
    EXPECT_EQ(expectation.out, SummarizeParsedFormat(parsed));
  }
}
// Flags may appear in any order (and may repeat); the conversion's source text
// in the summary keeps them exactly as written.
TEST_F(ParsedFormatTest, ParsingFlagOrder) {
  const ExpectParse kExpect[] = {
      {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"},
      {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"},
      {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"},
      {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"},
      {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"},
      {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"},
      {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"},
  };
  for (const ExpectParse& expectation : kExpect) {
    SCOPED_TRACE(expectation.in);
    const ParsedFormatBase parsed(expectation.in, false, expectation.conv_set);
    EXPECT_EQ(expectation.out, SummarizeParsedFormat(parsed));
  }
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,314 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Join API that are inlined/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in this file are:
//
// - A handful of default Formatters
// - JoinAlgorithm() overloads
// - JoinRange() overloads
// - JoinTuple()
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_join.h
//
// IWYU pragma: private, include "absl/strings/str_join.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include "absl/strings/internal/ostringstream.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
//
// Formatter objects
//
// The following are implementation classes for standard Formatter objects. The
// factory functions that users will call to create and use these formatters are
// defined and documented in absl/strings/str_join.h.
//
// The default formatter. Converts alpha-numeric types to strings.
struct AlphaNumFormatterImpl {
  // Appends an already-constructed AlphaNum to `*out`.
  void operator()(std::string* out, const AlphaNum& t) const {
    StrAppend(out, t);
  }

  // Generic overload: explicitly wraps `t` in an AlphaNum before appending.
  // This template is what makes it possible to pass in a dereferenced
  // vector<bool>::iterator.
  template <typename T>
  void operator()(std::string* out, const T& t) const {
    StrAppend(out, AlphaNum(t));
  }
};
// Tag type used to select the JoinAlgorithm() overload (defined below) for
// ranges that do not require additional formatting (e.g., a range of
// strings). It inherits AlphaNumFormatterImpl's call operators, so it can
// still be invoked like an ordinary formatter.
struct NoFormatter : public AlphaNumFormatterImpl {};
// Formats types to strings using the << operator.
class StreamFormatterImpl {
 public:
  // Streams `t` onto the end of `*out` with operator<<.
  //
  // Deliberately non-const: the call mutates the cached stream, and a const
  // signature would make StreamFormatterImpl thread-hostile.
  template <typename T>
  void operator()(std::string* out, const T& t) {
    if (!strm_) {
      // First use. The stream is built lazily so that joining an empty range
      // never pays for its comparatively expensive construction.
      strm_.reset(new strings_internal::OStringStream(out));
    } else {
      // Reuse the stream: drop any bad/fail/eof bits left over from an earlier
      // call, then point it at the current output string.
      strm_->clear();
      strm_->str(out);
    }
    *strm_ << t;
  }

 private:
  std::unique_ptr<strings_internal::OStringStream> strm_;
};
// Formats a std::pair<>. The 'first' member is formatted using f1_ and the
// 'second' member is formatted using f2_. sep_ is the separator.
template <typename F1, typename F2>
class PairFormatterImpl {
 public:
  // Stores both formatters and keeps its own copy of the separator text.
  PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
      : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}

  // Writes "<first><sep><second>" to `*out`. This overload invokes the
  // wrapped formatters as non-const objects.
  template <typename T>
  void operator()(std::string* out, const T& entry) {
    f1_(out, entry.first);
    *out += sep_;
    f2_(out, entry.second);
  }

  // Same as above, for use on a const PairFormatterImpl; the wrapped
  // formatters are invoked as const objects.
  template <typename T>
  void operator()(std::string* out, const T& entry) const {
    f1_(out, entry.first);
    *out += sep_;
    f2_(out, entry.second);
  }

 private:
  F1 f1_;
  std::string sep_;
  F2 f2_;
};
// Wraps another formatter and dereferences the argument to operator() then
// passes the dereferenced argument to the wrapped formatter. This can be
// useful, for example, to join a std::vector<int*>.
template <typename Formatter>
class DereferenceFormatterImpl {
 public:
  // Wraps a value-initialized Formatter.
  DereferenceFormatterImpl() : f_() {}

  // Wraps the given formatter.
  explicit DereferenceFormatterImpl(Formatter&& f)
      : f_(std::forward<Formatter>(f)) {}

  // Applies the wrapped formatter to `*ptr`; the wrapped formatter is invoked
  // as a non-const object.
  template <typename T>
  void operator()(std::string* out, const T& ptr) {
    f_(out, *ptr);
  }

  // Same as above, for use on a const DereferenceFormatterImpl.
  template <typename T>
  void operator()(std::string* out, const T& ptr) const {
    f_(out, *ptr);
  }

 private:
  Formatter f_;
};
// DefaultFormatter<T> is a traits class that selects a default Formatter to use
// for the given type T. The ::Type member names the Formatter to use. This is
// used by the absl::StrJoin() overloads that do NOT take a Formatter argument,
// in which case a default Formatter must be chosen.
//
// AlphaNumFormatterImpl is the default in the base template, followed by
// specializations for other types.
// Primary template: anything without a more specific rule is formatted with
// AlphaNumFormatterImpl.
template <typename ValueType>
struct DefaultFormatter {
  using Type = AlphaNumFormatterImpl;
};

// C strings are formatted as text rather than being matched by the generic
// pointer rule further down.
template <>
struct DefaultFormatter<const char*> {
  using Type = AlphaNumFormatterImpl;
};
template <>
struct DefaultFormatter<char*> {
  using Type = AlphaNumFormatterImpl;
};

// String-like values need no formatting; NoFormatter selects the dedicated
// JoinAlgorithm() overload.
template <>
struct DefaultFormatter<std::string> {
  using Type = NoFormatter;
};
template <>
struct DefaultFormatter<absl::string_view> {
  using Type = NoFormatter;
};

// Pointers are dereferenced and the pointee is formatted with the pointee
// type's own default formatter.
template <typename ValueType>
struct DefaultFormatter<ValueType*> {
  using Type =
      DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>;
};

// std::unique_ptr<T> is treated exactly like T*.
template <typename ValueType>
struct DefaultFormatter<std::unique_ptr<ValueType>>
    : public DefaultFormatter<ValueType*> {};
//
// JoinAlgorithm() functions
//
// The main joining algorithm. This simply joins the elements in the given
// iterator range, each separated by the given separator, into an output string,
// and formats each element using the provided Formatter object.
template <typename Iterator, typename Formatter>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          Formatter&& f) {
  std::string result;
  Iterator it = start;
  if (it != end) {
    // The first element is written without a leading separator...
    f(&result, *it);
    // ...and every later element is preceded by one.
    for (++it; it != end; ++it) {
      result.append(s.data(), s.size());
      f(&result, *it);
    }
  }
  return result;
}
// A joining algorithm that's optimized for a forward iterator range of
// string-like objects that do not need any additional formatting. This is to
// optimize the common case of joining, say, a std::vector<string> or a
// std::vector<absl::string_view>.
//
// This is an overload of the previous JoinAlgorithm() function. Here the
// Formatter argument is of type NoFormatter. Since NoFormatter is an internal
// type, this overload is only invoked when absl::StrJoin() is called with a
// range of string-like objects (e.g., std::string, absl::string_view), and an
// explicit Formatter argument was NOT specified.
//
// The optimization is that the needed space will be reserved in the output
// string to avoid the need to resize while appending. To do this, the iterator
// range will be traversed twice: once to calculate the total needed size, and
// then again to copy the elements and delimiters to the output string.
template <typename Iterator,
          typename = typename std::enable_if<std::is_convertible<
              typename std::iterator_traits<Iterator>::iterator_category,
              std::forward_iterator_tag>::value>::type>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          NoFormatter) {
  std::string result;
  if (start == end) return result;

  // Pass 1: add up the element sizes plus one separator between each pair.
  size_t total_size = start->size();
  Iterator it = start;
  for (++it; it != end; ++it) {
    total_size += s.size();
    total_size += it->size();
  }
  if (total_size == 0) return result;

  // Pass 2: size the string once, then copy the pieces straight into it.
  STLStringResizeUninitialized(&result, total_size);
  char* cursor = &*result.begin();
  memcpy(cursor, start->data(), start->size());
  cursor += start->size();
  it = start;
  for (++it; it != end; ++it) {
    memcpy(cursor, s.data(), s.size());
    cursor += s.size();
    memcpy(cursor, it->data(), it->size());
    cursor += it->size();
  }
  return result;
}
// JoinTupleLoop implements a loop over the elements of a std::tuple, which
// are heterogeneous. The primary template matches the tuple interior case. It
// continues the iteration after appending a separator (for nonzero indices)
// and formatting an element of the tuple. The specialization for the I=N case
// matches the end-of-tuple, and terminates the iteration.
// Interior case: emit element I (preceded by the separator unless it is the
// first element), then hand over to the step for element I + 1.
template <size_t I, size_t N>
struct JoinTupleLoop {
  template <typename Tup, typename Formatter>
  void operator()(std::string* out, const Tup& tup, absl::string_view sep,
                  Formatter&& fmt) {
    if (I != 0) out->append(sep.data(), sep.size());
    fmt(out, std::get<I>(tup));
    JoinTupleLoop<I + 1, N> next;
    next(out, tup, sep, fmt);
  }
};

// End-of-tuple case (I == N): nothing left to emit, so the recursion stops.
template <size_t N>
struct JoinTupleLoop<N, N> {
  template <typename Tup, typename Formatter>
  void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
};
template <typename... T, typename Formatter>
std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
Formatter&& fmt) {
std::string result;
JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
return result;
}
// Joins [first, last) using the default formatter for the iterator's value
// type.
template <typename Iterator>
std::string JoinRange(Iterator first, Iterator last,
                      absl::string_view separator) {
  // No formatter was explicitly given, so a default must be chosen.
  using ValueType = typename std::iterator_traits<Iterator>::value_type;
  using Formatter = typename DefaultFormatter<ValueType>::Type;
  return JoinAlgorithm(first, last, separator, Formatter());
}
// Joins every element of `range` using the caller-supplied formatter.
template <typename Range, typename Formatter>
std::string JoinRange(const Range& range, absl::string_view separator,
                      Formatter&& fmt) {
  // Unqualified begin()/end() so that ADL overloads are found, with the std
  // versions as the fallback.
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinAlgorithm(first, last, separator, fmt);
}
// Joins every element of `range` using the default formatter for its element
// type.
template <typename Range>
std::string JoinRange(const Range& range, absl::string_view separator) {
  // Unqualified begin()/end() so that ADL overloads are found, with the std
  // versions as the fallback.
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinRange(first, last, separator);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_

View File

@@ -0,0 +1,430 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
// - ConvertibleToStringView
// - SplitIterator<>
// - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#include <array>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
#ifdef _GLIBCXX_DEBUG
#include "absl/strings/internal/stl_type_traits.h"
#endif // _GLIBCXX_DEBUG
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from, except for rvalue strings. This means it
// can be used as a function parameter in places where passing a temporary
// string might cause memory lifetime issues.
// Thin wrapper around an absl::string_view whose converting constructors are
// intentionally implicit, with the rvalue std::string overloads deleted so
// that a temporary string cannot be bound.
class ConvertibleToStringView {
public:
// Implicit conversions from C strings, string views and lvalue std::strings.
ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
: value_(s) {}
// Disable conversion from rvalue strings.
ConvertibleToStringView(std::string&& s) = delete;
ConvertibleToStringView(const std::string&& s) = delete;
// Returns the wrapped view. It refers to the caller's data; no copy is held.
absl::string_view value() const { return value_; }
private:
absl::string_view value_;
};
// An iterator that enumerates the parts of a string from a Splitter. The text
// to be split, the Delimiter, and the Predicate are all taken from the given
// Splitter object. Iterators may only be compared if they refer to the same
// Splitter instance.
//
// This class is NOT part of the public splitting API.
// Input iterator over the pieces of a Splitter's text. It keeps its own
// copies of the splitter's delimiter and predicate and a pointer back to the
// splitter, from which the text is re-read on every step.
template <typename Splitter>
class SplitIterator {
public:
// Standard iterator member types: a single-pass iterator yielding
// absl::string_view pieces.
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
// kInitState: initial state while pieces are being produced.
// kLastState: the current piece is the final one.
// kEndState:  iteration has finished.
enum State { kInitState, kLastState, kEndState };
// Creates an iterator over `splitter` in the given state. Unless the
// iterator ends up in kEndState, it is advanced immediately so that it refers
// to the first accepted piece.
SplitIterator(State state, const Splitter* splitter)
: pos_(0),
state_(state),
splitter_(splitter),
delimiter_(splitter->delimiter()),
predicate_(splitter->predicate()) {
// Hack to maintain backward compatibility. This one block makes it so an
// empty absl::string_view whose .data() happens to be nullptr behaves
// *differently* from an otherwise empty absl::string_view whose .data() is
// not nullptr. This is an undesirable difference in general, but this
// behavior is maintained to avoid breaking existing code that happens to
// depend on this old behavior/bug. Perhaps it will be fixed one day. The
// difference in behavior is as follows:
// Split(absl::string_view(""), '-'); // {""}
// Split(absl::string_view(), '-'); // {}
if (splitter_->text().data() == nullptr) {
state_ = kEndState;
pos_ = splitter_->text().size();
return;
}
// An end iterator sits at the end of the text; any other iterator is
// advanced once to load the first piece into curr_.
if (state_ == kEndState) {
pos_ = splitter_->text().size();
} else {
++(*this);
}
}
// True once iteration has finished.
bool at_end() const { return state_ == kEndState; }
// Access to the current piece.
reference operator*() const { return curr_; }
pointer operator->() const { return &curr_; }
// Advances to the next piece accepted by the predicate, or to the end state
// if there is none.
SplitIterator& operator++() {
// Keep stepping while the predicate rejects the piece just produced.
do {
// The previous piece was the final one: become the end iterator.
if (state_ == kLastState) {
state_ = kEndState;
return *this;
}
const absl::string_view text = splitter_->text();
const absl::string_view d = delimiter_.Find(text, pos_);
// A match positioned at the very end of the text marks the piece produced
// now as the final one. NOTE(review): presumably this is how Find()
// reports "no further delimiter" -- confirm against the Delimiter contract.
if (d.data() == text.data() + text.size()) state_ = kLastState;
// The piece runs from pos_ up to the start of the delimiter match.
curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
// Continue after the piece and the delimiter that ended it.
pos_ += curr_.size() + d.size();
} while (!predicate_(curr_));
return *this;
}
// Post-increment: advances and returns a copy of the previous iterator.
SplitIterator operator++(int) {
SplitIterator old(*this);
++(*this);
return old;
}
// Equality compares only the state and the position; the splitter pointer is
// not compared, so both iterators must come from the same Splitter.
friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
return a.state_ == b.state_ && a.pos_ == b.pos_;
}
friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
return !(a == b);
}
private:
// Offset into the text at which the next search starts.
size_t pos_;
State state_;
// The piece most recently produced by operator++.
absl::string_view curr_;
const Splitter* splitter_;
typename Splitter::DelimiterType delimiter_;
typename Splitter::PredicateType predicate_;
};
// Detection traits. Each primary template derives from std::false_type; the
// partial specialization is selected only when the probed type or expression
// is well-formed for T, in which case the trait is std::true_type.
//
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
template <typename T, typename = void>
struct HasMappedType : std::false_type {};
template <typename T>
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
: std::true_type {};
// HasValueType<T>::value is true iff there exists a type T::value_type.
template <typename T, typename = void>
struct HasValueType : std::false_type {};
template <typename T>
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
};
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
template <typename T, typename = void>
struct HasConstIterator : std::false_type {};
template <typename T>
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
: std::true_type {};
// HasEmplace<T>::value is true iff the expression `t.emplace()` (a call with
// no arguments) is well-formed for a T.
template <typename T, typename = void>
struct HasEmplace : std::false_type {};
template <typename T>
struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
: std::true_type {};
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
// Declaration-only overload set, used purely inside decltype: a pointer to
// some std::initializer_list<U> selects the std::true_type overload, and
// everything else falls through to the variadic std::false_type overload.
template <typename T>
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
std::false_type IsInitializerListDispatch(...);  // default: No

template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
// is true for type 'C'.
//
// Restricts conversion to container-like types (by testing for the presence of
// a const_iterator member type) and also to disable conversion to an
// std::initializer_list (which also has a const_iterator). Otherwise, code
// compiled in C++11 will get an error due to ambiguous conversion paths (in
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
// or an std::initializer_list<T>).
// Implementation helper, selected on two flags computed by
// SplitterIsConvertibleTo below. The primary template (used when
// has_value_type is false) is always false.
template <typename C, bool has_value_type, bool has_mapped_type>
struct SplitterIsConvertibleToImpl : std::false_type {};
// Has value_type but no mapped_type: the element type must be constructible
// from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, false>
: std::is_constructible<typename C::value_type, absl::string_view> {};
// Has both value_type and mapped_type: key_type and mapped_type must each be
// constructible from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, true>
: absl::conjunction<
std::is_constructible<typename C::key_type, absl::string_view>,
std::is_constructible<typename C::mapped_type, absl::string_view>> {};
// Public trait. The first flag passed to the Impl requires that C is not an
// std::initializer_list and has both value_type and const_iterator (plus, when
// _GLIBCXX_DEBUG is defined, the extra STL-container condition below); the
// second flag records whether C has a mapped_type.
template <typename C>
struct SplitterIsConvertibleTo
: SplitterIsConvertibleToImpl<
C,
#ifdef _GLIBCXX_DEBUG
!IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
#endif // _GLIBCXX_DEBUG
!IsInitializerList<
typename std::remove_reference<C>::type>::value &&
HasValueType<C>::value && HasConstIterator<C>::value,
HasMappedType<C>::value> {
};
// This class implements the range that is returned by absl::StrSplit(). This
// class has templated conversion operators that allow it to be implicitly
// converted to a variety of types that the caller may have specified on the
// left-hand side of an assignment.
//
// The main interface for interacting with this class is through its implicit
// conversion operators. However, this class may also be used like a container
// in that it has .begin() and .end() member functions. It may also be used
// within a range-for loop.
//
// Output containers can be collections of any type that is constructible from
// an absl::string_view.
//
// A Predicate functor may be supplied. This predicate will be used to filter
// the split strings: only strings for which the predicate returns true will be
// kept. A Predicate object is any unary functor that takes an absl::string_view
// and returns bool.
//
// The StringType parameter can be either string_view or string, depending on
// whether the Splitter refers to a string stored elsewhere, or if the string
// resides inside the Splitter itself.
// Holds the text, delimiter and predicate of one split operation and exposes
// the resulting pieces both as an iterable range and through implicit
// conversions to containers and std::pair.
template <typename Delimiter, typename Predicate, typename StringType>
class Splitter {
public:
using DelimiterType = Delimiter;
using PredicateType = Predicate;
using const_iterator = strings_internal::SplitIterator<Splitter>;
using value_type = typename std::iterator_traits<const_iterator>::value_type;
// Takes ownership of the three arguments by moving them into the members.
Splitter(StringType input_text, Delimiter d, Predicate p)
: text_(std::move(input_text)),
delimiter_(std::move(d)),
predicate_(std::move(p)) {}
// Accessors used by SplitIterator.
absl::string_view text() const { return text_; }
const Delimiter& delimiter() const { return delimiter_; }
const Predicate& predicate() const { return predicate_; }
// Range functions that iterate the split substrings as absl::string_view
// objects. These methods enable a Splitter to be used in a range-based for
// loop.
const_iterator begin() const { return {const_iterator::kInitState, this}; }
const_iterator end() const { return {const_iterator::kEndState, this}; }
// An implicit conversion operator that is restricted to only those containers
// that the splitter is convertible to.
template <typename Container,
typename = typename std::enable_if<
SplitterIsConvertibleTo<Container>::value>::type>
operator Container() const { // NOLINT(runtime/explicit)
// Dispatch to the ConvertToContainer specialization matching the container,
// its value type, and whether it is map-like.
return ConvertToContainer<Container, typename Container::value_type,
HasMappedType<Container>::value>()(*this);
}
// Returns a pair with its .first and .second members set to the first two
// strings returned by the begin() iterator. Either/both of .first and .second
// will be constructed with empty strings if the iterator doesn't have a
// corresponding value.
template <typename First, typename Second>
operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
absl::string_view first, second;
auto it = begin();
if (it != end()) {
first = *it;
if (++it != end()) {
second = *it;
}
}
return {First(first), Second(second)};
}
private:
// ConvertToContainer is a functor converting a Splitter to the requested
// Container of ValueType. It is specialized below to optimize splitting to
// certain combinations of Container and ValueType.
//
// This base template handles the generic case of storing the split results in
// the requested non-map-like container and converting the split substrings to
// the requested type.
template <typename Container, typename ValueType, bool is_map = false>
struct ConvertToContainer {
Container operator()(const Splitter& splitter) const {
Container c;
// std::inserter works for any container that provides insert(pos, value).
auto it = std::inserter(c, c.end());
for (const auto& sp : splitter) {
*it++ = ValueType(sp);
}
return c;
}
};
// Partial specialization for a std::vector<absl::string_view>.
//
// Optimized for the common case of splitting to a
// std::vector<absl::string_view>. In this case we first split the results to
// a small array of absl::string_view on the stack, to reduce reallocations.
template <typename A>
struct ConvertToContainer<std::vector<absl::string_view, A>,
absl::string_view, false> {
std::vector<absl::string_view, A> operator()(
const Splitter& splitter) const {
// Plain pointer/length pair used as the array element; it converts to
// absl::string_view when the batch is inserted into the vector.
struct raw_view {
const char* data;
size_t size;
operator absl::string_view() const { // NOLINT(runtime/explicit)
return {data, size};
}
};
std::vector<absl::string_view, A> v;
std::array<raw_view, 16> ar;
// Outer loop: one iteration per batch of up to ar.size() pieces.
for (auto it = splitter.begin(); !it.at_end();) {
size_t index = 0;
// Inner loop: fill the array until it is full or the input is exhausted.
do {
ar[index].data = it->data();
ar[index].size = it->size();
++it;
} while (++index != ar.size() && !it.at_end());
// Append the whole batch with a single insert.
v.insert(v.end(), ar.begin(), ar.begin() + index);
}
return v;
}
};
// Partial specialization for a std::vector<std::string>.
//
// Optimized for the common case of splitting to a std::vector<std::string>.
// In this case we first split the results to a std::vector<absl::string_view>
// so the returned std::vector<std::string> can have space reserved to avoid
// std::string moves.
template <typename A>
struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
std::vector<std::string, A> operator()(const Splitter& splitter) const {
// Uses the implicit conversion operator above to collect the views first.
const std::vector<absl::string_view> v = splitter;
return std::vector<std::string, A>(v.begin(), v.end());
}
};
// Partial specialization for containers of pairs (e.g., maps).
//
// The algorithm is to insert a new pair into the map for each even-numbered
// item, with the even-numbered item as the key with a default-constructed
// value. Each odd-numbered item will then be assigned to the last pair's
// value.
template <typename Container, typename First, typename Second>
struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
using iterator = typename Container::iterator;
Container operator()(const Splitter& splitter) const {
Container m;
// `it` is assigned by the first (key) item before any value item reads it,
// because `insert` starts out true.
iterator it;
bool insert = true;
for (const absl::string_view sv : splitter) {
if (insert) {
it = InsertOrEmplace(&m, sv);
} else {
it->second = Second(sv);
}
// Alternate between key items and value items.
insert = !insert;
}
return m;
}
// Inserts the key and an empty value into the map, returning an iterator to
// the inserted item. We use emplace() if available, otherwise insert().
template <typename M>
static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
M* m, absl::string_view key) {
// Use piecewise_construct to support old versions of gcc in which pair
// constructor can't otherwise construct string from string_view.
return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
std::tuple<>()));
}
template <typename M>
static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
M* m, absl::string_view key) {
return ToIter(m->insert(std::make_pair(First(key), Second(""))));
}
// Normalizes the two possible return shapes of insert()/emplace(): either a
// pair<iterator, bool> or a bare iterator.
static iterator ToIter(std::pair<iterator, bool> pair) {
return pair.first;
}
static iterator ToIter(iterator iter) { return iter; }
};
StringType text_;
Delimiter delimiter_;
Predicate predicate_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

View File

@@ -0,0 +1,64 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_
#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// StringConstant<T> represents a compile time string constant.
// It can be accessed via its `absl::string_view value` static member.
// It is guaranteed that the `string_view` returned has constant `.data()`,
// constant `.size()` and constant `value[i]` for all `0 <= i < .size()`
//
// The `T` is an opaque type. It is guaranteed that different string constants
// will have different values of `T`. This allows users to associate the string
// constant with other static state at compile time.
//
// Instances should be made using the `MakeStringConstant()` factory function
// below.
// Compile-time string holder keyed on the callable type `T`: `value` is
// initialized from the result of invoking a default-constructed `T`.
template <typename T>
struct StringConstant {
// The string produced by `T{}()`, evaluated as a constant expression.
static constexpr absl::string_view value = T{}();
// Returns `value`, which makes a StringConstant itself a callable that
// yields the string.
constexpr absl::string_view operator()() const { return value; }
// Check to be sure `value` points to constant data.
// Otherwise, it can't be constant evaluated.
// `2 * value[0]` is always even, so the `!= 1` comparison can never be
// false; the expression exists only to force a read of the first character
// inside a constant expression.
static_assert(value.empty() || 2 * value[0] != 1,
"The input string_view must point to constant data.");
};
// Out-of-class definition of the static data member.
// NOTE(review): presumably kept for pre-C++17 builds, where an odr-used
// static constexpr member needs a namespace-scope definition -- confirm.
template <typename T>
constexpr absl::string_view StringConstant<T>::value; // NOLINT
// Factory function for `StringConstant` instances.
// It supports callables that have a constexpr default constructor and a
// constexpr operator().
// It must return an `absl::string_view` or `const char*` pointing to constant
// data. This is validated at compile time.
// Builds the `StringConstant` associated with the callable type `T`.
// The argument serves only to deduce `T`; its value is never read.
template <typename T>
constexpr StringConstant<T> MakeStringConstant(T) {
  return StringConstant<T>{};
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_

View File

@@ -0,0 +1,60 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/string_constant.h"
#include "absl/meta/type_traits.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
using absl::strings_internal::MakeStringConstant;
// Test fixture: a default-constructible functor whose constexpr call operator
// returns the 8-character string "Callable", passed with an explicit length.
struct Callable {
constexpr absl::string_view operator()() const {
return absl::string_view("Callable", 8);
}
};
// Verifies that the type returned by MakeStringConstant is an empty, trivial
// type: trivially default/copy/move constructible and trivially destructible.
TEST(StringConstant, Traits) {
constexpr auto str = MakeStringConstant(Callable{});
using T = decltype(str);
EXPECT_TRUE(std::is_empty<T>::value);
EXPECT_TRUE(std::is_trivial<T>::value);
EXPECT_TRUE(absl::is_trivially_default_constructible<T>::value);
EXPECT_TRUE(absl::is_trivially_copy_constructible<T>::value);
EXPECT_TRUE(absl::is_trivially_move_constructible<T>::value);
EXPECT_TRUE(absl::is_trivially_destructible<T>::value);
}
// Verifies that a StringConstant built from Callable exposes the same string
// as calling Callable directly, both through the static `value` member and
// through the instance's call operator.
TEST(StringConstant, MakeFromCallable) {
constexpr auto str = MakeStringConstant(Callable{});
using T = decltype(str);
EXPECT_EQ(Callable{}(), T::value);
EXPECT_EQ(Callable{}(), str());
}
// Verifies that wrapping an existing StringConstant in another
// MakeStringConstant call still yields the original Callable string.
TEST(StringConstant, MakeFromStringConstant) {
// We want to make sure the StringConstant itself is a valid input to the
// factory function.
constexpr auto str = MakeStringConstant(Callable{});
constexpr auto str2 = MakeStringConstant(str);
using T = decltype(str2);
// Both access paths on the nested constant must match the source string.
EXPECT_EQ(Callable{}(), T::value);
EXPECT_EQ(Callable{}(), str2());
}
} // namespace

View File

@@ -0,0 +1,53 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// UTF8 utilities, implemented to reduce dependencies.
#include "absl/strings/internal/utf8.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Writes the UTF-8 encoding of `utf8_char` into `buffer` and returns the
// number of bytes written (1 to 4).
//
// The sequence length is chosen purely from the magnitude of `utf8_char`:
// up to 0x7F -> 1 byte, up to 0x7FF -> 2, up to 0xFFFF -> 3, anything larger
// -> 4. No validation is performed; values above 0x10FFFF take the 4-byte
// path and have their lead byte truncated to `char`.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
  // Single-byte code points are stored verbatim.
  if (utf8_char <= 0x7F) {
    buffer[0] = static_cast<char>(utf8_char);
    return 1;
  }

  // Select the sequence length together with the tag bits of its lead byte.
  size_t length;
  char32_t lead_tag;
  if (utf8_char <= 0x7FF) {
    length = 2;
    lead_tag = 0xC0;
  } else if (utf8_char <= 0xFFFF) {
    length = 3;
    lead_tag = 0xE0;
  } else {
    length = 4;
    lead_tag = 0xF0;
  }

  // Fill continuation bytes from the last position backwards, consuming six
  // payload bits per byte.
  char32_t remaining = utf8_char;
  for (size_t i = length - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (remaining & 0x3F));
    remaining >>= 6;
  }

  // Whatever bits are left belong in the lead byte.
  buffer[0] = static_cast<char>(lead_tag | remaining);
  return length;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -0,0 +1,50 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// UTF8 utilities, implemented to reduce dependencies.
#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
#define ABSL_STRINGS_INTERNAL_UTF8_H_
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
// out the UTF-8 encoding into buffer, and returns the number of chars
// it wrote.
//
// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
// are:
// 00 - 7F : 0xxxxxxx
// 80 - 7FF : 110xxxxx 10xxxxxx
// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// Values greater than 0x10FFFF are not supported and may or may not write
// characters into buffer; however, no more than kMaxEncodedUTF8Size bytes
// are ever written, regardless of the value of utf8_char.
// Upper bound on the number of bytes EncodeUTF8Char writes for any input.
enum { kMaxEncodedUTF8Size = 4 };
// Encodes `utf8_char` as UTF-8 into `buffer` and returns the number of bytes
// written. NOTE(review): no buffer length is passed, so the caller presumably
// must provide room for at least kMaxEncodedUTF8Size bytes -- confirm.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_UTF8_H_

View File

@@ -0,0 +1,66 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/utf8.h"
#include <cstdint>
#include <utility>
#include "gtest/gtest.h"
#include "absl/base/port.h"
namespace {
#if !defined(__cpp_char8_t)
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wc++2a-compat"
#endif
// Encodes a set of code points and compares each result with the compiler's
// own u8 literal for the same code point, then checks that unsupported inputs
// never report more than kMaxEncodedUTF8Size bytes.
TEST(EncodeUTF8Char, BasicFunction) {
// Each entry pairs a code point with its expected UTF-8 byte sequence.
std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
{0x00A3, u8"\u00A3"},
{0x00010000, u8"\U00010000"},
{0x0000FFFF, u8"\U0000FFFF"},
{0x0010FFFD, u8"\U0010FFFD"}};
for (auto &test : tests) {
// Two buffers pre-filled with different sentinel bytes (0x00 and 0xFF). A
// position counts as untouched only if buf0 still holds 0x00 there and buf1
// still holds 0xFF, so a written byte equal to one sentinel is still detected.
char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
// One-past-the-end pointers derived from the reported byte counts.
char *buf0_written =
&buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
char *buf1_written =
&buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
// Scan backwards from the end to find how many leading bytes were modified.
int apparent_length = 7;
while (buf0[apparent_length - 1] == '\x00' &&
buf1[apparent_length - 1] == '\xFF') {
if (--apparent_length == 0) break;
}
// The observed length must match the reported length and the expected
// encoding, and the bytes themselves must match in both buffers.
EXPECT_EQ(apparent_length, buf0_written - buf0);
EXPECT_EQ(apparent_length, buf1_written - buf1);
EXPECT_EQ(apparent_length, test.second.length());
EXPECT_EQ(std::string(buf0, apparent_length), test.second);
EXPECT_EQ(std::string(buf1, apparent_length), test.second);
}
// 0x00110000 is just above the documented supported range (0 to 0x10FFFF);
// only the reported size is checked, not the bytes written.
char buf[32] = "Don't Tread On Me";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
absl::strings_internal::kMaxEncodedUTF8Size);
// -1 is implicitly converted to char32_t when passed; again only the reported
// size is checked.
char buf2[32] = "Negative is invalid but sane";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
absl::strings_internal::kMaxEncodedUTF8Size);
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#endif // !defined(__cpp_char8_t)
} // namespace