1
What is libc++
A Standard Library Implementation
Part of LLVM Project
Ships with clang
2
Contents
Various Optimisations in the Library
std::expected
stop_token
ranges::for_each, ranges::copy
flat_map
Testing
3
About Me
Hui Xie (GitHub @huixie90)
Software Developer @Qube Research & Technologies
libc++ contributor
BSI (WG21 UK National Body) member
WG21 (C++ Standard Committee) member
Active in SG9 (Ranges Study Group)
Write C++ Proposals
4
std::expected
1 std::expected<MyData, MyError> compute();
2
3 int main() {
4 std::expected<MyData, MyError> res = compute();
5
6 if (res.has_value()) {
7 // some code that uses res.value();
8 } else {
9 // some code that handles res.error();
10 }
11 }
5
std::expected
class Foo {
int i;
char c;
bool b;
};
enum class ErrCode : int { Err1, Err2, Err3 };
static_assert(sizeof(Foo) == 8);
?); // on most implementations
static_assert(sizeof(std::expected<Foo, ErrCode>) == ?);
6
std::expected in libstdc++
// gcc libstdc++
template <class Val, class Err>
class expected {
union {
Val val_;
Err err_;
};
bool has_val_;
};
static_assert(sizeof(std::expected<Foo, ErrCode>) == ?);
7
std::expected in libstdc++
1 // gcc libstdc++
2 static_assert(sizeof(std::expected<Foo, ErrCode>) == 12);
1 int3 | int2 | int1 | int0 | char | bool | xxxx | xxxx | has_val | xxxx | xxxx | xxxx
2 <-------------- Foo Data --------------->
3 <- Foo pad. ->
4 <--------------- std::expected<Foo, ErrCode> Data -------------->
5 <- expected pad. ->
6 <-------------------------- std::expected<Foo, ErrCode> --------------------------->
8
std::expected in libc++
// clang libc++ simplified version
template <class Val, class Err>
class expected {
union U {
[[no_unique_address]] Val val_;
[[no_unique_address]] Err err_;
};
[[no_unique_address]] U u_;
bool has_val_;
};
static_assert(sizeof(std::expected<Foo, ErrCode>) == ?);
The attribute-token no_unique_address specifies that
a non-static data member is a potentially-overlapping
subobject.
9
std::expected in libc++
1 int3 | int2 | int1 | int0 | char | bool | has_val | xxxx
2 <-------------- Foo Data --------------->
3 <-- Foo pad. -->
4 <----- std::expected<Foo, ErrCode> Data ---------->
5 <ex pd>
6 <------------ std::expected<Foo, ErrCode> -------------->
// clang libc++
static_assert(sizeof(std::expected<Foo, ErrCode>) == 8);
What are the benefits?
10
sizeof(std::expected) Smaller, Better ?
Smaller Memory Footprint
Better Cache Locality
std::expected is likely to be used as a return type
11
std::expected return Type
std::expected<Foo, ErrCode> compute() { return Foo{}; }
# gcc libstdc++
compute():
mov DWORD PTR [rsp-24], 0
xor eax, eax
mov BYTE PTR [rsp-24], 1
mov ecx, DWORD PTR [rsp-24]
mov rdx, rcx
ret
# clang libc++
compute():
movabs rax, 281474976710656
ret
12
Takeaway 1
Reuse tail padding with [[no_unique_address]]
Including the padding of an empty type
13
A Nasty Bug
1 template <class Val, class Err>
2 class expected {
3 union U {
4 [[no_unique_address]] Val val_;
5 [[no_unique_address]] Err err_;
6 };
7 [[no_unique_address]] U u_;
8 bool has_val_;
9
10 public:
11 expected(const expected& other)
12 : has_val_(other.has_val_) {
13 if (has_val_) {
14 std::construct_at(std::addressof(u_.val_), other.u_.val_);
15 } else {
16 std::construct_at(std::addressof(u_.err_), other.u_.err_);
17 }
18 }
19 };
14
A Nasty Bug
1 int main() {
2 std::expected<Foo, int> e1(Foo{});
3 std::expected e2(e1);
4 assert(e2.has_value());
5 }
output.s: /app/example.cpp:15: int main(): Assertion `e2.has_value()' failed.
Program terminated with signal: SIGSEGV
15
Zero-Initialisation
To zero-initialize an object or reference of type T
means: … if T is a (possibly cv-qualified) non-union
class type, its padding bits ([basic.types.general]) are
initialized to zero bits and …
16
A Nasty Bug
1 template <class Val, class Err>
2 class expected {
3 union U {
4 [[no_unique_address]] Val val_;
5 [[no_unique_address]] Err err_;
6 };
7 [[no_unique_address]] U u_;
8 bool has_val_;
9
10 public:
11 expected(const expected& other)
12 : has_val_(other.has_val_) {
13 if (has_val_) {
14 std::construct_at(std::addressof(u_.val_), other.u_.val_);
15 } else {
16 std::construct_at(std::addressof(u_.err_), other.u_.err_);
17 }
18 }
19 };
17
Takeaway 2
Don’t mix [[no_unique_address]] with manual lifetime management
(union, construct_at, placement-new).
18
stop_source, stop_token, stop_callback
1 std::stop_source stop_src;
2
3 // Thread 1
4 std::stop_token token = stop_src.get_token();
5 while(!token.stop_requested()) {
6 // do some work
7 }
8
9 // Thread 2
10 std::stop_token token = stop_src.get_token();
11 std::stop_callback cb(token, [] { /*clean up*/});
12
13 // Thread 3
14 std::stop_token token = stop_src.get_token();
15 std::stop_callback cb(token, [] { /* do stuff */ });
16
17 // Thread 4
18 stop_src.request_stop();
19
Under The Hood
1 class stop_token {
2 std::shared_ptr<__stop_state> state_;
3 };
4
5 class stop_source {
6 std::shared_ptr<__stop_state> state_;
7 };
8
9 template <class Callback>
10 class stop_callback {
11 [[no_unique_address]] Callback callback_;
12 std::shared_ptr<__stop_state> state_;
13 };
20
But What About The Shared State?
stop_requested(): needs a flag to hold whether a stop was requested
stop_possible(): needs to count how many stop_sources exist
stop_callback: needs to store a list of refs to all the stop_callbacks
stop_callback: needs to synchronise the list of callbacks
major requirement is that the whole thing is noexcept, ruling out
mutex
The state needs a ref count to manage its lifetime
21
A Naive Implementation
1 class __stop_state {
2 std::atomic<bool> stop_requested_;
3 std::atomic<unsigned> stop_source_count_; // for stop_possible()
4 std::list<stop_callback*> stop_callbacks_;
5 std::mutex list_mutex_;
6 };
static_assert(sizeof(__stop_state) == 72);
static_assert(sizeof(stop_token) == 16);
22
libc++’s Implementation
1 class __stop_state {
2 // The "callback list locked" bit implements a 1-bit lock to guard
3 // operations on the callback list
4 //
5 // 31 - 2 | 1 | 0 |
6 // stop_source counter | callback list locked | stop_requested |
7 atomic<uint32_t> state_ = 0;
8
9 // Reference count for stop_token + stop_callback + stop_source
10 atomic<uint32_t> ref_count_ = 0;
11
12 // Lightweight intrusive non-owning list of callbacks
13 // Only stores a pointer to the root node
14 __intrusive_list_view<stop_callback_base> callback_list_;
15 };
static_assert(sizeof(__stop_state) == 16);
static_assert(sizeof(stop_token) == 8);
23
Takeaway 3
Sometimes we can reuse unused bits to save space
Look for existing padding bytes for free storage
24
Segmented Iterators for_each
1 std::deque<int> d = ...
2
3 // 1
4 for (int& i : d) {
5 i = std::clamp(i, 200, 500);
6 }
7
8 // 2
9 std::ranges::for_each(d, [](int& i) {
10 i = std::clamp(i, 200, 500);
11 });
25
Benchmarking Iteration
Benchmark for loop for_each speed up
[for_loop vs. for_each]/32 12.5 ns 3.69 ns 3.4x
[for_loop vs. for_each]/8192 2973 ns 259 ns 11.5x
[for_loop vs. for_each]/65536 24221 ns 3327 ns 7.3x
clang20 -O3 cpu: M4 MAX 16 cores, memory: 48GB
26
std::deque
27
std::deque for Loop
1 deque_iterator begin = d.begin();
2 deque_iterator end = d.end();
3 for ( ; begin != end; ++begin ) {
4 int& i = *begin;
5 i = std::clamp(i, 200, 500);
6 }
1 deque_iterator& operator++() {
2 if (++elem_ptr_ - *block_iter_ == block_size) {
3 ++block_iter_;
4 elem_ptr_ = *block_iter_;
5 }
6 return *this;
7 }
28
Segmented Iterators Concept
Segmented Iterators and Hierarchical Algorithms, Matt Austern
We introduced this Segmented Iterator concept into libc++
Iterators over ranges of sub-ranges
Allows algorithms to operate over these multi-level iterators natively
29
for_each_segment
1 template <class SegmentedIterator, class Func>
2 void __for_each_segment(SegmentedIterator first, SegmentedIterator last, Func func) {
3 using Traits = SegmentedIteratorTraits<SegmentedIterator>;
4 auto seg_first = Traits::segment(first);
5 auto seg_last = Traits::segment(last);
6 // some code handles the first segment ...
7 // iterate over the segments
8 while (seg_first != seg_last) {
9 func(Traits::begin(seg_first), Traits::end(seg_first));
10 ++seg_first;
11 }
12 // some code handles the last segment ...
13 }
1 template <class SegmentedIterator, class Func>
2 requires is_segmented_iterator<SegmentedIterator>
3 void for_each(SegmentedIterator first, SegmentedIterator last, Func func) {
4 std::__for_each_segment(first, last, [&](auto inner_first, auto inner_last) {
5 std::for_each(inner_first, inner_last, func);
6 });
7 }
30
Code Generation Comparison
for loop ranges::for_each
31
Optimizing ranges::copy
1 std::vector<std::vector<int>> v = ...;
2 std::vector<int> out;
3
4 // 1
5 out.reserve(total_size);
6 for (const auto& inner : v) {
7 for (int i: inner) {
8 out.push_back(i);
9 }
10 }
11
12 // 2
13 out.resize(total_size);
14 std::ranges::copy(v | std::views::join, out.begin());
32
Benchmarking ranges::copy
Benchmark for loop copy speed up
[for_loop vs. copy]/32 490 ns 322 ns 1.5x
[for_loop vs. copy]/8192 160632 ns 63372 ns 2.5x
[for_loop vs. copy]/65536 1066403 ns 208083 ns 5.1x
33
join_view::iterator is a Segmented
Iterator
1 template <class Iter, class OutIter>
2 requires is_segmented_iterator<Iter>
3 pair<Iter, OutIter> __copy(Iter first, Iter last, OutIter result) {
4 std::__for_each_segment(first, last, [&](auto inner_first, auto inner_last) {
5 result = std::__copy(inner_first, inner_last, std::move(result)).second;
6 });
7 return std::make_pair(last, std::move(result));
8 }
1 template <class In, class Out>
2 requires can_lower_copy_assignment_to_memmove<In, Out>
3 pair<In*, Out*> __copy(In* first, In* last, Out* result) {
4 std::memmove(result, first, last - first);
5 return std::make_pair(last, result + n);
6 }
34
Takeaway 4
We constantly add optimisations to algorithms. Please use them
We use general concepts to capture optimisation opportunities
Optimisations are often generic and can compose with each other
35
flat_map Insertion
// flat_map<int, double>
class flat_map {
std::vector<int> keys_; // always sorted
std::vector<double> values_;
[[no_unique_address]] std::less<int> compare_;
};
1 std::flat_map<int, double> m1 = ...;
2 std::flat_map<int, double> m2 = ...;
3
4 // Insert the elements of m2 into m1
5 for (const auto& [key, val] : m2) {
6 m1.emplace(key, val);
7 }
What is the time complexity?
36
flat_map::insert_range
template<container-compatible-range<value_type> R>
constexpr void insert_range(R&& rg);
Complexity: N + MlogM, where N is size() before the operation and M is
ranges::distance(rg).
1 std::flat_map<int, double> m1 = ...;
2 std::flat_map<int, double> m2 = ...;
3
4 // Insert the elements of m2 into m1
5 m1.insert_range(m2);
37
P3567 insert_range(sorted_unique, rg)
template<container-compatible-range<value_type> R>
void insert_range(sorted_unique_t, R&& rg);
Complexity: Linear in N, where N is size() after the operation.
1 std::flat_map<int, double> m1 = ...;
2 std::flat_map<int, double> m2 = ...;
3
4 // Insert the elements of m2 into m1
5 m1.insert_range(std::sorted_unique, m2);
38
flat_map::insert_range Implementation
1 template<container-compatible-range<value_type> R>
2 constexpr void insert_range(R&& rg) {
3
4 __append(ranges::begin(rg), ranges::end(rg)); // O(M)
5
6 auto zv = ranges::views::zip(keys_, values_);
7 ranges::sort(zv.begin() + old_size, zv.end()); // O(MLogM)
8
9 ranges::inplace_merge(zv.begin(), zv.begin() + old_size, zv.end()); // O(M+N)
10
11 auto dup_start = ranges::unique(zv).begin(); // O(M+N)
12 __erase(dup_start); // O(M+N)
13 }
39
How to __append ?
1 template <class InputIterator, class Sentinel>
2 void __append(InputIterator first, Sentinel last) {
3 for (; first != last; ++first) {
4 std::pair<Key, Val> kv = *first;
5 keys_.insert(keys_.end(), std::move(kv.first));
6 values_.insert(values_.end(), std::move(kv.second));
7 }
8 }
This misses existing optimisations in vector::insert(pos, first,
last)
But we are given a range of “pairs”, not two ranges
Can we reuse these optimizations in some cases?
40
Introducing a Concept product_iterator
iterators that aggregate multiple underlying iterators
flat_map::iterator is product_iterator
1 template <class Iter>
2 requires is_product_iterator_of_size<Iter, 2>
3 void __append(Iter first, Iter last)
4 {
5 using Traits = product_iterator_traits<Iter>;
6
7 keys_.insert(keys_.end(),
8 Traits::template get_iterator<0>(first),
9 Traits::template get_iterator<0>(last));
10
11 values_.insert(values_.end(),
12 Traits::template get_iterator<1>(first),
13 Traits::template get_iterator<1>(last));
14 }
41
Benchmarking insert_range
Benchmark insert_pair product_iterator speed up
[insert_range]/32 149 ns 74 ns 2.0x
[insert_range]/8192 26682 ns 2995 ns 8.9x
[insert_range]/65536 226235 ns 27844 ns 8.1x
42
Are there any other product_iterator?
1 std::flat_map<int, double> m = ...;
2 std::vector<int> newKeys = ...;
3 std::vector<double> newValues = ...;
4
5 // Insert newKeys and newValues into m
m.insert_range(std::views::zip(newKeys, newValues));
zip_view::iterator is also a product_iterator
43
Takeaway 5
Use the most precise API for what you’re trying to achieve
insert_range instead of insert in a loop
Use sorted_unique if the inputs are already sorted
Use library facilities (e.g views::zip) to benefit from concept-based
optimisations
44
Testing in libc++
We test almost every single word in the standard
45
What Shall We Test?
constexpr expected(expected&& rhs) noexcept(see below);
Constraints:
is_move_constructible_v<T> is true and
is_move_constructible_v<E> is true.
Effects: If rhs.has_value() is true, direct-non-list-initializes val with std::move(*rhs). Otherwise, direct-non-list-initializes unex with std::
move(rhs.error()).
Postconditions: rhs.has_value() is unchanged; rhs.has_value() == this->has_value() is true.
Throws: Any exception thrown by the initialization of val or unex.
Remarks: The exception specification is equivalent to is_nothrow_move_constructible_v<T> &&
is_nothrow_move_constructible_v<E>.
This constructor is trivial if
is_trivially_move_constructible_v<T> is true and
is_trivially_move_constructible_v<E> is true.
46
Testing constexpr
1 constexpr bool test() {
2 // Test point 1
3 {
4 std::expected<MoveOnly, int> e1 = ...;
5 std::expected<MoveOnly, int> e2 = std::move(e1);
6 assert(e2.has_value());
7 assert(e2.value() == ... );
8 }
9 // more test points
10
11 return true;
12 }
13
14 int main() {
15 test();
16 static_assert(test());
17 }
Share compile time and run time tests
47
Testing Constraints
constexpr expected(expected&& rhs) noexcept(see below);
Constraints: is_move_constructible_v<T> is true and
is_move_constructible_v<E> is true.
Constraints translate to requires
1 struct Foo { Foo(Foo&&) = delete;};
2
3 static_assert(std::is_move_constructible_v<std::expected<int,int>>);
4 static_assert(!std::is_move_constructible_v<std::expected<Foo,int>>);
5 static_assert(!std::is_move_constructible_v<std::expected<int,Foo>>);
6 static_assert(!std::is_move_constructible_v<std::expected<Foo,Foo>>);
48
Testing Mandates
template<class U = remove_cv_t<T>> constexpr T value_or(U&& v) const &;
Mandates: is_copy_constructible_v<T> is true and
is_convertible_v<U, T> is true.
Mandates translate to static_assert
Test that usage that violates the mandate should not compile
const std::expected<NonCopyable, int> f1{5};
f1.value_or(5); // expected-note{{in instantiation of function template ...}}
// expected-error-re@*:* {{static assertion failed {{.*}}value_type has to be copy ...}}
-Xclang -verify
49
Testing {Hardened} Preconditions
constexpr const T* operator->() const noexcept;
constexpr T* operator->() noexcept;
Hardened preconditions: has_value() is true.
Preconditions/Hardened preconditions translate to
assert/contract_assert/other assert macros
std::expected<int, int> e{std::unexpect, 5};
TEST_LIBCPP_ASSERT_FAILURE(e.operator->(),
"expected::operator-> requires the expected to contain a value");
TEST_LIBCPP_ASSERT_FAILURE installs assertion handler, forks the
process and matches the child process stderr with the expected errors
50
Takeaway 6
Share the same test code for constexpr and runtime
Use negative tests to ensure that things fail as expected
-Xclang -verify is very useful to test static_asserts
51
Contribute to libc++
Getting Started
Github Issues
52
QRT is Hiring
https://www.qube-rt.com/careers/
53
If You Do Mix [[no_unique_address]] with
union, std::construct_at, or placement new
Be ready for bugs
54
Dude, Where is my_char ?
1 struct MyStruct {
2 [[no_unique_address]] std::expected<Foo, ErrCode> my_foo;
3 char my_char;
4 };
5
6 MyStruct s{.my_foo = Foo{}, .my_char = 'x'};
7
8 s.my_foo.emplace(Foo{});
1 // Assertion failure!
2 assert(s.my_char == 'x');
55
Wait, is my_char in the Padding?
template <class Val, class Err>
class expected {
union U {
[[no_unique_address]] Val val_;
[[no_unique_address]] Err err_;
};
[[no_unique_address]] U u_;
bool has_val_;
};
1 int3 | int2 | int1 | int0 | char | bool | has_val | my_char
2 <-------------- Foo Data --------------->
3 <--- Foo pad. --->
4 <----- std::expected<Foo, ErrCode> Data ---------->
5 <ex pad.>
6 <------------- std::expected<Foo, ErrCode> --------------->
7 <----------------------- MyStruct ------------------------>
1 s.my_foo.emplace(Foo{}); // It calls construct_at and clears the padding!
56
Stop Users from Reusing expected Paddings
1 template <class Val, class Err>
2 class expected {
3 struct repr {
4 union U {
5 [[no_unique_address]] Val val_;
6 [[no_unique_address]] Err err_;
7 };
8 [[no_unique_address]] U u_;
9 bool has_val_;
10 };
11
12 repr repr_; // No [[no_unique_address]] here
13 };
[[no_unique_address]] is not transitive
57
It Works
1 struct MyStruct {
2 [[no_unique_address]] std::expected<Foo, ErrCode> my_foo;
3 char my_char;
4 };
1 int3 | int2 | int1 | int0 | char | bool | has_val | xxxx | my_char | xxxx | xxxx | xxxx
2 <-------------- Foo Data --------------->
3 <--- Foo pad. --->
4 <------------------------ repr Data -------------->
5 <repr pad.>
6 <------------ std::expected<Foo, ErrCode> Data ----------->
7 <------------ std::expected<Foo, ErrCode> ---------------->
8 <----------------------- MyStruct Data ---------------------------->
9 <-MyStruct padding ->
10 <-------------------------------------- MyStruct -------------------------------------->
58
But Can We do Better?
1 struct MyStruct {
2 [[no_unique_address]] std::expected<int, ErrCode> my_int;
3 char my_char;
4 };
1 int3 | int2 | int1 | int0 | has_val | xxxx | xxxx | xxxx | my_char | xxxx | xxxx | xxxx
2 <--------------- repr Data --------->
3 <--- repr padding --->
4 <------------ std::expected<int, ErrCode> Data ---------->
5 <------------ std::expected<int, ErrCode> --------------->
6 <----------------------- MyStruct Data ---------------------------->
7 <-MyStruct padding ->
8 <-------------------------------------- MyStruct -------------------------------------->
1 s.my_int.emplace(42); // construct_at(int*, int) is very safe! int has no padding
59
Final Fix
1 template <class Val, class Err>
2 struct repr {
3 union U {
4 [[no_unique_address]] Val val_;
5 [[no_unique_address]] Err err_;
6 };
7 [[no_unique_address]] U u_;
8 bool has_val_;
9 };
10
11 template <class Val, class Err>
12 struct expected_base {
13 repr<Val, Err> repr_; // no [[no_unique_address]]
14 };
15 template <class Val, class Err> requires bool_is_not_in_padding
16 struct expected_base {
17 [[no_unique_address]] repr<Val, Err> repr_;
18 };
19
20 template <class Val, class Err>
21 class expected : expected_base<Val, Err> {};
60