// Synthesized C++
export module carbon_cat_main;
// Use modules tech to import a header,
// and make it available to Carbon.
export import "cat.h"
// Also make a hook available to Carbon
export extern "CarbonMagic"
void Call_Meow() {
// Synthesize the C++ use here,
// where it can be compiled as C++:
Meow();
}
// Synthesized Carbon
package Cpp api;
// Carbon-side entry point for the imported C++ function.
fn Meow() {
  // Call the synthesized low-level hook:
  Call_Meow();
}
// Synthesized C++
export module carbon_cat_main;
// Use modules tech to import a header,
// and make it available to Carbon.
export import "cat.h"
// Also make a hook available to Carbon
export extern "CarbonMagic"
void Call_Meow() {
// Synthesize the C++ use here,
// where it can be compiled as C++:
Meow();
}
// Synthesized Carbon
package Cpp api;
// Carbon-side entry point for the imported C++ function.
fn Meow() {
  // Call the synthesized low-level hook:
  Call_Meow();
}
// Synthesized Carbon
package Cpp api;
// Carbon-side entry point for the imported C++ function.
fn Meow() {
  // Call the synthesized low-level hook:
  Call_Meow();
}
// Synthesized C++
export module carbon_cat_main;
// Use modules tech to import a header,
// and make it available to Carbon.
export import "cat.h"
// Also make a hook available to Carbon
export extern "CarbonMagic"
void Call_Meow() {
// Synthesize the C++ use here,
// where it can be compiled as C++:
Meow();
}
// Synthesized C++
export module carbon_cat_main;
// Use modules tech to import a header,
// and make it available to Carbon.
export import "cat.h"
// Also make a hook available to Carbon
export extern "CarbonMagic"
void Call_Meow() {
// Synthesize the C++ use here,
// where it can be compiled as C++:
Meow();
}
// Synthesized Carbon
package Cpp api;
// Carbon-side entry point for the imported C++ function.
fn Meow() {
  // Call the synthesized low-level hook:
  Call_Meow();
}
// Synthesized C++
export module carbon_cat_sum;
export import "cat.h"
export extern "CarbonMagic"
Cat Call_Cat_Op_Plus(const Cat &lhs,
const Cat &rhs) {
// We compile the operator here, so we
// get whatever C++ ADL would find.
return lhs + rhs;
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
// We can find ``operator+`` in C++,
// so we synthesize a Carbon operator.
impl Cat as Core.AddWith(Cat) {
  fn Op[self: Self](rhs: Cat) -> Cat {
    // Forward both operands to the low-level hook.
    return Call_Cat_Op_Plus(self, rhs);
  }
}
Use it in Carbon
// cat_sum.carbon
import Cpp library "cat.h";
fn SumCatsSomehow(c1: Cat,
                  c2: Cat) -> Cat {
  // No idea why we're adding cats...
  // In Carbon, ``+`` here calls the synthesized ``Op``.
  return c1 + c2;
}
// Synthesized C++
export module carbon_cat_sum;
export import "cat.h"
export extern "CarbonMagic"
Cat Call_Cat_Op_Plus(const Cat &lhs,
const Cat &rhs) {
// We compile the operator here, so we
// get whatever C++ ADL would find.
return lhs + rhs;
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
// We can find ``operator+`` in C++,
// so we synthesize a Carbon operator.
impl Cat as Core.AddWith(Cat) {
  fn Op[self: Self](rhs: Cat) -> Cat {
    // Forward both operands to the low-level hook.
    return Call_Cat_Op_Plus(self, rhs);
  }
}
Synthesize and compile a C++ use with Clang
// cat_sum.carbon
import Cpp library "cat.h";
fn SumCatsSomehow(c1: Cat,
                  c2: Cat) -> Cat {
  // No idea why we're adding cats...
  // In Carbon, ``+`` here calls the synthesized ``Op``.
  return c1 + c2;
}
// Synthesized C++
export module carbon_cat_sum;
export import "cat.h"
export extern "CarbonMagic"
Cat Call_Cat_Op_Plus(const Cat &lhs,
const Cat &rhs) {
// We compile the operator here, so we
// get whatever C++ ADL would find.
return lhs + rhs;
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
// We can find ``operator+`` in C++,
// so we synthesize a Carbon operator.
impl Cat as Core.AddWith(Cat) {
  fn Op[self: Self](rhs: Cat) -> Cat {
    // Forward both operands to the low-level hook.
    return Call_Cat_Op_Plus(self, rhs);
  }
}
Synthesize and compile a C++ use with Clang
// cat_sum.carbon
import Cpp library "cat.h";
fn SumCatsSomehow(c1: Cat,
                  c2: Cat) -> Cat {
  // No idea why we're adding cats...
  // In Carbon, ``+`` here calls the synthesized ``Op``.
  return c1 + c2;
}
// Synthesized C++
export module carbon_cat_sum;
export import "cat.h"
export extern "CarbonMagic"
Cat Call_Cat_Op_Plus(const Cat &lhs,
const Cat &rhs) {
// We compile the operator here, so we
// get whatever C++ ADL would find.
return lhs + rhs;
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
// We can find ``operator+`` in C++,
// so we synthesize a Carbon operator.
impl Cat as Core.AddWith(Cat) {
  fn Op[self: Self](rhs: Cat) -> Cat {
    // Forward both operands to the low-level hook.
    return Call_Cat_Op_Plus(self, rhs);
  }
}
// Synthesized C++
export module carbon_global_cats;
export import "cat.h"
// Generated for each instantiation
// of ``T`` and ``U``, here both are ``Cat``.
export extern "CarbonMagic"
void Call_Vector_Cat_Push_Cat(
Vector<Cat> *self,
Cat *x) {
// Provide C++ R-value-ref move:
self->Push(std::move(*x));
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
class Vector(template T:! type) {
  // ...
  fn Push[addr self: Self*,
          template U:! type](var x: U) {
    // Pass the receiver and the address of the argument to the hook.
    Call_Vector_T_Push_U(self, &x);
  }
}
var global_cats: Vector(Cat);
// Synthesized C++
export module carbon_global_cats;
export import "cat.h"
// Generated for each instantiation
// of ``T`` and ``U``, here both are ``Cat``.
export extern "CarbonMagic"
void Call_Vector_Cat_Push_Cat(
Vector<Cat> *self,
Cat *x) {
// Provide C++ R-value-ref move:
self->Push(std::move(*x));
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
class Vector(template T:! type) {
  // ...
  fn Push[addr self: Self*,
          template U:! type](var x: U) {
    // Pass the receiver and the address of the argument to the hook.
    Call_Vector_T_Push_U(self, &x);
  }
}
var global_cats: Vector(Cat);
// Synthesized C++
export module carbon_global_cats;
export import "cat.h"
// Generated for each instantiation
// of ``T`` and ``U``, here both are ``Cat``.
export extern "CarbonMagic"
void Call_Vector_Cat_Push_Cat(
Vector<Cat> *self,
Cat *x) {
// Provide C++ R-value-ref move:
self->Push(std::move(*x));
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
class Vector(template T:! type) {
  // ...
  fn Push[addr self: Self*,
          template U:! type](var x: U) {
    // Pass the receiver and the address of the argument to the hook.
    Call_Vector_T_Push_U(self, &x);
  }
}
var global_cats: Vector(Cat);
// Synthesized C++
export module carbon_global_cats;
export import "cat.h"
// Generated for each instantiation
// of ``T`` and ``U``, here both are ``Cat``.
export extern "CarbonMagic"
void Call_Vector_Cat_Push_Cat(
Vector<Cat> *self,
Cat *x) {
// Provide C++ R-value-ref move:
self->Push(std::move(*x));
}
// Synthesized Carbon
package Cpp api;
class Cat { ... }
class Vector(template T:! type) {
  // ...
  fn Push[addr self: Self*,
          template U:! type](var x: U) {
    // Pass the receiver and the address of the argument to the hook.
    Call_Vector_T_Push_U(self, &x);
  }
}
var global_cats: Vector(Cat);
There is a pattern to this approach:
Carbon constructs provide a Carbon API for C++ imports
C++ constructs implement the C++ behavior of that API
Carbon’s compiler synthesizes a low-level, simplified connection layer
Because the connection is never user-visible, it can cheat a lot
Example: generate manually during instantiation
Calling Carbon from C++? Same idea:
Carbon will build a C++ module or header to expose Carbon to C++
Synthesizing C++ constructs to model the C++ API for a Carbon import
Map through the low-level connection layer to the full Carbon behavior
What about that low-level connection layer?
We already have it: LLVM!
LLVM is the glue that holds C++ interop together
Already know we can lower both Carbon and C++ into LLVM
Guaranteed to be able to represent everything
Unconstrained by source, can select optimal representation
LLVM’s optimizer can inline and optimize away overhead
Also provide a fallback of C++ source generation
Limited / partial coverage, and more overhead
Useful when bridging to other toolchains or new platforms
Want to enable shipping a binary Carbon library with a C++ header
This pattern enables so much more:
Bundling a C++ toolchain to build the C++ code
Allows a custom STL ABI to transparently map more types
Transparent mapping of views and non-owning wrappers on API boundaries
Ranges and iteration mapping
Inheritance, virtual dispatch, v-tables
Translating error handling both to & from exceptions
Memory safety
What do we mean by memory safety?
Bugs, safety, and safety bugs
Bugs: program behavior contrary to the author’s intent
Software, in practice, always has bugs – we must plan for them
Bugs, safety, and safety bugs
Bugs: program behavior contrary to the author’s intent
Software, in practice, always has bugs – we must plan for them
Safety: invariants or limits on program behavior in the face of bugs
Bugs, safety, and safety bugs
Bugs: program behavior contrary to the author’s intent
Software, in practice, always has bugs – we must plan for them
Safety: invariants or limits on program behavior in the face of bugs
Safety bugs: bugs where some aspect of program behavior has no
invariants or limits
Checking for an unexpected value and calling abort(): detects a bug, but
is safe
Calling std::unreachable() is also a bug, but unsafe and a safety bug
Bugs, safety, and safety bugs
Bugs: program behavior contrary to the author’s intent
Software, in practice, always has bugs – we must plan for them
Safety: invariants or limits on program behavior in the face of bugs
Safety bugs: bugs where some aspect of program behavior has no
invariants or limits
Checking for an unexpected value and calling abort(): detects a bug, but
is safe
Calling std::unreachable() is also a bug, but unsafe and a safety bug
Initial bug: the first deviation of program behavior
Buggy behavior often causes more buggy behavior – all are bugs
Our focus is on fixing the initial bug
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Detecting: while still vulnerable, exploits of a bug can be detected or
tracked
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Detecting: while still vulnerable, exploits of a bug can be detected or
tracked
Mitigating: making a vulnerability significantly more expensive,
difficult, or improbable to be exploited
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Detecting: while still vulnerable, exploits of a bug can be detected or
tracked
Mitigating: making a vulnerability significantly more expensive,
difficult, or improbable to be exploited
Preventing: while still a bug, making it impossible to be a
vulnerability
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Detecting: while still vulnerable, exploits of a bug can be detected or
tracked
Mitigating: making a vulnerability significantly more expensive,
difficult, or improbable to be exploited
Preventing: while still a bug, making it impossible to be a
vulnerability
Fixing: no longer a bug, much less a vulnerability
Safety, bugs, and security vulnerabilities
Security vulnerabilities: ability of a malicious user to subvert a
program’s behavior, typically through exploiting bugs
Detecting: while still vulnerable, exploits of a bug can be detected or
tracked
Mitigating: making a vulnerability significantly more expensive,
difficult, or improbable to be exploited
Preventing: while still a bug, making it impossible to be a
vulnerability
Fixing: no longer a bug, much less a vulnerability
Safety doesn’t require fixing bugs, but it can prevent or mitigate
vulnerabilities
Correct-by-construction techniques and proofs are a subset of safety techniques:
in effect, they stop a buggy program from even being formed
Memory safety bugs and security
Memory safety bugs: Safety bugs that additionally read or write memory
A focus because they are the dominant cause of security vulnerabilities
Over 65% of high / critical vulnerabilities (sources
1,2,3,4,5,6)
Memory safety: limits program behavior to only read or write intended
memory, even in the face of bugs
Sufficient to mitigate and prevent these classes of vulnerabilities in
practice
Classes of memory safety bugs
Spatial: memory access outside of an allocated region
Classes of memory safety bugs
Spatial: memory access outside of an allocated region
Temporal: access after the lifetime of the object in memory
Classes of memory safety bugs
Spatial: memory access outside of an allocated region
Temporal: access after the lifetime of the object in memory
Type: accessing memory which isn’t a valid representation for a type
Classes of memory safety bugs
Spatial: memory access outside of an allocated region
Temporal: access after the lifetime of the object in memory
Type: accessing memory which isn’t a valid representation for a type
Initialization: reading memory before it is initialized
Classes of memory safety bugs
Spatial: memory access outside of an allocated region
Temporal: access after the lifetime of the object in memory
Type: accessing memory which isn’t a valid representation for a type
Initialization: reading memory before it is initialized
Data-Race: unsynchronized reads & writes by different threads
Suggested programming language approach to memory safety
A language is rigorously memory-safe if it:
has a well-delineated safe subset, and
provides spatial, temporal, type, and initialization safety in its safe
subset.
This should be the required minimum for programming languages going forward.
Details of rigorous memory safety
Safe subset must be a viable default, with unsafe being exceptional
Delineated unsafe constructs must be visible and auditable
Safety can be through any combination of compile-time and runtime protections
However, must prevent vulnerabilities, not just mitigate them
Details of rigorous memory safety
Data-race safety remains highly desirable but not a strict requirement:
It would increase the constraints on the available solutions
No evidence (yet) of comparable security risks when other safety is achieved
How can Carbon get us there starting from C++?
First, we need to introduce a safe subset
Best candidate for C++ is likely similar to Rust’s borrow checker
High performance: ensures safety at compile-time with the type system
Explored in the context of C++’s type system, w/ many barriers
/// Swaps `a[i]` with `b[i]` for every index `i` of `a`.
///
/// Panics at the first index that is out of bounds for `b`.
fn swap_span(a: &mut [i32], b: &mut [i32]) {
    let count = a.len();
    let mut idx = 0;
    while idx < count {
        std::mem::swap(&mut a[idx], &mut b[idx]);
        idx += 1;
    }
}
// Builds two mutable slices over one vector through a raw pointer and
// passes them to ``swap_span``.
pub fn main() {
let mut v = vec![1, 2, 3, 4, 5, 6];
// Need to make a pointer, without borrowing ``v`` mutably.
let ptr: *mut i32 = v.as_mut_ptr();
// Bypassing the borrow checker so that we can make
// two independent borrowing references from it.
// SAFETY note: ``v`` holds six elements, so elements 0..3 and 3..6
// are both in bounds and the two slices do not overlap.
let first = unsafe { std::slice::from_raw_parts_mut(ptr, 3) };
let second = unsafe { std::slice::from_raw_parts_mut(ptr.add(3), 3) };
swap_span(first, second);
}
/// Swaps `a[i]` with `b[i]` for every index `i` of `a`.
///
/// Panics at the first index that is out of bounds for `b`.
fn swap_span(a: &mut [i32], b: &mut [i32]) {
    let count = a.len();
    let mut idx = 0;
    while idx < count {
        std::mem::swap(&mut a[idx], &mut b[idx]);
        idx += 1;
    }
}
// Same raw-pointer split as a closure that takes ``&mut Vec<i32>``, so
// the two slices come out of a single mutable borrow of ``v``.
pub fn main() {
let mut v = vec![1, 2, 3, 4, 5, 6];
let (first, second) = (|v: &mut Vec<i32>| {
// Need to make a pointer, without borrowing ``v`` mutably (again).
let ptr: *mut i32 = v.as_mut_ptr();
// Bypassing the borrow checker so that we can make
// two independent borrowing references from it.
// SAFETY note: the vector holds six elements, so elements 0..3 and
// 3..6 are both in bounds and the two slices do not overlap.
let first = unsafe { std::slice::from_raw_parts_mut(ptr, 3) };
let second = unsafe { std::slice::from_raw_parts_mut(ptr.add(3), 3) };
(first, second)
})(&mut v); // Takes a mutable borrow on ``v`` here.
swap_span(first, second);
}
// Swaps a[i] with b[i] element by element.
//
// Only the first min(a.size(), b.size()) elements are exchanged, so a
// shorter ``b`` is never indexed out of bounds.
void swap_span(std::span<int> a,
               std::span<int> b) {
  const size_t count =
      a.size() < b.size() ? a.size() : b.size();
  for (size_t i = 0;
       i < count;
       i += 1) {
    std::swap(a[i], b[i]);
  }
}
int main() {
std::vector<int> v = {1, 2, 3,
4, 5, 6};
swap_span(
std::span(v).subspan(0, 3),
std::span(v).subspan(3, 3)
);
}
/// Swaps `a[i]` with `b[i]` for every index `i` of `a`.
///
/// Panics at the first index that is out of bounds for `b`.
fn swap_span(a: &mut [i32],
             b: &mut [i32]) {
    let count = a.len();
    let mut idx = 0;
    while idx < count {
        std::mem::swap(&mut a[idx], &mut b[idx]);
        idx += 1;
    }
}
// Splits one vector into two mutable slices through a raw pointer
// inside a closure, then passes them to ``swap_span``.
pub fn main() {
let mut v = vec![1, 2, 3, 4, 5, 6];
let (first, second) = (|v: &mut Vec<i32>| {
let ptr: *mut i32 = v.as_mut_ptr();
// SAFETY note: the vector holds six elements, so elements 0..3 and
// 3..6 are both in bounds and the two slices do not overlap.
let first = unsafe {
std::slice::from_raw_parts_mut(ptr, 3)
};
let second = unsafe {
std::slice::from_raw_parts_mut(ptr.add(3),
3)
};
(first, second)
})(&mut v);
swap_span(first, second);
}
It needs different APIs to work well:
// Swaps a[i] with b[i] element by element.
//
// Only the first min(a.size(), b.size()) elements are exchanged, so a
// shorter ``b`` is never indexed out of bounds.
void swap_span(std::span<int> a,
               std::span<int> b) {
  const size_t count =
      a.size() < b.size() ? a.size() : b.size();
  for (size_t i = 0;
       i < count;
       i += 1) {
    std::swap(a[i], b[i]);
  }
}
int main() {
std::vector<int> v = {1, 2, 3, 4, 5, 6};
swap_span(
std::span(v).subspan(0, 3),
std::span(v).subspan(3, 3)
);
}
/// Swaps `a[i]` with `b[i]` for every index `i` of `a`.
///
/// Panics at the first index that is out of bounds for `b`.
fn swap_span(a: &mut [i32],
             b: &mut [i32]) {
    let count = a.len();
    let mut idx = 0;
    while idx < count {
        std::mem::swap(&mut a[idx], &mut b[idx]);
        idx += 1;
    }
}
pub fn main() {
    let mut values: Vec<i32> = vec![1, 2, 3, 4, 5, 6];
    // One mutable borrow of ``values`` yields two
    // independent, non-overlapping mutable halves.
    let (front, back) = values.split_at_mut(3);
    swap_span(front, back);
}
Making a safe subset a reasonable default requires breaking changes
Current defaults in C++ are too unsafe to be realistically delineated
This means a large amount of breaking change
Need to move existing unsafe code towards separable constructs
Clear space for safe defaults throughout the language
Especially disruptive to pointers, references, and the STL
WG21 makes this essentially impossible. 😞
Carbon gives us a viable strategy:
Make unsafe Carbon a migration target from C++ w/ great interop
Evolve and extend Carbon to have a viable safe subset
Migrate unsafe C++ to unsafe Carbon at scale
Incrementally rewrite unsafe Carbon to safe Carbon
Need to separate the two migrations
Making C++ → Carbon also require unsafe → safe magnifies the costs
Especially if not all code or users need to move to safety
At that point, should probably just target Rust
Interesting space, and focus of experiment, is a two-phase approach
Chance to drop the initial cost and scale up overall migration
What will this look like for memory safety? 🤷
Let’s look at a simpler example: null-safety
Null-safety: type-system enforced null pointers
Null pointers are tracked in the type system explicitly
Code must explicitly check for null before dereferencing
Result: no more null pointer bugs
Still bugs, and still null pointers!
Remaining bugs are incorrectly checking or handling null
These are localized, don’t cross APIs, are amenable to static analysis, etc.
C++ references are partially null safe, but not enough
class Employer;
// Holds its employer by const reference; the comments mark the two
// drawbacks this example is meant to show.
class Employee {
  // Problem: can't copy-assign or move-assign even when desired!
  const Employer& employer_;

 public:
  // Problem: can call with a temporary!
  Employee(const Employer& employer) : employer_(employer) {}
};
Can extend C++ to add an annotation
// Use a Clang extension to provide nullability.
template <typename T> using NonNull = T _Nonnull;
class Employer;
class Employee {
public:
Employee(const NonNull<Employer*> employer) : employer_(employer) {}
private:
const NonNull<Employer*> employer_;
};
Can extend C++ to add an annotation
// Use a Clang extension to provide nullability.
template <typename T> using NonNull = T _Nonnull;
template <typename T> using Nullable = T _Nullable;
class Employer;
class Employee {
public:
Employee(const NonNull<Employer*> employer) : employer_(employer) {}
void ChangeEmployer(const NonNull<Employer*> new_employer) {
previous_employer_ = employer_;
employer_ = new_employer;
}
private:
const NonNull<Employer*> employer_;
const Nullable<Employer*> previous_employer_ = nullptr;
};
Can even establish a way to shift the default!
// Use a Clang extension to provide nullability.
// NOTE(review): Clang's existing spelling appears to be
// ``#pragma clang assume_nonnull begin`` / ``end`` -- confirm which is meant.
#pragma clang assume_pointers(Nonnull)
template <typename T> using Nullable = T _Nullable;
class Employer;
class Employee {
 public:
  Employee(const Employer* employer) : employer_(employer) {}
  // Remembers the current employer, then switches to new_employer.
  void ChangeEmployer(const Employer* new_employer) {
    previous_employer_ = employer_;
    employer_ = new_employer;
  }

 private:
  const Employer* employer_;
  // Nullable, non-const pointer to a const Employer, so it can be
  // assigned from employer_ in ChangeEmployer.
  Nullable<const Employer*> previous_employer_ = nullptr;
};
Limits of doing this in C++:
A lot of effort and distraction due to wrong defaults
Superficial simplicity, but deep complexity
Smart pointers: unique_ptr, shared_ptr, …
Conversions: const, derived-to-base, …
Expressive limits: annotations aren’t distinct types, so can’t overload on them
And this is a best-case-scenario: Well factored, fairly simple code
Carbon models nullable pointers as optional pointers
Cannot dereference a nullable pointer: it’s not a pointer!
Trivial to make unwrapping syntax make the potential for null obvious
As a full type, fully supported in the type system (overloads, etc)
Can build up smart pointers to consistently incorporate this model
Even in simple cases, we get nicer syntax:
class Employer;
class Employee {
  // Builds an Employee from its (non-null) employer.
  fn Make(employer: const Employer*) -> Employee {
    return {.employer = employer};
  }
  // Remembers the current employer, then switches to new_employer.
  fn ChangeEmployer[addr self: Self*](new_employer: const Employer*) {
    // If we allow direct assignment to an optional like C++ does:
    self->previous_employer = self->employer;
    self->employer = new_employer;
  }
  private var employer: const Employer*;
  // Makes an optional pointer with ``T*?``, defaults to null.
  private var previous_employer: const Employer*?;
}
Benefits of the model compound with advanced language features
Pattern matching can be designed for testing & unwrapping
Can layer control flow constructs, as in Rust, that further improve ergonomics:
if let to test and unwrap
let else to test and unwrap with early exit
Migration strategy for null-safety:
Clean up C++ to be close to desired model
May use extensions or annotation systems
May not get full fidelity, coverage, or benefits
Migration strategy for null-safety:
Clean up C++ to be close to desired model
May use extensions or annotation systems
May not get full fidelity, coverage, or benefits
Migrate at-scale from C++ to Carbon
Specifically with any remaining null-unsafety
Migration strategy for null-safety:
Clean up C++ to be close to desired model
May use extensions or annotation systems
May not get full fidelity, coverage, or benefits
Migrate at-scale from C++ to Carbon
Specifically with any remaining null-unsafety
Incrementally refactor towards null-safety
Redesign APIs as needed, leveraging language facilities
This is a pattern that we want to repeat
Reduce the gap and improve migration using C++ annotations & extensions
Large scale migration of code as-is from C++ to Carbon
Incremental and focused improvements with new features