or: an introduction to building simple and not-so-simple compilers with Rust and LLVM
Peter Marheine
2017-05-15
Rust Sydney
Formerly "Low level virtual machine"
Supports both ahead-of-time and just-in-time
Supports many systems:
.. and of course rustc
.
rustc
fn add(x: i32, y: i32) -> i33 {
(x as i33) + (y as i33)
}
target triple = "x86_64-unknown-linux"
define external i33 @add(i32 %x, i32 %y) nounwind readnone {
%xe = sext i32 %x to i33
%ye = sext i32 %y to i33
%result = add i33 %x, %y
ret i33 %result
}
.text
.globl add
add:
movsxd rcx, edi
movsxd rax, esi
add rax, rcx
ret
fn collatz(x: u32) -> bool {
if x == 1 {
return true;
}
let next = if x % 2 == 0 {
x / 2
} else {
(3 * x) + 1
}
collatz(next)
}
collatz(u32)
in IR
define "fastcc" i1 @collatz(i32 %x) {
%finished = icmp eq i32 %x, 1
br i1 %finished, label %Base, label %Continue
Base:
ret i1 1
Continue:
%next = alloca i32
%odd = urem i32 %x, 2
%odd1 = trunc i32 %odd to i1
br i1 %odd1, label %Odd, label %Even
collatz(u32)
continued
Odd:
%halved = udiv i32 %x, 2
store i32 %halved, i32* %next
br label %Recurse
Even:
%larger = mul i32 %x, 3
%larger1 = add i32 %larger, 1
store i32 %larger1, i32* %next
br label %Recurse
Recurse:
%nextval = load i32, i32* %next
%result = musttail call i1 @collatz(i32 %nextval)
ret i1 %result
}
%nextval = %halved
br label %Recurse
Even:
// ...
%nextval = %larger1
Recurse:
// use nextval
Native format for optimization.
Token | Action |
---|---|
m | Print "merth" |
e | Print "\n" |
r | Print " " |
t | Print random [a-z] [0, 13.4) times |
h | Jump to after the next 'h' |
_ | Do nothing |
fn print(s: *mut u8, len: u8)
fn rand_inrange(max: u8) -> u8
These are not supported by any CPU..
Just like C!
_start
/dev/urandom
main
exit(0)
We need to do syscalls
extern fn syscall(nr: i64, ...) -> i64;
define private i64 @syscall2(i64 %nr, i64 %p1, i64 %p)
inlinehint {
%1 = call i64 asm sideeffect "syscall",
"={rax},{rax},{rdi},{rsi}"
(i64 %nr, i64 %p1, i64 %p2)
ret i64 %1
}
extern fn open(path: *mut u8, flags: c_int) -> c_int
@__NR_open = private constant i64 2
define private i32 @open(i8 *%path0, i32 %flags0) {
%nr = load i64, i64* @__NR_open
%path = ptrtoint i8* %path0 to i64
%flags = zext i32 %flags0 to i64
%out0 = call i64 @syscall2(i64 %nr, i64 %path, i64 %flags)
%out = trunc i64 %out0 to i32
ret i32 %out
}
_start
.
declare void @main()
define void @exit(i32 %code) noreturn {}
define void @_start() noreturn {
// initialize RNG
call void @main()
call void @exit(i32 0)
unreachable
}
Feels like C: declare external functions, glue them together.
extern crate llvm_sys as llvm;
fn main() {
unsafe {
LLVM_InitializeNativeTarget();
LLVM_InitializeNativeAsmPrinter();
LLVM_InitializeNativeAsmParser();
let ctxt = LLVMContextCreate();
/* Use ctxt */
LLVMContextDispose(ctxt);
}
}
main
declare void @main()
let main_name = b"main\0".as_ptr() as *const _;
let main_module =
LLVMModuleCreateWithNameInContext(main_name, ctxt);
let ty_void = LLVMVoidType();
let ty_fn_main = LLVMFunctionType(ty_void,
/* ParamTypes */ ptr::null_mut(),
/* ParamCount */ 0,
/* IsVarArg */ 0);
let main_function = LLVMAddFunction(main_module,
main_name,
ty_fn_name);
fn LLVMPrintModuleToFile(M: LLVMModuleRef,
Filename: *const c_char,
ErrorMessage: *mut *mut c_char)
-> LLVMBool
fn LLVMPrintModuleToString(M: LLVMModuleRef) -> *mut c_char
Could we use io::Write
instead?
extern "C" typedef int (*cb_t)(const void *, size_t, void *);
class raw_callback_ostream : public llvm::raw_ostream {
cb_t callback;
void *callback_data;
public:
raw_callback_ostream(cb_t cb, void *cb_data) { /* ... */ }
private:
void write_impl(const char *p, size_t sz) override {
callback(p, sz, callback_data);
offset += sz;
}
};
extern "C" void PrintModuleIR(LLVMModuleRef M,
cb_t cb,
void *cb_data) {
raw_callback_ostream out(cb, cb_data);
out << *llvm::unwrap(M);
}
Write
to callbacks
extern "C" fn module_ir_printer<W>(src: *const u8,
size: libc::size_t,
state: *mut libc::c_void)
-> libc::c_int
where W: std::io::Write {
let (src, out) = unsafe {
(slice::from_raw_parts(src, size),
&mut *(state as *mut W))
};
let _res = out.write_all(src);
0
}
fn dump_module_ir<W>(module: LLVMModuleRef, mut out: W)
where W: std::io::Write {
unsafe {
PrintModuleIR(module,
module_ir_printer::<W>,
&mut out as *mut W as *mut libc::c_void);
}
}
let main_function = LLVMAddFunction(main_module,
main_name,
ty_fn_name);
dump_module_ir(main_module, std::io::stderr());
; ModuleID = 'main'
source_filename = "main"
declare void @main()
🎉
Builder
s
let bb_main =
LLVMAppendBasicBlockInContext(ctxt,
main_function,
b"\0".as_ptr() as *const _);
let b = LLVMCreateBuilderInContext(ctxt);
LLVMPositionBuilderAtEnd(b, bb_main);
LLVMBuildRetVoid(b);
LLVMDisposeBuilder(b);
define void @main() {
ret void
}
🎉🎉
_start
and print
functions
for x86_64 Linuxrand_inrange
function and RNG implmain
function
llc linux-x86_64.ll
as -o linux-x86_64.o linux-x86_64.s
llc random.ll
as -o random.o random.s
llc main.ll
as -o main.o main.s
ld main.o linux-x86_64.o random.o
./a.out
The generated code is inefficient!
_start: # @_start
sub rsp, 24
.Lcfi4:
xor esi, esi
movabs rdi, .Lurandom
lea rax, [rsp + 8]
mov qword ptr [rsp], rax # 8-byte Spill
call .Lopen
mov esi, 16
mov edx, esi
mov edi, eax
mov rsi, qword ptr [rsp] # 8-byte Reload
call .Lread
cmp rax, 16
jne .LBB7_2
llc -O2
llvm-link *.ll -o a.bc
llc -O2 a.bc
, etcWe can do better (but not right now)
let ty_void = LLVMVoidTypeInContext(ctxt);
let ty_i8 = LLVMIntTypeInContext(ctxt, 8);
let ty_i8p = LLVMPointerType(ty_i8, 0);
let param_types = [ty_i8p, ty_i8];
let ty_rt_print = LLVMFunctionType(
ty_void,
param_types.as_ptr() as *mut _,
param_types.len(),
0);
let rt_print = LLVMAddFunction(
llmod,
b"print\0".as_ptr() as *const _,
ty_rt_print);
declare void print(i8*, i8)
Other functions left as an exercise for the reader.
LLVMVoidType
↔ LLVMVoidTypeInContext
Implicit global context ↔ explicit
LLVMVerifyModule
LLVM_ENABLE_ASSERTIONS
main
let code: Iterator<char>;
while let Some(c) = code.next() {
match c {
'm' => { /* Print "merth" */ },
'e' => { /* Print newline */ },
'r' => { /* Print space */ },
't' => { /* Random string */ },
'h' => { loop { match code.next() {
Some('h') | None => break,
_ => continue,
} } },
_ => { /* Do nothing */ },
}
}
m
→ print("merth", 5)
let b: Builder;
let v_5i8 = LLVMConstInt(ty_i8, 5, 0);
let v_merth = LLVMBuildGlobalStringPtr(
b,
b"merth\0".as_ptr() as *const _,
b"MERTH\0".as_ptr() as *const _);
LLVMBuildCall(b,
rt_print,
[v_merth, v_5i8].as_ptr() as *mut _,
2,
b"\0".as_ptr() as *const _);
e
→ print("\n", 1)
let v_newline = ptr_to_const(
llmod,
ty_i8,
LLVMConstInt(ty_i8, 10, 0),
b"NEWLINE\0");
LLVMBuildCall(b,
rt_print,
[v_newline, v_1i8].as_ptr() as *mut _,
2,
b"\0".as_ptr() as *const _);
ptr_to_const
fn ptr_to_const(llmod: LLVMModuleRef,
ty: LLVMTypeRef,
value: LLVMValueRef,
name: &[u8])
-> LLVMValueRef {
let g = LLVMAddGlobal(llmod, ty, name.as_ptr() as *const _);
LLVMSetInitializer(g, value);
LLVMSetGlobalConstant(g, 1 /* true */);
LLVMConstInBoundsGEP(g, [v_const_0i8].as_ptr() as *mut _, 0)
}
GlobalStringPtr
This space intentionally left blank.
rand_inrange
+ rand_string
let len = rand_inrange(13.4 as i8 + 1);
rand_string(len);
let v_len = LLVMBuildCall(
b,
rt_rand_inrange,
[LLVMConstAdd(
LLVMConstFPToUI(
LLVMConstReal(LLVMFloatTypeInContext(ctxt),
13.4),
ty_i8),
v_1i8)
].as_ptr() as *mut _,
1,
b"\0".as_ptr() as *const _);
LLVMBuildCall(rt_rand_string, [v_len]);
FP is slow, do it all as const for speed.
static RT_SOURCES: &'static [&'static [u8]] = &[
include_bytes!("../runtime/random.ll")
];
let mbuf = LLVMCreateMemoryBufferWithMemoryRange(
code.as_ptr() as *const _,
code.len() - 1 as libc::size_t,
b"\0".as_ptr() as *const _,
/* RequiresNullTerminator */ 1);
let module: LLVMModuleRef;
let err_msg: *mut i8;
LLVMParseIRInContext(ctxt, mbuf, &mut module, &mut err_msg);
/* Error checking here */
static RT_TARGET_SOURCE: phf::Map<
&'static str,
&'static [u8]
> = ...;
Use phf
for excessively efficient
lookup tables
(built at compile-time)
let target = LLVMGetDefaultTargetTriple();
RT_TARGET_SOURCES.get(target);
let main_module: LLVMModuleRef;
for module in rt_modules {
// Destroys module
LLVMLinkModules2(main_module, module);
}
Easy as llvm-link
let triple = "x86_64-unknown-linux";
let target: LLVMTargetRef;
LLVMGetTargetFromTriple(triple, &mut target, ptr::null_mut());
let tm = LLVMCreateTargetMachine(target,
triple,
"", "",
LLVMCodeGenLevelAggressive,
LLVMRelocDefault,
LLVMCodeModelDefault);
Get a target, make a target machine
let mbuf: LLVMMemoryBufferRef;
LLVMTargetMachineEmitToMemoryBuffer(tm, llmod, ty,
ptr::null_mut(),
&mut mbuf);
let mut w: io::Write;
let code: &[u8] = slice::from_raw_parts(
LLVMGetBufferStart(mbuf),
LLVMGetBufferSize(mbuf)
);
w.write_all(code);
LLVMDisposeMemoryBuffer(mbuf);
Emit code to memory,
write to a file.
Not pictured: linker invocation
(as subprocess)
let fpm = LLVMCreateFunctionPassManagerForModule(llmod);
let mpm = LLVMCreatePassManager();
let pmb = LLVMPassManagerBuilderCreate();
LLVMPassManagerBuilderSetOptLevel(pmb, 2);
LLVMPassManagerBuilderUseInlinerWithThreshold(pmb, 225);
LLVMPassManagerBuilderPopulateModulePassManager(pmb, mpm);
LLVMPassManagerBuilderPopulateFunctionPassManager(pmb, fpm);
LLVMPassManagerBuilderDispose(pmb);
Pass manager wrangles optimizer passes
Including: DCE, GVN, constant propagation,
LICM, loop unrolling, inlining..
LLVMInitializeFunctionPassManager(fpm);
let mut func = LLVMGetFirstFunction(llmod);
while func != ptr::null_mut() {
LLVMRunFunctionPassManager(fpm, func);
func = LLVMGetNextFunction(func);
}
LLVMFinalizeFunctionPassManager(fpm);
Iterate over functions, optimizing each
LLVMRunPassManager(mpm, llmod);
let mut func = LLVMGetFirstFunction(llmod);
while func != ptr::null_mut() {
let func_name = CStr::from_ptr(LLVMGetValueName(func));
if func_name != "_start" {
LLVMSetLinkage(func, LLVMLinkage::LLVMPrivateLinkage);
}
func = LLVMGetNextFunction(func);
}
llvm-sys
merthc
Ask me questions now.