From 75cfe661c5adc8df69ef08078e1582dbe399e274 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 28 Sep 2022 13:42:13 +0900 Subject: [PATCH 1/4] YJIT: Interleave inline and outlined code blocks Co-authored-by: Alan Wu Co-authored-by: Maxime Chevalier-Boisvert --- yjit/src/asm/mod.rs | 225 +++++++++++++++++++++++++++------ yjit/src/backend/arm64/mod.rs | 103 +++++++++------ yjit/src/backend/ir.rs | 17 +-- yjit/src/backend/tests.rs | 6 +- yjit/src/backend/x86_64/mod.rs | 32 ++++- yjit/src/codegen.rs | 80 +++++------- yjit/src/core.rs | 12 +- yjit/src/options.rs | 36 +++++- yjit/src/utils.rs | 9 +- 9 files changed, 360 insertions(+), 160 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 8356201ba6a2b8..441ce02d0920d5 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -1,9 +1,20 @@ +use std::cell::RefCell; +use std::cmp; use std::fmt; use std::mem; +use std::rc::Rc; +#[cfg(target_arch = "x86_64")] +use crate::backend::x86_64::JMP_PTR_BYTES; +#[cfg(target_arch = "aarch64")] +use crate::backend::arm64::JMP_PTR_BYTES; +use crate::backend::ir::Assembler; +use crate::backend::ir::Target; +use crate::virtualmem::WriteError; #[cfg(feature = "asm_comments")] use std::collections::BTreeMap; +use crate::codegen::CodegenGlobals; use crate::virtualmem::{VirtualMem, CodePtr}; // Lots of manual vertical alignment in there that rustfmt doesn't handle well. @@ -17,7 +28,8 @@ pub mod arm64; // /// Reference to an ASM label -struct LabelRef { +#[derive(Clone)] +pub struct LabelRef { // Position in the code block where the label reference exists pos: usize, @@ -36,7 +48,7 @@ struct LabelRef { /// Block of memory into which instructions can be assembled pub struct CodeBlock { // Memory for storing the encoded instructions - mem_block: VirtualMem, + mem_block: Rc>, // Memory block size mem_size: usize, @@ -44,6 +56,12 @@ pub struct CodeBlock { // Current writing position write_pos: usize, + // Size of a code page (inlined + outlined) + page_size: usize, + + // Size reserved for writing a jump to the next page + page_end_reserve: usize, + // Table of registered label addresses label_addrs: Vec, @@ -58,7 +76,6 @@ pub struct CodeBlock { asm_comments: BTreeMap>, // True for OutlinedCb - #[cfg(feature = "disasm")] pub outlined: bool, // Set if the CodeBlock is unable to output some instructions, @@ -67,27 +84,143 @@ pub struct CodeBlock { dropped_bytes: bool, } +/// Set of CodeBlock label states. Used for recovering the previous state. +pub struct LabelState { + label_addrs: Vec, + label_names: Vec, + label_refs: Vec, +} + impl CodeBlock { /// Make a new CodeBlock - pub fn new(mem_block: VirtualMem, outlined: bool) -> Self { - Self { - mem_size: mem_block.virtual_region_size(), + pub fn new(mem_block: Rc>, page_size: usize, outlined: bool) -> Self { + let mem_size = mem_block.borrow().virtual_region_size(); + let mut cb = Self { mem_block, + mem_size, write_pos: 0, + page_size, + page_end_reserve: JMP_PTR_BYTES, label_addrs: Vec::new(), label_names: Vec::new(), label_refs: Vec::new(), #[cfg(feature = "asm_comments")] asm_comments: BTreeMap::new(), - #[cfg(feature = "disasm")] outlined, dropped_bytes: false, + }; + cb.write_pos = cb.page_start(); + cb + } + + /// Move the CodeBlock to the next page. If it's on the furthest page, + /// move the other CodeBlock to the next page as well. 
+ pub fn next_page(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool { + let old_write_ptr = self.get_write_ptr(); + self.set_write_ptr(base_ptr); + self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES))); + + // Move self to the next page + let next_page_idx = self.write_pos / self.page_size + 1; + if !self.set_page(next_page_idx, &jmp_ptr) { + self.set_write_ptr(old_write_ptr); // rollback if there are no more pages + return false; + } + + // Move the other CodeBlock to the same page if it'S on the furthest page + self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr); + + return !self.dropped_bytes; + } + + /// Move the CodeBlock to page_idx only if it's not going backwards. + fn set_page(&mut self, page_idx: usize, jmp_ptr: &F) -> bool { + // Do not move the CodeBlock if page_idx points to an old position so that this + // CodeBlock will not overwrite existing code. + // TODO: We could move it to the last write_pos on that page if we keep track of + // the past write_pos of each page. + let mut dst_pos = self.page_size * page_idx + self.page_start(); + if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos { + // Reset dropped_bytes + self.dropped_bytes = false; + + // Convert dst_pos to dst_ptr + let src_pos = self.write_pos; + self.write_pos = dst_pos; + let dst_ptr = self.get_write_ptr(); + self.write_pos = src_pos; + + // Generate jmp_ptr from src_pos to dst_pos + self.without_page_end_reserve(|cb| { + cb.add_comment("jump to next page"); + jmp_ptr(cb, dst_ptr); + assert!(!cb.has_dropped_bytes()); + }); + + // Start the next code from dst_pos + self.write_pos = dst_pos; } + !self.dropped_bytes + } + + /// write_pos of the current page start + pub fn page_start_pos(&self) -> usize { + self.get_write_pos() / self.page_size * self.page_size + self.page_start() + } + + /// Offset of each page where CodeBlock should start writing + pub fn page_start(&self) -> usize { + let mut start = if self.inline() { + 0 + } else { + self.page_size / 2 + }; + if cfg!(debug_assertions) && !cfg!(test) { + // Leave illegal instructions at the beginning of each page to assert + // we're not accidentally crossing page boundaries. + start += JMP_PTR_BYTES; + } + start + } + + /// Offset of each page where CodeBlock should stop writing (exclusive) + pub fn page_end(&self) -> usize { + let page_end = if self.inline() { + self.page_size / 2 + } else { + self.page_size + }; + page_end - self.page_end_reserve // reserve space to jump to the next page + } + + /// Call a given function with page_end_reserve = 0 + pub fn without_page_end_reserve(&mut self, block: F) { + let old_page_end_reserve = self.page_end_reserve; + self.page_end_reserve = 0; + block(self); + self.page_end_reserve = old_page_end_reserve; + } + + /// Return the address ranges of a given address range that this CodeBlock can write. 
+ pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> { + let mut addrs = vec![]; + let mut start = start_ptr.raw_ptr() as usize; + let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize; + let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end); + while start < end { + let current_page = start / self.page_size * self.page_size; + let page_end = std::cmp::min(end, current_page + self.page_end()) as usize; + addrs.push((start, page_end)); + start = current_page + self.page_size + self.page_start(); + } + addrs } /// Check if this code block has sufficient remaining capacity pub fn has_capacity(&self, num_bytes: usize) -> bool { - self.write_pos + num_bytes < self.mem_size + let page_offset = self.write_pos % self.page_size; + let capacity = self.page_end().saturating_sub(page_offset); + num_bytes <= capacity } /// Add an assembly comment if the feature is on. @@ -121,8 +254,8 @@ impl CodeBlock { self.write_pos } - pub fn get_mem(&mut self) -> &mut VirtualMem { - &mut self.mem_block + pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + self.mem_block.borrow_mut().write_byte(write_ptr, byte) } // Set the current write position @@ -134,49 +267,31 @@ impl CodeBlock { self.write_pos = pos; } - // Align the current write pointer to a multiple of bytes - pub fn align_pos(&mut self, multiple: u32) { - // Compute the alignment boundary that is lower or equal - // Do everything with usize - let multiple: usize = multiple.try_into().unwrap(); - let pos = self.get_write_ptr().raw_ptr() as usize; - let remainder = pos % multiple; - let prev_aligned = pos - remainder; - - if prev_aligned == pos { - // Already aligned so do nothing - } else { - // Align by advancing - let pad = multiple - remainder; - self.set_pos(self.get_write_pos() + pad); - } - } - // Set the current write position from a pointer pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { - let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize(); + let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize(); self.set_pos(pos); } /// Get a (possibly dangling) direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { - self.mem_block.start_ptr().add_bytes(offset) + self.mem_block.borrow().start_ptr().add_bytes(offset) } /// Get a (possibly dangling) direct pointer to the current write position - pub fn get_write_ptr(&mut self) -> CodePtr { + pub fn get_write_ptr(&self) -> CodePtr { self.get_ptr(self.write_pos) } /// Write a single byte at the current position. pub fn write_byte(&mut self, byte: u8) { let write_ptr = self.get_write_ptr(); - - if self.mem_block.write_byte(write_ptr, byte).is_ok() { - self.write_pos += 1; - } else { + if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() { self.dropped_bytes = true; } + + // Always advance write_pos since arm64 PadEntryExit needs this to stop the loop. + self.write_pos += 1; } /// Write multiple bytes starting from the current position. 
@@ -242,6 +357,9 @@ impl CodeBlock { self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode }); // Move past however many bytes the instruction takes up + if !self.has_capacity(num_bytes) { + self.dropped_bytes = true; // retry emitting the Insn after next_page + } self.write_pos += num_bytes; } @@ -274,14 +392,43 @@ impl CodeBlock { assert!(self.label_refs.is_empty()); } + pub fn clear_labels(&mut self) { + self.label_addrs.clear(); + self.label_names.clear(); + self.label_refs.clear(); + } + + pub fn get_label_state(&self) -> LabelState { + LabelState { + label_addrs: self.label_addrs.clone(), + label_names: self.label_names.clone(), + label_refs: self.label_refs.clone(), + } + } + + pub fn set_label_state(&mut self, state: LabelState) { + self.label_addrs = state.label_addrs; + self.label_names = state.label_names; + self.label_refs = state.label_refs; + } + pub fn mark_all_executable(&mut self) { - self.mem_block.mark_all_executable(); + self.mem_block.borrow_mut().mark_all_executable(); } - #[cfg(feature = "disasm")] pub fn inline(&self) -> bool { !self.outlined } + + pub fn other_cb(&self) -> Option<&'static mut Self> { + if !CodegenGlobals::has_instance() { + None + } else if self.inline() { + Some(CodegenGlobals::get_outlined_cb().unwrap()) + } else { + Some(CodegenGlobals::get_inline_cb()) + } + } } #[cfg(test)] @@ -295,7 +442,7 @@ impl CodeBlock { let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size); - Self::new(virt_mem, false) + Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false) } } @@ -303,7 +450,7 @@ impl CodeBlock { impl fmt::LowerHex for CodeBlock { fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { for pos in 0..self.write_pos { - let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; + let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() }; fmtr.write_fmt(format_args!("{:02x}", byte))?; } Ok(()) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 0180737d4d640b..52103aee359f3c 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -4,7 +4,7 @@ use crate::asm::{CodeBlock}; use crate::asm::arm64::*; -use crate::codegen::{JITState}; +use crate::codegen::{JITState, CodegenGlobals}; use crate::cruby::*; use crate::backend::ir::*; use crate::virtualmem::CodePtr; @@ -36,6 +36,9 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); pub const C_SP_REG: A64Opnd = X31; pub const C_SP_STEP: i32 = 16; +// The number of bytes that are generated by emit_jmp_ptr +pub const JMP_PTR_BYTES: usize = 20; + /// Map Opnd to A64Opnd impl From for A64Opnd { fn from(opnd: Opnd) -> Self { @@ -567,7 +570,7 @@ impl Assembler /// Emit the required instructions to load the given value into the /// given register. Our goal here is to use as few instructions as /// possible to get this value into the register. - fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 { + fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { let mut current = value; if current <= 0xffff { @@ -680,6 +683,31 @@ impl Assembler ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } + fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) { + let src_addr = cb.get_write_ptr().into_i64(); + let dst_addr = dst_ptr.into_i64(); + + // If the offset is short enough, then we'll use the + // branch instruction. 
Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { + b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + 1 + } else { + let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); + br(cb, Assembler::SCRATCH0); + num_insns + 1 + }; + + // Make sure it's always a consistent number of + // instructions in case it gets patched and has to + // use the other branch. + for _ in num_insns..(JMP_PTR_BYTES / 4) { + nop(cb); + } + } + // dbg!(&self.insns); // List of GC offsets @@ -687,7 +715,13 @@ impl Assembler // For each instruction let start_write_pos = cb.get_write_pos(); - for insn in &self.insns { + let mut i: usize = 0; + while let Some(insn) = self.insns.get(i) { + let src_ptr = cb.get_write_ptr(); + let had_dropped_bytes = cb.has_dropped_bytes(); + let old_label_state = cb.get_label_state(); + let mut insn_gc_offsets: Vec = Vec::new(); + match insn { Insn::Comment(text) => { if cfg!(feature = "asm_comments") { @@ -796,7 +830,7 @@ impl Assembler cb.write_bytes(&value.as_u64().to_le_bytes()); let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - gc_offsets.push(ptr_offset); + insn_gc_offsets.push(ptr_offset); }, Opnd::None => { unreachable!("Attempted to load from None operand"); @@ -904,28 +938,7 @@ impl Assembler Insn::Jmp(target) => { match target { Target::CodePtr(dst_ptr) => { - let src_addr = cb.get_write_ptr().into_i64(); - let dst_addr = dst_ptr.into_i64(); - - // If the offset is short enough, then we'll use the - // branch instruction. Otherwise, we'll move the - // destination into a register and use the branch - // register instruction. - let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { - b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); - 0 - } else { - let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - br(cb, Self::SCRATCH0); - num_insns - }; - - // Make sure it's always a consistent number of - // instructions in case it gets patched and has to - // use the other branch. - for _ in num_insns..4 { - nop(cb); - } + emit_jmp_ptr(cb, *dst_ptr); }, Target::Label(label_idx) => { // Here we're going to save enough space for @@ -997,13 +1010,21 @@ impl Assembler csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code - Insn::PadEntryExit => { - let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions - while (cb.get_write_pos() - start_write_pos) < jmp_len { + Insn::PadInvalPatch => { + while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES { nop(cb); } } }; + + // On failure, jump to the next page and retry the current insn + if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) { + // Reset cb states before retrying the current Insn + cb.set_label_state(old_label_state); + } else { + i += 1; + gc_offsets.append(&mut insn_gc_offsets); + } } gc_offsets @@ -1020,21 +1041,23 @@ impl Assembler assert!(label_idx == idx); } - let start_write_pos = cb.get_write_pos(); + let start_ptr = cb.get_write_ptr(); let gc_offsets = asm.arm64_emit(cb); - if !cb.has_dropped_bytes() { + if cb.has_dropped_bytes() { + cb.clear_labels(); + } else { cb.link_labels(); - } - // Invalidate icache for newly written out region so we don't run stale code. 
- #[cfg(not(test))] - { - let start = cb.get_ptr(start_write_pos).raw_ptr(); - let write_ptr = cb.get_write_ptr().raw_ptr(); - let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr(); - let end = std::cmp::min(write_ptr, codeblock_end); - unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + // Invalidate icache for newly written out region so we don't run stale code. + // It should invalidate only the code ranges of the current cb because the code + // ranges of the other cb might have a memory region that is still PROT_NONE. + #[cfg(not(test))] + cb.without_page_end_reserve(|cb| { + for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) { + unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + } + }); } gc_offsets diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index ba7e3721884340..e11235aec964b4 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -5,6 +5,7 @@ use std::cell::Cell; use std::fmt; use std::convert::From; +use std::io::Write; use std::mem::take; use crate::cruby::{VALUE}; use crate::virtualmem::{CodePtr}; @@ -433,9 +434,9 @@ pub enum Insn { // binary OR operation. Or { left: Opnd, right: Opnd, out: Opnd }, - /// Pad nop instructions to accomodate Op::Jmp in case the block is - /// invalidated. - PadEntryExit, + /// Pad nop instructions to accomodate Op::Jmp in case the block or the insn + /// is invalidated. + PadInvalPatch, // Mark a position in the generated code PosMarker(PosMarkerFn), @@ -521,7 +522,7 @@ impl Insn { Insn::Mov { .. } => "Mov", Insn::Not { .. } => "Not", Insn::Or { .. } => "Or", - Insn::PadEntryExit => "PadEntryExit", + Insn::PadInvalPatch => "PadEntryExit", Insn::PosMarker(_) => "PosMarker", Insn::RShift { .. } => "RShift", Insn::Store { .. } => "Store", @@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::Jz(_) | Insn::Label(_) | Insn::LeaLabel { .. } | - Insn::PadEntryExit | + Insn::PadInvalPatch | Insn::PosMarker(_) => None, Insn::CPopInto(opnd) | Insn::CPush(opnd) | @@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::Jz(_) | Insn::Label(_) | Insn::LeaLabel { .. 
} | - Insn::PadEntryExit | + Insn::PadInvalPatch | Insn::PosMarker(_) => None, Insn::CPopInto(opnd) | Insn::CPush(opnd) | @@ -1474,8 +1475,8 @@ impl Assembler { out } - pub fn pad_entry_exit(&mut self) { - self.push_insn(Insn::PadEntryExit); + pub fn pad_inval_patch(&mut self) { + self.push_insn(Insn::PadInvalPatch); } //pub fn pos_marker(&mut self, marker_fn: F) diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 1df726c4686e81..6bdbb09d941664 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -220,7 +220,7 @@ fn test_jcc_ptr() { let (mut asm, mut cb) = setup_asm(); - let side_exit = Target::CodePtr((5 as *mut u8).into()); + let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); asm.test( Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), @@ -237,7 +237,7 @@ fn test_jmp_ptr() { let (mut asm, mut cb) = setup_asm(); - let stub = Target::CodePtr((5 as *mut u8).into()); + let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); asm.jmp(stub); asm.compile_with_num_regs(&mut cb, 0); @@ -248,7 +248,7 @@ fn test_jo() { let (mut asm, mut cb) = setup_asm(); - let side_exit = Target::CodePtr((5 as *mut u8).into()); + let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into()); let arg1 = Opnd::mem(64, SP, 0); let arg0 = Opnd::mem(64, SP, 8); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 2f770c2eac7923..6d3bb481de0f0f 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -9,6 +9,7 @@ use crate::asm::x86_64::*; use crate::codegen::{JITState}; use crate::cruby::*; use crate::backend::ir::*; +use crate::codegen::CodegenGlobals; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [ pub const C_RET_REG: Reg = RAX_REG; pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); +// The number of bytes that are generated by jmp_ptr +pub const JMP_PTR_BYTES: usize = 6; + /// Map Opnd to X86Opnd impl From for X86Opnd { fn from(opnd: Opnd) -> Self { @@ -372,7 +376,13 @@ impl Assembler // For each instruction let start_write_pos = cb.get_write_pos(); - for insn in &self.insns { + let mut i: usize = 0; + while let Some(insn) = self.insns.get(i) { + let src_ptr = cb.get_write_ptr(); + let had_dropped_bytes = cb.has_dropped_bytes(); + let old_label_state = cb.get_label_state(); + let mut insn_gc_offsets: Vec = Vec::new(); + match insn { Insn::Comment(text) => { if cfg!(feature = "asm_comments") { @@ -458,7 +468,7 @@ impl Assembler if !val.special_const_p() { // The pointer immediate is encoded as the last part of the mov written out let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); - gc_offsets.push(ptr_offset); + insn_gc_offsets.push(ptr_offset); } } }, @@ -648,11 +658,10 @@ impl Assembler emit_csel(cb, *truthy, *falsy, *out, cmovl); } Insn::LiveReg { .. 
} => (), // just a reg alloc signal, no code - Insn::PadEntryExit => { - // We assume that our Op::Jmp usage that gets invalidated is <= 5 - let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap(); - if code_size < 5 { - nop(cb, 5 - code_size); + Insn::PadInvalPatch => { + let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos())); + if code_size < JMP_PTR_BYTES { + nop(cb, (JMP_PTR_BYTES - code_size) as u32); } } @@ -662,6 +671,15 @@ impl Assembler // instructions can never make it to the emit stage. _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn) }; + + // On failure, jump to the next page and retry the current insn + if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) { + // Reset cb states before retrying the current Insn + cb.set_label_state(old_label_state); + } else { + i += 1; + gc_offsets.append(&mut insn_gc_offsets); + } } gc_offsets diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index f8379a71596984..a12e4751d3aa49 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -13,13 +13,15 @@ use crate::utils::*; use CodegenStatus::*; use InsnOpnd::*; - +use std::cell::RefCell; +use std::cell::RefMut; use std::cmp; use std::collections::HashMap; use std::ffi::CStr; use std::mem::{self, size_of}; use std::os::raw::c_uint; use std::ptr; +use std::rc::Rc; use std::slice; pub use crate::virtualmem::CodePtr; @@ -296,6 +298,7 @@ fn jit_prepare_routine_call( /// Record the current codeblock write position for rewriting into a jump into /// the outlined block later. Used to implement global code invalidation. fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { + asm.pad_inval_patch(); asm.pos_marker(move |code_ptr| { CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos); }); @@ -606,19 +609,6 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) { /// Compile an interpreter entry block to be inserted into an iseq /// Returns None if compilation fails. pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option { - const MAX_PROLOGUE_SIZE: usize = 1024; - - // Check if we have enough executable memory - if !cb.has_capacity(MAX_PROLOGUE_SIZE) { - return None; - } - - let old_write_pos = cb.get_write_pos(); - - // TODO: figure out if this is actually beneficial for performance - // Align the current write position to cache line boundaries - cb.align_pos(64); - let code_ptr = cb.get_write_ptr(); let mut asm = Assembler::new(); @@ -660,10 +650,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O asm.compile(cb); - // Verify MAX_PROLOGUE_SIZE - assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE); - - return Some(code_ptr); + if (cb.has_dropped_bytes()) { + None + } else { + Some(code_ptr) + } } // Generate code to check for interrupts and take a side-exit. 
@@ -853,7 +844,7 @@ pub fn gen_single_block( { let mut block = jit.block.borrow_mut(); if block.entry_exit.is_some() { - asm.pad_entry_exit(); + asm.pad_inval_patch(); } // Compile code into the code block @@ -6538,29 +6529,13 @@ static mut CODEGEN_GLOBALS: Option = None; impl CodegenGlobals { /// Initialize the codegen globals pub fn init() { - // Executable memory size in MiB - let mem_size = get_option!(exec_mem_size) * 1024 * 1024; + // Executable memory and code page size in bytes + let mem_size = get_option!(exec_mem_size); + let code_page_size = get_option!(code_page_size); #[cfg(not(test))] let (mut cb, mut ocb) = { - // TODO(alan): we can error more gracefully when the user gives - // --yjit-exec-mem=absurdly-large-number - // - // 2 GiB. It's likely a bug if we generate this much code. - const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024; - assert!(mem_size <= MAX_BUFFER_SIZE); - let mem_size_u32 = mem_size as u32; - let half_size = mem_size / 2; - - let page_size = unsafe { rb_yjit_get_page_size() }; - let assert_page_aligned = |ptr| assert_eq!( - 0, - ptr as usize % page_size.as_usize(), - "Start of virtual address block should be page-aligned", - ); - - let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) }; - let second_half = virt_block.wrapping_add(half_size); + let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; // Memory protection syscalls need page-aligned addresses, so check it here. Assuming // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the @@ -6569,26 +6544,25 @@ impl CodegenGlobals { // // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. 
- assert_page_aligned(virt_block); - assert_page_aligned(second_half); + let page_size = unsafe { rb_yjit_get_page_size() }; + assert_eq!( + virt_block as usize % page_size.as_usize(), 0, + "Start of virtual address block should be page-aligned", + ); + assert_eq!(code_page_size % page_size.as_usize(), 0, "code_page_size was not page-aligned"); use crate::virtualmem::*; - let first_half = VirtualMem::new( + let mem_block = VirtualMem::new( SystemAllocator {}, page_size, virt_block, - half_size - ); - let second_half = VirtualMem::new( - SystemAllocator {}, - page_size, - second_half, - half_size + mem_size, ); + let mem_block = Rc::new(RefCell::new(mem_block)); - let cb = CodeBlock::new(first_half, false); - let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true)); + let cb = CodeBlock::new(mem_block.clone(), code_page_size, false); + let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, code_page_size, true)); (cb, ocb) }; @@ -6696,6 +6670,10 @@ impl CodegenGlobals { unsafe { CODEGEN_GLOBALS.as_mut().unwrap() } } + pub fn has_instance() -> bool { + unsafe { CODEGEN_GLOBALS.as_mut().is_some() } + } + /// Get a mutable reference to the inline code block pub fn get_inline_cb() -> &'static mut CodeBlock { &mut CodegenGlobals::get_instance().inline_cb diff --git a/yjit/src/core.rs b/yjit/src/core.rs index c8078bb6e358ee..83412939526cee 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -662,7 +662,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { if new_addr != object { for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); - cb.get_mem().write_byte(byte_code_ptr, byte) + cb.write_mem(byte_code_ptr, byte) .expect("patching existing code should be within bounds"); } } @@ -1896,7 +1896,9 @@ pub fn gen_branch( // Call the branch generation function asm.mark_branch_start(&branchref); - gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default); + if let Some(dst_addr) = branch.dst_addrs[0] { + gen_fn(asm, dst_addr, branch.dst_addrs[1], BranchShape::Default); + } asm.mark_branch_end(&branchref); } @@ -1935,6 +1937,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu branch.shape = BranchShape::Default; // Call the branch generation function + asm.comment("gen_direct_jmp: existing block"); asm.mark_branch_start(&branchref); gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); asm.mark_branch_end(&branchref); @@ -1945,6 +1948,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu branch.shape = BranchShape::Next0; // The branch is effectively empty (a noop) + asm.comment("gen_direct_jmp: fallthrough"); asm.mark_branch_start(&branchref); asm.mark_branch_end(&branchref); } @@ -1983,7 +1987,9 @@ pub fn defer_compilation( // Call the branch generation function asm.mark_branch_start(&branch_rc); - gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); + if let Some(dst_addr) = branch.dst_addrs[0] { + gen_jump_branch(asm, dst_addr, None, BranchShape::Default); + } asm.mark_branch_end(&branch_rc); } diff --git a/yjit/src/options.rs b/yjit/src/options.rs index f73dca67de21bb..d2b43ecb262401 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -4,9 +4,14 @@ use std::ffi::CStr; #[derive(Clone, PartialEq, Eq, Debug)] #[repr(C)] pub struct Options { - // Size of the executable memory block to allocate in MiB + // Size of the executable memory block 
to allocate in bytes + // Note that the command line argument is expressed in MiB and not bytes pub exec_mem_size: usize, + // Size of each executable memory code page in bytes + // Note that the command line argument is expressed in KiB and not bytes + pub code_page_size: usize, + // Number of method calls after which to start generating code // Threshold==1 means compile on first execution pub call_threshold: usize, @@ -48,7 +53,8 @@ pub struct Options { // Initialize the options to default values pub static mut OPTIONS: Options = Options { - exec_mem_size: 256, + exec_mem_size: 256 * 1024 * 1024, + code_page_size: 16 * 1024, call_threshold: 10, greedy_versioning: false, no_type_prop: false, @@ -118,8 +124,30 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { match (opt_name, opt_val) { ("", "") => (), // Simply --yjit - ("exec-mem-size", _) => match opt_val.parse() { - Ok(n) => unsafe { OPTIONS.exec_mem_size = n }, + ("exec-mem-size", _) => match opt_val.parse::() { + Ok(n) => { + if n == 0 || n > 2 * 1024 * 1024 { + return None + } + + // Convert from MiB to bytes internally for convenience + unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 } + } + Err(_) => { + return None; + } + }, + + ("code-page-size", _) => match opt_val.parse::() { + Ok(n) => { + // Enforce bounds checks and that n is divisible by 4KiB + if n < 4 || n > 256 || n % 4 != 0 { + return None + } + + // Convert from KiB to bytes internally for convenience + unsafe { OPTIONS.code_page_size = n * 1024 } + } Err(_) => { return None; } diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index cabebb7dccda4c..b156c9d5eda721 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -74,14 +74,13 @@ pub(crate) use offset_of; // This should work fine on ASCII strings and anything else // that is considered legal UTF-8, including embedded nulls. fn ruby_str_to_rust(v: VALUE) -> String { - // Make sure the CRuby encoding is UTF-8 compatible - let encoding = unsafe { rb_ENCODING_GET(v) } as u32; - assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII); - let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; - String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation + match String::from_utf8(str_slice.to_vec()) { + Ok(utf8) => utf8, + Err(_) => String::new(), + } } // Location is the file defining the method, colon, method name. From a722743f4d4f2cc071556bdcb505354a1853b270 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 14 Oct 2022 12:56:03 -0700 Subject: [PATCH 2/4] Explain what set_pos does [ci skip] --- yjit/src/asm/mod.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 441ce02d0920d5..1ab813964c8f5b 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -137,8 +137,23 @@ impl CodeBlock { fn set_page(&mut self, page_idx: usize, jmp_ptr: &F) -> bool { // Do not move the CodeBlock if page_idx points to an old position so that this // CodeBlock will not overwrite existing code. - // TODO: We could move it to the last write_pos on that page if we keep track of - // the past write_pos of each page. 
+ // + // Let's say this is the current situation: + // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)] + // + // When cb needs to patch page1, this will be temporarily changed to: + // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)] + // + // While patching page1, cb may need to jump to page2. What set_page currently does is: + // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)] + // instead of: + // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3] + // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's + // write_pos point to existing code in page2, which might let ocb overwrite it later. + // + // We could remember the last write_pos in page2 and let set_page use that position, + // but you need to waste some space for keeping write_pos for every single page. + // It doesn't seem necessary for performance either. So we're currently not doing it. let mut dst_pos = self.page_size * page_idx + self.page_start(); if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos { // Reset dropped_bytes From 861c01615bdc9059e22fff933bc3465c4458642d Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 14 Oct 2022 12:59:51 -0700 Subject: [PATCH 3/4] Rename i to insn_idx --- yjit/src/backend/arm64/mod.rs | 6 +++--- yjit/src/backend/x86_64/mod.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 52103aee359f3c..5df072ed387ad7 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -715,8 +715,8 @@ impl Assembler // For each instruction let start_write_pos = cb.get_write_pos(); - let mut i: usize = 0; - while let Some(insn) = self.insns.get(i) { + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { let src_ptr = cb.get_write_ptr(); let had_dropped_bytes = cb.has_dropped_bytes(); let old_label_state = cb.get_label_state(); @@ -1022,7 +1022,7 @@ impl Assembler // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); } else { - i += 1; + insn_idx += 1; gc_offsets.append(&mut insn_gc_offsets); } } diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 6d3bb481de0f0f..3843cc0b3fe34f 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -376,8 +376,8 @@ impl Assembler // For each instruction let start_write_pos = cb.get_write_pos(); - let mut i: usize = 0; - while let Some(insn) = self.insns.get(i) { + let mut insns_idx: usize = 0; + while let Some(insn) = self.insns.get(insns_idx) { let src_ptr = cb.get_write_ptr(); let had_dropped_bytes = cb.has_dropped_bytes(); let old_label_state = cb.get_label_state(); @@ -677,7 +677,7 @@ impl Assembler // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); } else { - i += 1; + insns_idx += 1; gc_offsets.append(&mut insn_gc_offsets); } } From 16c630a94b088202c7debb2c4afa44d1c0ad70ef Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 14 Oct 2022 14:41:00 -0700 Subject: [PATCH 4/4] Clear labels on x86_64 as well --- yjit/src/backend/x86_64/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 3843cc0b3fe34f..1e5a4e6f946948 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -698,7 +698,9 @@ impl Assembler let 
gc_offsets = asm.x86_emit(cb); - if !cb.has_dropped_bytes() { + if cb.has_dropped_bytes() { + cb.clear_labels(); + } else { cb.link_labels(); }
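
Note (illustrative, not part of the patch): the page layout that the new `page_start()`/`page_end()` helpers implement can be summarized with a minimal standalone sketch. It assumes the defaults from this series (16 KiB code pages, `JMP_PTR_BYTES` of 6 on x86_64 vs. 20 on arm64) and leaves out the debug-build illegal-instruction padding.

```rust
// Sketch only: how each code page is split between the inline and outlined
// CodeBlocks, mirroring page_start()/page_end() from this patch.
const PAGE_SIZE: usize = 16 * 1024; // --yjit-code-page-size default (16 KiB)
const JMP_PTR_BYTES: usize = 6;     // x86_64; arm64 reserves 20 bytes

/// First write offset within a page for a given CodeBlock.
fn page_start(inline: bool) -> usize {
    if inline { 0 } else { PAGE_SIZE / 2 }
}

/// One-past-the-last writable offset, leaving room for the jump to the next page.
fn page_end(inline: bool) -> usize {
    let end = if inline { PAGE_SIZE / 2 } else { PAGE_SIZE };
    end - JMP_PTR_BYTES
}

fn main() {
    for (name, inline) in [("inline cb", true), ("outlined ocb", false)] {
        println!(
            "{name}: writes bytes [{}, {}) of every {PAGE_SIZE}-byte page",
            page_start(inline),
            page_end(inline),
        );
    }
}
```

When either half runs out of room, `next_page` emits the reserved jump into the freshly computed `page_start` of the next page and drags the other CodeBlock forward with it, which is what keeps the two halves interleaved within the same pages.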