91 changes: 76 additions & 15 deletions turbopack/crates/turbo-rcstr/src/dynamic.rs
@@ -3,12 +3,37 @@ use std::{num::NonZeroU8, ptr::NonNull};
use triomphe::Arc;

use crate::{
-INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, RcStr, TAG_MASK,
INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, RcStr, STATIC_TAG, TAG_MASK,
tagged_value::{MAX_INLINE_LEN, TaggedValue},
};

-pub(crate) struct PrehashedString {
-pub value: String,
pub enum Payload {
String(String),
Ref(&'static str),
}

impl Payload {
pub(crate) fn as_str(&self) -> &str {
match self {
Payload::String(s) => s,
Payload::Ref(s) => s,
}
}
pub(crate) fn into_string(self) -> String {
match self {
Payload::String(s) => s,
Payload::Ref(r) => r.to_string(),
}
}
}
impl PartialEq for Payload {
fn eq(&self, other: &Self) -> bool {
self.as_str() == other.as_str()
}
}

pub struct PrehashedString {
pub value: Payload,
/// This is not the actual `fxhash`, but rather a value that is passed to
/// `write_u64` of [rustc_hash::FxHasher].
pub hash: u64,
@@ -46,7 +71,7 @@ pub(crate) fn new_atom<T: AsRef<str> + Into<String>>(text: T) -> RcStr {
let hash = hash_bytes(text.as_ref().as_bytes());

let entry: Arc<PrehashedString> = Arc::new(PrehashedString {
-value: text.into(),
value: Payload::String(text.into()),
hash,
});
let entry = Arc::into_raw(entry);
@@ -61,6 +86,22 @@ }
}
}

#[inline(always)]
pub(crate) fn new_static_atom(string: &'static PrehashedString) -> RcStr {
let mut entry = string as *const PrehashedString;
debug_assert!(0 == entry as u8 & TAG_MASK);
// Tag it as a static pointer
entry = ((entry as usize) | STATIC_TAG as usize) as *mut PrehashedString;
let ptr: NonNull<PrehashedString> = unsafe {
// Safety: Box::into_raw returns a non-null pointer

Reviewer comment (Contributor):
The safety comment for NonNull::new_unchecked incorrectly states "Box::into_raw returns a non-null pointer" but this function doesn't use Box::into_raw.

Analysis

The safety comment on line 96 is misleading and incorrect. The function new_static_atom takes a reference to a static PrehashedString and converts it to a tagged pointer, but it never uses Box::into_raw. The comment appears to be copied from another function.

The actual safety requirement for NonNull::new_unchecked(entry as *mut _) is that entry is non-null. In this case, safety is ensured because:

  1. We start with string as *const PrehashedString where string is a &'static PrehashedString (references are never null)
  2. We then tag it with | STATIC_TAG as usize, which only affects the lower bits and preserves the non-null property
  3. The conversion as *mut PrehashedString and as *mut _ preserves non-null-ness

The incorrect comment could mislead future maintainers about the actual safety invariants.


Recommendation

Replace the incorrect safety comment with an accurate one:

// Safety: entry is non-null because it's derived from a valid reference and tagging preserves non-null-ness
NonNull::new_unchecked(entry as *mut _)

NonNull::new_unchecked(entry as *mut _)
};

RcStr {
unsafe_data: TaggedValue::new_ptr(ptr),
}
}
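
For reference, a minimal sketch (not part of this diff) of how these pieces could fit together inside the crate: a PrehashedString is declared as a static with Payload::Ref and a compile-time hash from the now-const hash_bytes, and new_static_atom turns it into an RcStr by tagging the pointer instead of allocating. The DOT static and dot_atom helper below are hypothetical names.

static DOT: PrehashedString = PrehashedString {
    value: Payload::Ref("."),
    // hash_bytes is const, so the hash is computed at compile time.
    hash: hash_bytes(b"."),
};

fn dot_atom() -> RcStr {
    // No Arc allocation: the &'static PrehashedString is tagged with STATIC_TAG.
    new_static_atom(&DOT)
}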

/// Attempts to construct an RcStr but only if it can be constructed inline.
/// This is primarily useful in constant contexts.
#[doc(hidden)]
@@ -90,7 +131,7 @@ const SEED2: u64 = 0x13198a2e03707344;
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;

#[inline]
-fn multiply_mix(x: u64, y: u64) -> u64 {
const fn multiply_mix(x: u64, y: u64) -> u64 {
#[cfg(target_pointer_width = "64")]
{
// We compute the full u64 x u64 -> u128 product, this is a single mul
@@ -131,6 +172,26 @@ }
}
}

// Const compatible helper function to read a u64 from a byte array at a given offset
const fn read_u64_le(bytes: &[u8], offset: usize) -> u64 {
(bytes[offset] as u64)
| ((bytes[offset + 1] as u64) << 8)
| ((bytes[offset + 2] as u64) << 16)
| ((bytes[offset + 3] as u64) << 24)
| ((bytes[offset + 4] as u64) << 32)
| ((bytes[offset + 5] as u64) << 40)
| ((bytes[offset + 6] as u64) << 48)
| ((bytes[offset + 7] as u64) << 56)
}

// Const compatible helper function to read a u32 from a byte array at a given offset
const fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
(bytes[offset] as u32)
| ((bytes[offset + 1] as u32) << 8)
| ((bytes[offset + 2] as u32) << 16)
| ((bytes[offset + 3] as u32) << 24)
}
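
The manual byte reads exist because slice-to-array conversion via try_into().unwrap() goes through trait methods, which cannot be called in a const fn on stable Rust. A hypothetical sanity check (not part of this diff) that the helpers agree with the from_le_bytes calls they replace:

#[test]
fn read_le_helpers_match_from_le_bytes() {
    let bytes = [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    assert_eq!(
        read_u64_le(&bytes, 2),
        u64::from_le_bytes(bytes[2..10].try_into().unwrap())
    );
    assert_eq!(
        read_u32_le(&bytes, 5),
        u32::from_le_bytes(bytes[5..9].try_into().unwrap())
    );
}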

/// Copied from `hash_bytes` of `rustc-hash`.
///
/// See: https://github.com/rust-lang/rustc-hash/blob/dc5c33f1283de2da64d8d7a06401d91aded03ad4/src/lib.rs#L252-L297
@@ -149,19 +210,20 @@ fn multiply_mix(x: u64, y: u64) -> u64 {
/// We don't bother avalanching here as we'll feed this hash into a
/// multiplication after which we take the high bits, which avalanches for us.
#[inline]
-fn hash_bytes(bytes: &[u8]) -> u64 {
#[doc(hidden)]
pub const fn hash_bytes(bytes: &[u8]) -> u64 {
let len = bytes.len();
let mut s0 = SEED1;
let mut s1 = SEED2;

if len <= 16 {
// XOR the input into s0, s1.
if len >= 8 {
-s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
-s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
s0 ^= read_u64_le(bytes, 0);
s1 ^= read_u64_le(bytes, len - 8);
} else if len >= 4 {
-s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
-s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
s0 ^= read_u32_le(bytes, 0) as u64;
s1 ^= read_u32_le(bytes, len - 4) as u64;
} else if len > 0 {
let lo = bytes[0];
let mid = bytes[len / 2];
@@ -173,8 +235,8 @@ fn hash_bytes(bytes: &[u8]) -> u64 {
// Handle bulk (can partially overlap with suffix).
let mut off = 0;
while off < len - 16 {
-let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap());
-let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap());
let x = read_u64_le(bytes, off);
let y = read_u64_le(bytes, off + 8);

// Replace s1 with a mix of s0, x, and y, and s0 with s1.
// This ensures the compiler can unroll this loop into two
@@ -188,9 +250,8 @@ fn hash_bytes(bytes: &[u8]) -> u64 {
off += 16;
}

-let suffix = &bytes[len - 16..];
-s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
-s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
s0 ^= read_u64_le(bytes, len - 16);
s1 ^= read_u64_le(bytes, len - 8);
}

multiply_mix(s0, s1) ^ (len as u64)
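
Since hash_bytes is now const (the loop, the indexing, and the u128 arithmetic in multiply_mix are all const-compatible), the hash of a string literal can be evaluated at compile time, which is what makes fully static PrehashedString values possible. A hypothetical example, not taken from this diff:

// Evaluated entirely at compile time; no hashing happens at runtime.
const DOT_HASH: u64 = hash_bytes(b".");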