magnus/r_string.rs
1//! Types for working with Ruby’s String class.
2
3use std::{
4 borrow::Cow,
5 cmp::Ordering,
6 ffi::CString,
7 fmt, io,
8 iter::Iterator,
9 mem::transmute,
10 os::raw::{c_char, c_long},
11 path::{Path, PathBuf},
12 ptr, slice, str,
13};
14
15#[cfg(ruby_gte_3_0)]
16use rb_sys::rb_str_to_interned_str;
17use rb_sys::{
18 self, rb_enc_str_coderange, rb_enc_str_new, rb_str_buf_append, rb_str_buf_new, rb_str_capacity,
19 rb_str_cat, rb_str_cmp, rb_str_comparable, rb_str_conv_enc, rb_str_drop_bytes, rb_str_dump,
20 rb_str_ellipsize, rb_str_new, rb_str_new_frozen, rb_str_new_shared, rb_str_offset, rb_str_plus,
21 rb_str_replace, rb_str_scrub, rb_str_shared_replace, rb_str_split, rb_str_strlen, rb_str_times,
22 rb_str_to_str, rb_str_update, rb_utf8_str_new, rb_utf8_str_new_static, ruby_coderange_type,
23 ruby_rstring_flags, ruby_value_type, RSTRING_LEN, RSTRING_PTR, VALUE,
24};
25
26use crate::{
27 encoding::{Coderange, EncodingCapable, RbEncoding},
28 error::{protect, Error},
29 into_value::{IntoValue, IntoValueFromNative},
30 object::Object,
31 r_array::RArray,
32 try_convert::TryConvert,
33 value::{
34 private::{self, ReprValue as _},
35 NonZeroValue, ReprValue, Value,
36 },
37 Ruby,
38};
39
40/// # `RString`
41///
42/// Functions that can be used to create Ruby `String`s.
43///
44/// See also the [`RString`] type.
45impl Ruby {
46 /// Create a new Ruby string from the Rust string `s`.
47 ///
48 /// The encoding of the Ruby string will be UTF-8.
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use magnus::{rb_assert, Error, Ruby};
54 ///
55 /// fn example(ruby: &Ruby) -> Result<(), Error> {
56 /// let val = ruby.str_new("example");
57 /// rb_assert!(ruby, r#"val == "example""#, val);
58 ///
59 /// Ok(())
60 /// }
61 /// # Ruby::init(example).unwrap()
62 /// ```
63 pub fn str_new(&self, s: &str) -> RString {
64 let len = s.len();
65 let ptr = s.as_ptr();
66 unsafe {
67 RString::from_rb_value_unchecked(rb_utf8_str_new(ptr as *const c_char, len as c_long))
68 }
69 }
70
71 /// Implementation detail of [`r_string`].
72 #[doc(hidden)]
73 #[inline]
74 pub unsafe fn str_new_lit(&self, ptr: *const c_char, len: c_long) -> RString {
75 RString::from_rb_value_unchecked(rb_utf8_str_new_static(ptr, len))
76 }
77
78 /// Create a new Ruby string with capacity `n`.
79 ///
80 /// The encoding will be set to ASCII-8BIT (aka BINARY). See also
81 /// [`with_capacity`](RString::with_capacity).
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// use magnus::{rb_assert, Error, Ruby};
87 ///
88 /// fn example(ruby: &Ruby) -> Result<(), Error> {
89 /// let buf = ruby.str_buf_new(4096);
90 /// buf.cat(&[13, 14, 10, 13, 11, 14, 14, 15]);
91 /// rb_assert!(ruby, r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf);
92 ///
93 /// Ok(())
94 /// }
95 /// # Ruby::init(example).unwrap()
96 /// ```
97 pub fn str_buf_new(&self, n: usize) -> RString {
98 unsafe { RString::from_rb_value_unchecked(rb_str_buf_new(n as c_long)) }
99 }
100
101 /// Create a new Ruby string with capacity `n`.
102 ///
103 /// The encoding will be set to UTF-8. See also
104 /// [`buf_new`](RString::buf_new).
105 ///
106 /// # Examples
107 ///
108 /// ```
109 /// use magnus::{rb_assert, Error, Ruby};
110 ///
111 /// fn example(ruby: &Ruby) -> Result<(), Error> {
112 /// let s = ruby.str_with_capacity(9);
113 /// s.cat("foo");
114 /// s.cat("bar");
115 /// s.cat("baz");
116 /// rb_assert!(ruby, r#"s == "foobarbaz""#, s);
117 ///
118 /// Ok(())
119 /// }
120 /// # Ruby::init(example).unwrap()
121 /// ```
122 pub fn str_with_capacity(&self, n: usize) -> RString {
123 let s = self.str_buf_new(n);
124 s.enc_associate(self.utf8_encindex()).unwrap();
125 s
126 }
127
128 /// Create a new Ruby string from the Rust slice `s`.
129 ///
130 /// The encoding of the Ruby string will be set to ASCII-8BIT (aka BINARY).
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// use magnus::{rb_assert, Error, Ruby};
136 ///
137 /// fn example(ruby: &Ruby) -> Result<(), Error> {
138 /// let buf = ruby.str_from_slice(&[13, 14, 10, 13, 11, 14, 14, 15]);
139 /// rb_assert!(ruby, r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf);
140 ///
141 /// Ok(())
142 /// }
143 /// # Ruby::init(example).unwrap()
144 /// ```
145 pub fn str_from_slice(&self, s: &[u8]) -> RString {
146 let len = s.len();
147 let ptr = s.as_ptr();
148 unsafe { RString::from_rb_value_unchecked(rb_str_new(ptr as *const c_char, len as c_long)) }
149 }
150
151 /// Create a new Ruby string from the value `s` with the encoding `enc`.
152 ///
153 /// # Examples
154 ///
155 /// ```
156 /// use magnus::{rb_assert, Error, Ruby};
157 ///
158 /// fn example(ruby: &Ruby) -> Result<(), Error> {
159 /// let val = ruby.enc_str_new("example", ruby.usascii_encoding());
160 /// rb_assert!(ruby, r#"val == "example""#, val);
161 ///
162 /// Ok(())
163 /// }
164 /// # Ruby::init(example).unwrap()
165 /// ```
166 ///
167 /// ```
168 /// use magnus::{rb_assert, Error, Ruby};
169 ///
170 /// fn example(ruby: &Ruby) -> Result<(), Error> {
171 /// let val = ruby.enc_str_new([255, 128, 128], ruby.ascii8bit_encoding());
172 /// rb_assert!(
173 /// ruby,
174 /// r#"val == "\xFF\x80\x80".force_encoding("BINARY")"#,
175 /// val
176 /// );
177 ///
178 /// Ok(())
179 /// }
180 /// # Ruby::init(example).unwrap()
181 /// ```
182 pub fn enc_str_new<T, E>(&self, s: T, enc: E) -> RString
183 where
184 T: AsRef<[u8]>,
185 E: Into<RbEncoding>,
186 {
187 let s = s.as_ref();
188 let len = s.len();
189 let ptr = s.as_ptr();
190 unsafe {
191 RString::from_rb_value_unchecked(rb_enc_str_new(
192 ptr as *const c_char,
193 len as c_long,
194 enc.into().as_ptr(),
195 ))
196 }
197 }
198
199 /// Create a new Ruby string from the Rust char `c`.
200 ///
201 /// The encoding of the Ruby string will be UTF-8.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// use magnus::{rb_assert, Error, Ruby};
207 ///
208 /// fn example(ruby: &Ruby) -> Result<(), Error> {
209 /// let c = ruby.str_from_char('a');
210 /// rb_assert!(ruby, r#"c == "a""#, c);
211 ///
212 /// Ok(())
213 /// }
214 /// # Ruby::init(example).unwrap()
215 /// ```
216 ///
217 /// ```
218 /// use magnus::{rb_assert, Error, Ruby};
219 ///
220 /// fn example(ruby: &Ruby) -> Result<(), Error> {
221 /// let c = ruby.str_from_char('🦀');
222 /// rb_assert!(ruby, r#"c == "🦀""#, c);
223 ///
224 /// Ok(())
225 /// }
226 /// # Ruby::init(example).unwrap()
227 /// ```
228 pub fn str_from_char(&self, c: char) -> RString {
229 let mut buf = [0; 4];
230 self.str_new(c.encode_utf8(&mut buf[..]))
231 }
232
233 /// Create a new Ruby string containing the codepoint `code` in the
234 /// encoding `enc`.
235 ///
236 /// The encoding of the Ruby string will be the passed encoding `enc`.
237 ///
238 /// # Examples
239 ///
240 /// ```
241 /// use magnus::{rb_assert, Error, Ruby};
242 ///
243 /// fn example(ruby: &Ruby) -> Result<(), Error> {
244 /// let c = ruby.chr(97, ruby.usascii_encoding())?;
245 /// rb_assert!(ruby, r#"c == "a""#, c);
246 ///
247 /// Ok(())
248 /// }
249 /// # Ruby::init(example).unwrap()
250 /// ```
251 ///
252 /// ```
253 /// use magnus::{rb_assert, Error, Ruby};
254 ///
255 /// fn example(ruby: &Ruby) -> Result<(), Error> {
256 /// let c = ruby.chr(129408, ruby.utf8_encoding())?;
257 /// rb_assert!(ruby, r#"c == "🦀""#, c);
258 ///
259 /// Ok(())
260 /// }
261 /// # Ruby::init(example).unwrap()
262 /// ```
263 pub fn chr<T>(&self, code: u32, enc: T) -> Result<RString, Error>
264 where
265 T: Into<RbEncoding>,
266 {
267 enc.into().chr(code)
268 }
269}
270
/// A Value pointer to a RString struct, Ruby's internal representation of
/// strings.
///
/// See the [`ReprValue`] and [`Object`] traits for additional methods
/// available on this type. See [`Ruby`](Ruby#rstring) for methods to create an
/// `RString`.
// NOTE: `ref_from_value` reinterprets a `&Value` as `&RString` via a pointer
// cast, so `#[repr(transparent)]` here is load-bearing. That cast also
// presumes `NonZeroValue` has the same layout as `Value` — TODO confirm
// against its definition.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct RString(NonZeroValue);
280
281impl RString {
282 /// Return `Some(RString)` if `val` is a `RString`, `None` otherwise.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use magnus::{eval, RString};
288 /// # let _cleanup = unsafe { magnus::embed::init() };
289 ///
290 /// assert!(RString::from_value(eval(r#""example""#).unwrap()).is_some());
291 /// assert!(RString::from_value(eval(":example").unwrap()).is_none());
292 /// ```
293 #[inline]
294 pub fn from_value(val: Value) -> Option<Self> {
295 unsafe {
296 (val.rb_type() == ruby_value_type::RUBY_T_STRING)
297 .then(|| Self(NonZeroValue::new_unchecked(val)))
298 }
299 }
300
301 pub(crate) fn ref_from_value(val: &Value) -> Option<&Self> {
302 unsafe {
303 (val.rb_type() == ruby_value_type::RUBY_T_STRING)
304 .then(|| &*(val as *const _ as *const RString))
305 }
306 }
307
308 #[inline]
309 pub(crate) unsafe fn from_rb_value_unchecked(val: VALUE) -> Self {
310 Self(NonZeroValue::new_unchecked(Value::new(val)))
311 }
312
313 /// Create a new Ruby string from the Rust string `s`.
314 ///
315 /// The encoding of the Ruby string will be UTF-8.
316 ///
317 /// # Panics
318 ///
319 /// Panics if called from a non-Ruby thread. See [`Ruby::str_new`] for the
320 /// non-panicking version.
321 ///
322 /// # Examples
323 ///
324 /// ```
325 /// # #![allow(deprecated)]
326 /// use magnus::{rb_assert, RString};
327 /// # let _cleanup = unsafe { magnus::embed::init() };
328 ///
329 /// let val = RString::new("example");
330 /// rb_assert!(r#"val == "example""#, val);
331 /// ```
332 #[cfg_attr(
333 not(feature = "old-api"),
334 deprecated(note = "please use `Ruby::str_new` instead")
335 )]
336 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
337 #[inline]
338 pub fn new(s: &str) -> Self {
339 get_ruby!().str_new(s)
340 }
341
342 /// Implementation detail of [`r_string`].
343 #[doc(hidden)]
344 #[inline]
345 pub unsafe fn new_lit(ptr: *const c_char, len: c_long) -> Self {
346 get_ruby!().str_new_lit(ptr, len)
347 }
348
349 /// Create a new Ruby string with capacity `n`.
350 ///
351 /// The encoding will be set to ASCII-8BIT (aka BINARY). See also
352 /// [`with_capacity`](RString::with_capacity).
353 ///
354 /// # Panics
355 ///
356 /// Panics if called from a non-Ruby thread. See [`Ruby::str_buf_new`] for
357 /// the non-panicking version.
358 ///
359 /// # Examples
360 ///
361 /// ```
362 /// # #![allow(deprecated)]
363 /// use magnus::{rb_assert, RString};
364 /// # let _cleanup = unsafe { magnus::embed::init() };
365 ///
366 /// let buf = RString::buf_new(4096);
367 /// buf.cat(&[13, 14, 10, 13, 11, 14, 14, 15]);
368 /// rb_assert!(r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf);
369 /// ```
370 #[cfg_attr(
371 not(feature = "old-api"),
372 deprecated(note = "please use `Ruby::str_buf_new` instead")
373 )]
374 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
375 #[inline]
376 pub fn buf_new(n: usize) -> Self {
377 get_ruby!().str_buf_new(n)
378 }
379
380 /// Create a new Ruby string with capacity `n`.
381 ///
382 /// The encoding will be set to UTF-8. See also
383 /// [`buf_new`](RString::buf_new).
384 ///
385 /// # Panics
386 ///
387 /// Panics if called from a non-Ruby thread. See
388 /// [`Ruby::str_with_capacity`] for the non-panicking version.
389 ///
390 /// # Examples
391 ///
392 /// ```
393 /// # #![allow(deprecated)]
394 /// use magnus::{rb_assert, RString};
395 /// # let _cleanup = unsafe { magnus::embed::init() };
396 ///
397 /// let s = RString::with_capacity(9);
398 /// s.cat("foo");
399 /// s.cat("bar");
400 /// s.cat("baz");
401 /// rb_assert!(r#"s == "foobarbaz""#, s);
402 /// ```
403 #[cfg_attr(
404 not(feature = "old-api"),
405 deprecated(note = "please use `Ruby::str_with_capacity` instead")
406 )]
407 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
408 #[inline]
409 pub fn with_capacity(n: usize) -> Self {
410 get_ruby!().str_with_capacity(n)
411 }
412
413 /// Create a new Ruby string from the Rust slice `s`.
414 ///
415 /// The encoding of the Ruby string will be set to ASCII-8BIT (aka BINARY).
416 ///
417 /// # Panics
418 ///
419 /// Panics if called from a non-Ruby thread. See [`Ruby::str_from_slice`]
420 /// for the non-panicking version.
421 ///
422 /// # Examples
423 ///
424 /// ```
425 /// # #![allow(deprecated)]
426 /// use magnus::{rb_assert, RString};
427 /// # let _cleanup = unsafe { magnus::embed::init() };
428 ///
429 /// let buf = RString::from_slice(&[13, 14, 10, 13, 11, 14, 14, 15]);
430 /// rb_assert!(r#"buf == "\r\x0E\n\r\v\x0E\x0E\x0F""#, buf);
431 /// ```
432 #[cfg_attr(
433 not(feature = "old-api"),
434 deprecated(note = "please use `Ruby::str_from_slice` instead")
435 )]
436 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
437 #[inline]
438 pub fn from_slice(s: &[u8]) -> Self {
439 get_ruby!().str_from_slice(s)
440 }
441
442 /// Create a new Ruby string from the value `s` with the encoding `enc`.
443 ///
444 /// # Panics
445 ///
446 /// Panics if called from a non-Ruby thread. See [`Ruby::enc_str_new`] for
447 /// the non-panicking version.
448 ///
449 /// # Examples
450 ///
451 /// ```
452 /// # #![allow(deprecated)]
453 /// use magnus::{encoding::RbEncoding, rb_assert, RString};
454 /// # let _cleanup = unsafe { magnus::embed::init() };
455 ///
456 /// let val = RString::enc_new("example", RbEncoding::usascii());
457 /// rb_assert!(r#"val == "example""#, val);
458 /// ```
459 ///
460 /// ```
461 /// # #![allow(deprecated)]
462 /// use magnus::{encoding::RbEncoding, rb_assert, RString};
463 /// # let _cleanup = unsafe { magnus::embed::init() };
464 ///
465 /// let val = RString::enc_new([255, 128, 128], RbEncoding::ascii8bit());
466 /// rb_assert!(r#"val == "\xFF\x80\x80".force_encoding("BINARY")"#, val);
467 /// ```
468 #[cfg_attr(
469 not(feature = "old-api"),
470 deprecated(note = "please use `Ruby::enc_str_new` instead")
471 )]
472 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
473 #[inline]
474 pub fn enc_new<T, E>(s: T, enc: E) -> Self
475 where
476 T: AsRef<[u8]>,
477 E: Into<RbEncoding>,
478 {
479 get_ruby!().enc_str_new(s, enc)
480 }
481
482 /// Create a new Ruby string from the Rust char `c`.
483 ///
484 /// The encoding of the Ruby string will be UTF-8.
485 ///
486 /// # Panics
487 ///
488 /// Panics if called from a non-Ruby thread. See [`Ruby::str_from_char`]
489 /// for the non-panicking version.
490 ///
491 /// # Examples
492 ///
493 /// ```
494 /// # #![allow(deprecated)]
495 /// use magnus::{rb_assert, RString};
496 /// # let _cleanup = unsafe { magnus::embed::init() };
497 ///
498 /// let c = RString::from_char('a');
499 /// rb_assert!(r#"c == "a""#, c);
500 /// ```
501 ///
502 /// ```
503 /// # #![allow(deprecated)]
504 /// use magnus::{rb_assert, RString};
505 /// # let _cleanup = unsafe { magnus::embed::init() };
506 ///
507 /// let c = RString::from_char('🦀');
508 /// rb_assert!(r#"c == "🦀""#, c);
509 /// ```
510 #[cfg_attr(
511 not(feature = "old-api"),
512 deprecated(note = "please use `Ruby::str_from_char` instead")
513 )]
514 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
515 #[inline]
516 pub fn from_char(c: char) -> Self {
517 get_ruby!().str_from_char(c)
518 }
519
520 /// Create a new Ruby string containing the codepoint `code` in the
521 /// encoding `enc`.
522 ///
523 /// The encoding of the Ruby string will be the passed encoding `enc`.
524 ///
525 /// # Panics
526 ///
527 /// Panics if called from a non-Ruby thread. See [`Ruby::chr`] for the
528 /// non-panicking version.
529 ///
530 /// # Examples
531 ///
532 /// ```
533 /// # #![allow(deprecated)]
534 /// use magnus::{encoding::RbEncoding, rb_assert, RString};
535 /// # let _cleanup = unsafe { magnus::embed::init() };
536 ///
537 /// let c = RString::chr(97, RbEncoding::usascii()).unwrap();
538 /// rb_assert!(r#"c == "a""#, c);
539 /// ```
540 ///
541 /// ```
542 /// # #![allow(deprecated)]
543 /// use magnus::{encoding::RbEncoding, rb_assert, RString};
544 /// # let _cleanup = unsafe { magnus::embed::init() };
545 ///
546 /// let c = RString::chr(129408, RbEncoding::utf8()).unwrap();
547 /// rb_assert!(r#"c == "🦀""#, c);
548 /// ```
549 #[cfg_attr(
550 not(feature = "old-api"),
551 deprecated(note = "please use `Ruby::chr` instead")
552 )]
553 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
554 #[inline]
555 pub fn chr<T>(code: u32, enc: T) -> Result<Self, Error>
556 where
557 T: Into<RbEncoding>,
558 {
559 get_ruby!().chr(code, enc)
560 }
561
562 /// Create a new Ruby string that shares the same backing data as `s`.
563 ///
564 /// Both string objects will point at the same underlying data until one is
565 /// modified, and only then will the data be duplicated. This operation is
    /// cheap, and useful for cases where you may need to modify a string, but
567 /// don't want to mutate a value passed to your function.
568 ///
569 /// # Examples
570 ///
571 /// ```
572 /// use magnus::{rb_assert, Error, RString, Ruby};
573 ///
574 /// fn example(ruby: &Ruby) -> Result<(), Error> {
575 /// let s = ruby.str_new("example");
576 /// let dup = RString::new_shared(s);
577 /// rb_assert!(ruby, "s == dup", s, dup);
578 /// // mutating one doesn't mutate both
579 /// dup.cat("foo");
580 /// rb_assert!(ruby, "s != dup", s, dup);
581 ///
582 /// Ok(())
583 /// }
584 /// # Ruby::init(example).unwrap()
585 /// ```
586 pub fn new_shared(s: Self) -> Self {
587 unsafe { Self::from_rb_value_unchecked(rb_str_new_shared(s.as_rb_value())) }
588 }
589
590 /// Create a new Ruby string that is a frozen copy of `s`.
591 ///
592 /// This can be used to get a copy of a string that is guaranteed not to be
593 /// modified while you are referencing it.
594 ///
595 /// # Examples
596 ///
597 /// ```
598 /// use magnus::{rb_assert, Error, RString, Ruby};
599 ///
600 /// fn example(ruby: &Ruby) -> Result<(), Error> {
601 /// let orig = ruby.str_new("example");
602 /// let frozen = RString::new_frozen(orig);
603 /// rb_assert!(ruby, r#"frozen == "example""#, frozen);
604 /// // mutating original doesn't impact the frozen copy
605 /// orig.cat("foo");
606 /// rb_assert!(ruby, r#"frozen == "example""#, frozen);
607 ///
608 /// Ok(())
609 /// }
610 /// # Ruby::init(example).unwrap()
611 /// ```
612 pub fn new_frozen(s: Self) -> Self {
613 unsafe { Self::from_rb_value_unchecked(rb_str_new_frozen(s.as_rb_value())) }
614 }
615
616 /// Return `self` as a slice of bytes.
617 ///
618 /// # Safety
619 ///
620 /// This is directly viewing memory owned and managed by Ruby. Ruby may
621 /// modify or free the memory backing the returned slice, the caller must
622 /// ensure this does not happen.
623 ///
624 /// Ruby must not be allowed to garbage collect or modify `self` while a
625 /// reference to the slice is held.
626 ///
627 /// # Examples
628 ///
629 /// ```
630 /// use magnus::{Error, Ruby};
631 ///
632 /// fn example(ruby: &Ruby) -> Result<(), Error> {
633 /// let s = ruby.str_new("example");
634 /// // safe as we don't give Ruby the chance to mess with the string while
635 /// // we hold a reference to the slice.
636 /// unsafe { assert_eq!(s.as_slice(), [101, 120, 97, 109, 112, 108, 101]) };
637 ///
638 /// Ok(())
639 /// }
640 /// # Ruby::init(example).unwrap()
641 /// ```
642 pub unsafe fn as_slice(&self) -> &[u8] {
643 self.as_slice_unconstrained()
644 }
645
646 unsafe fn as_slice_unconstrained<'a>(self) -> &'a [u8] {
647 debug_assert_value!(self);
648 slice::from_raw_parts(
649 RSTRING_PTR(self.as_rb_value()) as *const u8,
650 RSTRING_LEN(self.as_rb_value()) as _,
651 )
652 }
653
654 /// Return an iterator over `self`'s codepoints.
655 ///
656 /// # Safety
657 ///
658 /// The returned iterator references memory owned and managed by Ruby. Ruby
659 /// may modify or free that memory, the caller must ensure this does not
660 /// happen at any time while still holding a reference to the iterator.
661 ///
662 /// # Examples
663 ///
664 /// ```
665 /// use magnus::{Error, RString, Ruby};
666 ///
667 /// fn example(ruby: &Ruby) -> Result<(), Error> {
668 /// let s = ruby.str_new("🦀 café");
669 ///
670 /// let codepoints = unsafe {
671 /// // ensure string isn't mutated during iteration by creating a
672 /// // frozen copy and iterating over that
673 /// let f = RString::new_frozen(s);
674 /// f.codepoints().collect::<Result<Vec<_>, Error>>()?
675 /// };
676 ///
677 /// assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);
678 ///
679 /// Ok(())
680 /// }
681 /// # Ruby::init(example).unwrap()
682 /// ```
683 pub unsafe fn codepoints(&self) -> Codepoints<'_> {
684 Codepoints {
685 slice: self.as_slice(),
686 encoding: self.enc_get().into(),
687 }
688 }
689
690 /// Return an iterator over `self`'s chars as slices of bytes.
691 ///
692 /// # Safety
693 ///
694 /// The returned iterator references memory owned and managed by Ruby. Ruby
695 /// may modify or free that memory, the caller must ensure this does not
696 /// happen at any time while still holding a reference to the iterator.
697 ///
698 /// # Examples
699 ///
700 /// ```
701 /// use magnus::{Error, RString, Ruby};
702 ///
703 /// fn example(ruby: &Ruby) -> Result<(), Error> {
704 /// let s = ruby.str_new("🦀 café");
705 ///
706 /// // ensure string isn't mutated during iteration by creating a frozen
707 /// // copy and iterating over that
708 /// let f = RString::new_frozen(s);
709 /// let codepoints = unsafe { f.char_bytes().collect::<Vec<_>>() };
710 ///
711 /// assert_eq!(
712 /// codepoints,
713 /// [
714 /// &[240, 159, 166, 128][..],
715 /// &[32],
716 /// &[99],
717 /// &[97],
718 /// &[102],
719 /// &[195, 169]
720 /// ]
721 /// );
722 ///
723 /// Ok(())
724 /// }
725 /// # Ruby::init(example).unwrap()
726 /// ```
727 pub unsafe fn char_bytes(&self) -> CharBytes<'_> {
728 CharBytes {
729 slice: self.as_slice(),
730 encoding: self.enc_get().into(),
731 }
732 }
733
734 /// Converts a character offset to a byte offset.
735 ///
736 /// # Examples
737 ///
738 /// ```
739 /// use magnus::{Error, Ruby};
740 ///
741 /// fn example(ruby: &Ruby) -> Result<(), Error> {
742 /// let s = ruby.str_new("🌊🦀🏝️");
743 /// assert_eq!(s.offset(1), 4);
744 /// assert_eq!(s.offset(2), 8);
745 ///
746 /// Ok(())
747 /// }
748 /// # Ruby::init(example).unwrap()
749 /// ```
750 pub fn offset(self, pos: usize) -> usize {
751 unsafe { rb_str_offset(self.as_rb_value(), pos as c_long) as usize }
752 }
753
754 /// Returns true if the encoding for this string is UTF-8 or US-ASCII,
755 /// false otherwise.
756 ///
757 /// The encoding on a Ruby String is just a label, it provides no guarantee
758 /// that the String really is valid UTF-8.
759 ///
760 /// # Examples
761 ///
762 /// ```
763 /// use magnus::{eval, Error, RString, Ruby};
764 ///
765 /// fn example(ruby: &Ruby) -> Result<(), Error> {
766 /// let s: RString = eval!(ruby, r#""café""#)?;
767 /// assert!(s.is_utf8_compatible_encoding());
768 ///
769 /// Ok(())
770 /// }
771 /// # Ruby::init(example).unwrap()
772 /// ```
773 ///
774 /// ```
775 /// use magnus::{eval, Error, RString, Ruby};
776 ///
777 /// fn example(ruby: &Ruby) -> Result<(), Error> {
778 /// let s: RString = eval!(ruby, r#""café".encode("ISO-8859-1")"#)?;
779 /// assert!(!s.is_utf8_compatible_encoding());
780 ///
781 /// Ok(())
782 /// }
783 /// # Ruby::init(example).unwrap()
784 /// ```
785 pub fn is_utf8_compatible_encoding(self) -> bool {
786 let handle = Ruby::get_with(self);
787 let encindex = self.enc_get();
788 // us-ascii is a 100% compatible subset of utf8
789 encindex == handle.utf8_encindex() || encindex == handle.usascii_encindex()
790 }
791
792 /// Returns a new string by reencoding `self` from its current encoding to
793 /// the given `enc`.
794 ///
795 /// # Examples
796 ///
797 /// ```
798 /// use magnus::{eval, Error, RString, Ruby};
799 ///
800 /// fn example(ruby: &Ruby) -> Result<(), Error> {
801 /// let s: RString = eval!(ruby, r#""café".encode("ISO-8859-1")"#)?;
802 /// // safe as we don't give Ruby the chance to mess with the string while
803 /// // we hold a reference to the slice.
804 /// unsafe { assert_eq!(s.as_slice(), &[99, 97, 102, 233]) };
805 /// let e = s.conv_enc(ruby.utf8_encoding())?;
806 /// unsafe { assert_eq!(e.as_slice(), &[99, 97, 102, 195, 169]) };
807 ///
808 /// Ok(())
809 /// }
810 /// # Ruby::init(example).unwrap()
811 /// ```
812 pub fn conv_enc<T>(self, enc: T) -> Result<Self, Error>
813 where
814 T: Into<RbEncoding>,
815 {
816 protect(|| unsafe {
817 Self::from_rb_value_unchecked(rb_str_conv_enc(
818 self.as_rb_value(),
819 ptr::null_mut(),
820 enc.into().as_ptr(),
821 ))
822 })
823 }
824
825 /// Returns a string omitting 'broken' parts of the string according to its
826 /// encoding.
827 ///
    /// If `replacement` is `Some(RString)` any 'broken' portion will be
829 /// replaced with that string. When `replacement` is `None` an encoding
830 /// specific default will be used.
831 ///
832 /// If `self` is not 'broken' and no replacement was made, returns
833 /// `Ok(None)`.
834 ///
835 /// # Examples
836 ///
837 /// ```
838 /// use magnus::{Error, Ruby};
839 ///
840 /// fn example(ruby: &Ruby) -> Result<(), Error> {
841 /// // 156 is invalid for utf-8
842 /// let s = ruby.enc_str_new([156, 57, 57], ruby.utf8_encoding());
843 /// assert_eq!(s.scrub(None)?.unwrap().to_string()?, "�99");
844 /// assert_eq!(
845 /// s.scrub(Some(ruby.str_new("?")))?.unwrap().to_string()?,
846 /// "?99"
847 /// );
848 /// assert_eq!(s.scrub(Some(ruby.str_new("")))?.unwrap().to_string()?, "99");
849 ///
850 /// Ok(())
851 /// }
852 /// # Ruby::init(example).unwrap()
853 /// ```
854 pub fn scrub(self, replacement: Option<Self>) -> Result<Option<Self>, Error> {
855 let val = protect(|| unsafe {
856 Value::new(rb_str_scrub(
857 self.as_rb_value(),
858 replacement
859 .map(|r| r.as_rb_value())
860 .unwrap_or_else(|| Ruby::get_with(self).qnil().as_rb_value()),
861 ))
862 })?;
863 if val.is_nil() {
864 Ok(None)
865 } else {
866 unsafe { Ok(Some(Self(NonZeroValue::new_unchecked(val)))) }
867 }
868 }
869
870 /// Returns the cached coderange value that describes how `self` relates to
871 /// its encoding.
872 ///
873 /// See also [`enc_coderange_scan`](RString::enc_coderange_scan).
874 ///
875 /// # Examples
876 ///
877 /// ```
878 /// use magnus::{encoding::Coderange, prelude::*, Error, Ruby};
879 ///
880 /// fn example(ruby: &Ruby) -> Result<(), Error> {
881 /// // Coderange is unknown on creation.
882 /// let s = ruby.str_new("test");
883 /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
884 ///
885 /// // Methods that operate on the string using the encoding will set the
886 /// // coderange as a side effect.
887 /// let _: usize = s.funcall("length", ())?;
888 /// assert_eq!(s.enc_coderange(), Coderange::SevenBit);
889 ///
890 /// // Operations with two strings with known coderanges will set it
891 /// // appropriately.
892 /// let t = ruby.str_new("🦀");
893 /// let _: usize = t.funcall("length", ())?;
894 /// assert_eq!(t.enc_coderange(), Coderange::Valid);
895 /// s.buf_append(t)?;
896 /// assert_eq!(s.enc_coderange(), Coderange::Valid);
897 ///
898 /// // Operations that modify the string with an unknown coderange will
899 /// // set the coderange back to unknown.
900 /// s.cat([128]);
901 /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
902 ///
903 /// // Which may leave the string with a broken encoding.
904 /// let _: usize = s.funcall("length", ())?;
905 /// assert_eq!(s.enc_coderange(), Coderange::Broken);
906 ///
907 /// Ok(())
908 /// }
909 /// # Ruby::init(example).unwrap()
910 /// ```
911 pub fn enc_coderange(self) -> Coderange {
912 unsafe {
913 transmute(
914 (self.r_basic_unchecked().as_ref().flags
915 & ruby_coderange_type::RUBY_ENC_CODERANGE_MASK as VALUE) as u32,
916 )
917 }
918 }
919
920 /// Scans `self` to establish its coderange.
921 ///
922 /// If the coderange is already known, simply returns the known value.
923 /// See also [`enc_coderange`](RString::enc_coderange).
924 ///
925 /// # Examples
926 ///
927 /// ```
928 /// use magnus::{encoding::Coderange, Error, Ruby};
929 ///
930 /// fn example(ruby: &Ruby) -> Result<(), Error> {
931 /// let s = ruby.str_new("test");
932 /// assert_eq!(s.enc_coderange_scan(), Coderange::SevenBit);
933 ///
934 /// Ok(())
935 /// }
936 /// # Ruby::init(example).unwrap()
937 /// ```
938 pub fn enc_coderange_scan(self) -> Coderange {
939 unsafe { transmute(rb_enc_str_coderange(self.as_rb_value()) as u32) }
940 }
941
942 /// Clear `self`'s cached coderange, setting it to `Unknown`.
943 ///
944 /// # Examples
945 ///
946 /// ```
947 /// use magnus::{encoding::Coderange, prelude::*, Error, Ruby};
948 ///
949 /// fn example(ruby: &Ruby) -> Result<(), Error> {
950 /// let s = ruby.str_new("🦀");
951 /// // trigger setting coderange
952 /// let _: usize = s.funcall("length", ())?;
953 /// assert_eq!(s.enc_coderange(), Coderange::Valid);
954 ///
955 /// s.enc_coderange_clear();
956 /// assert_eq!(s.enc_coderange(), Coderange::Unknown);
957 ///
958 /// Ok(())
959 /// }
960 /// # Ruby::init(example).unwrap()
961 /// ```
962 pub fn enc_coderange_clear(self) {
963 unsafe {
964 self.r_basic_unchecked().as_mut().flags &=
965 !(ruby_coderange_type::RUBY_ENC_CODERANGE_MASK as VALUE)
966 }
967 }
968
969 /// Sets `self`'s cached coderange.
970 ///
    /// Rather than using this method, it is recommended to set the coderange to
972 /// `Unknown` with [`enc_coderange_clear`](RString::enc_coderange_clear)
973 /// and let Ruby determine the coderange lazily when needed.
974 ///
975 /// # Safety
976 ///
977 /// This must be set correctly. `SevenBit` if all codepoints are within
978 /// 0..=127, `Valid` if the string is valid for its encoding, or `Broken`
979 /// if it is not. `Unknown` can be set safely with
980 /// [`enc_coderange_clear`](RString::enc_coderange_clear).
981 ///
982 /// # Examples
983 ///
984 /// ```
985 /// use magnus::{encoding::Coderange, prelude::*, Error, RString, Ruby};
986 ///
987 /// fn crabbify(ruby: &Ruby, s: RString) -> Result<(), Error> {
988 /// if s.enc_get() != ruby.utf8_encindex() {
989 /// return Err(Error::new(
990 /// ruby.exception_encoding_error(),
991 /// "expected utf-8",
992 /// ));
993 /// }
994 /// let original = s.enc_coderange();
995 /// // ::cat() will clear the coderange
996 /// s.cat("🦀");
997 /// // we added a multibyte char, so if we started with `SevenBit` it
998 /// // should be upgraded to `Valid`, and if it was `Valid` it is still
999 /// // `Valid`.
1000 /// if original == Coderange::SevenBit || original == Coderange::Valid {
1001 /// unsafe {
1002 /// s.enc_coderange_set(Coderange::Valid);
1003 /// }
1004 /// }
1005 /// Ok(())
1006 /// }
1007 ///
1008 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1009 /// let s = ruby.str_new("test");
1010 /// // trigger setting coderange
1011 /// let _: usize = s.funcall("length", ())?;
1012 ///
1013 /// crabbify(ruby, s)?;
1014 /// assert_eq!(s.enc_coderange(), Coderange::Valid);
1015 ///
1016 /// Ok(())
1017 /// }
1018 /// # Ruby::init(example).unwrap()
1019 /// ```
1020 pub unsafe fn enc_coderange_set(self, cr: Coderange) {
1021 self.enc_coderange_clear();
1022 self.r_basic_unchecked().as_mut().flags |= cr as VALUE;
1023 }
1024
1025 /// Returns a Rust `&str` reference to the value of `self`.
1026 ///
1027 /// Returns `None` if `self`'s encoding is not UTF-8 (or US-ASCII), or if
1028 /// the string is not valid UTF-8.
1029 ///
1030 /// # Safety
1031 ///
1032 /// This is directly viewing memory owned and managed by Ruby. Ruby may
1033 /// modify or free the memory backing the returned str, the caller must
1034 /// ensure this does not happen.
1035 ///
1036 /// Ruby must not be allowed to garbage collect or modify `self` while a
1037 /// reference to the str is held.
1038 ///
1039 /// # Examples
1040 ///
1041 /// ```
1042 /// use magnus::{Error, Ruby};
1043 ///
1044 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1045 /// let s = ruby.str_new("example");
1046 /// // safe as we don't give Ruby the chance to mess with the string while
1047 /// // we hold a reference to the slice.
1048 /// unsafe { assert_eq!(s.test_as_str().unwrap(), "example") };
1049 ///
1050 /// Ok(())
1051 /// }
1052 /// # Ruby::init(example).unwrap()
1053 /// ```
1054 pub unsafe fn test_as_str(&self) -> Option<&str> {
1055 self.test_as_str_unconstrained()
1056 }
1057
1058 /// Returns a Rust `&str` reference to the value of `self`.
1059 ///
1060 /// Errors if `self`'s encoding is not UTF-8 (or US-ASCII), or if the
1061 /// string is not valid UTF-8.
1062 ///
1063 /// # Safety
1064 ///
1065 /// This is directly viewing memory owned and managed by Ruby. Ruby may
1066 /// modify or free the memory backing the returned str, the caller must
1067 /// ensure this does not happen.
1068 ///
1069 /// Ruby must not be allowed to garbage collect or modify `self` while a
1070 /// reference to the str is held.
1071 ///
1072 /// # Examples
1073 ///
1074 /// ```
1075 /// use magnus::{Error, Ruby};
1076 ///
1077 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1078 /// let s = ruby.str_new("example");
1079 /// // safe as we don't give Ruby the chance to mess with the string while
1080 /// // we hold a reference to the slice.
1081 /// unsafe { assert_eq!(s.as_str()?, "example") };
1082 ///
1083 /// Ok(())
1084 /// }
1085 /// # Ruby::init(example).unwrap()
1086 /// ```
1087 pub unsafe fn as_str(&self) -> Result<&str, Error> {
1088 self.as_str_unconstrained()
1089 }
1090
1091 unsafe fn test_as_str_unconstrained<'a>(self) -> Option<&'a str> {
1092 let handle = Ruby::get_with(self);
1093 let enc = self.enc_get();
1094 let cr = self.enc_coderange_scan();
1095 ((self.is_utf8_compatible_encoding()
1096 && (cr == Coderange::SevenBit || cr == Coderange::Valid))
1097 || (enc == handle.ascii8bit_encindex() && cr == Coderange::SevenBit))
1098 .then(|| str::from_utf8_unchecked(self.as_slice_unconstrained()))
1099 }
1100
1101 unsafe fn as_str_unconstrained<'a>(self) -> Result<&'a str, Error> {
1102 self.test_as_str_unconstrained().ok_or_else(|| {
1103 let msg: Cow<'static, str> = if self.is_utf8_compatible_encoding() {
1104 format!(
1105 "expected utf-8, got {}",
1106 RbEncoding::from(self.enc_get()).name()
1107 )
1108 .into()
1109 } else {
1110 "invalid byte sequence in UTF-8".into()
1111 };
1112 Error::new(Ruby::get_with(self).exception_encoding_error(), msg)
1113 })
1114 }
1115
/// Returns `self` as a Rust string, ignoring the Ruby encoding and
/// replacing any invalid UTF-8 sequences with U+FFFD (the replacement
/// character). If `self` is valid UTF-8 this will return a `&str` reference.
1119 ///
1120 /// # Safety
1121 ///
1122 /// This may return a direct view of memory owned and managed by Ruby. Ruby
1123 /// may modify or free the memory backing the returned str, the caller must
1124 /// ensure this does not happen.
1125 ///
1126 /// Ruby must not be allowed to garbage collect or modify `self` while a
1127 /// reference to the str is held.
1128 ///
1129 /// # Examples
1130 ///
1131 /// ```
1132 /// use magnus::{Error, Ruby};
1133 ///
1134 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1135 /// let s = ruby.str_new("example");
1136 /// // safe as we don't give Ruby the chance to mess with the string while
1137 /// // we hold a reference to the slice.
1138 /// unsafe { assert_eq!(s.to_string_lossy(), "example") };
1139 ///
1140 /// Ok(())
1141 /// }
1142 /// # Ruby::init(example).unwrap()
1143 /// ```
1144 #[allow(clippy::wrong_self_convention)]
1145 pub unsafe fn to_string_lossy(&self) -> Cow<'_, str> {
1146 String::from_utf8_lossy(self.as_slice())
1147 }
1148
1149 /// Returns `self` as an owned Rust `String`. The Ruby string will be
1150 /// reencoded as UTF-8 if required. Errors if the string can not be encoded
1151 /// as UTF-8.
1152 ///
1153 /// # Examples
1154 ///
1155 /// ```
1156 /// use magnus::{Error, Ruby};
1157 ///
1158 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1159 /// let s = ruby.str_new("example");
1160 /// assert_eq!(s.to_string()?, "example");
1161 ///
1162 /// Ok(())
1163 /// }
1164 /// # Ruby::init(example).unwrap()
1165 /// ```
1166 pub fn to_string(self) -> Result<String, Error> {
1167 let handle = Ruby::get_with(self);
1168 unsafe {
1169 if let Some(str) = self.test_as_str() {
1170 Ok(str.to_owned())
1171 } else {
1172 Ok(self.conv_enc(handle.utf8_encoding())?.as_str()?.to_owned())
1173 }
1174 }
1175 }
1176
1177 /// Returns `self` as an owned Rust `Bytes`.
1178 ///
1179 /// # Examples
1180 ///
1181 /// ```
1182 /// use bytes::Bytes;
1183 /// use magnus::{Error, Ruby};
1184 ///
1185 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1186 /// let s = ruby.str_new("example");
1187 /// assert_eq!(s.to_bytes(), Bytes::from("example"));
1188 ///
1189 /// Ok(())
1190 /// }
1191 /// # Ruby::init(example).unwrap()
1192 /// ```
1193 #[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
1194 #[cfg(feature = "bytes")]
1195 pub fn to_bytes(self) -> bytes::Bytes {
1196 let vec = unsafe { self.as_slice().to_vec() };
1197 vec.into()
1198 }
1199
1200 /// Converts `self` to a [`char`]. Errors if the string is more than one
1201 /// character or can not be encoded as UTF-8.
1202 ///
1203 /// # Examples
1204 ///
1205 /// ```
1206 /// use magnus::{Error, Ruby};
1207 ///
1208 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1209 /// let s = ruby.str_new("a");
1210 /// assert_eq!(s.to_char()?, 'a');
1211 ///
1212 /// Ok(())
1213 /// }
1214 /// # Ruby::init(example).unwrap()
1215 /// ```
1216 pub fn to_char(self) -> Result<char, Error> {
1217 let handle = Ruby::get_with(self);
1218 let utf8 = if self.is_utf8_compatible_encoding() {
1219 self
1220 } else {
1221 self.conv_enc(handle.utf8_encoding())?
1222 };
1223 unsafe {
1224 str::from_utf8(utf8.as_slice())
1225 .map_err(|e| Error::new(handle.exception_encoding_error(), format!("{}", e)))?
1226 .parse()
1227 .map_err(|e| {
1228 Error::new(
1229 handle.exception_type_error(),
1230 format!("could not convert string to char, {}", e),
1231 )
1232 })
1233 }
1234 }
1235
/// Returns a quoted version of `self`.
1237 ///
1238 /// This can be thought of as the opposite of `eval`. A string returned
1239 /// from `dump` can be safely passed to `eval` and will result in a string
1240 /// with the exact same contents as the original.
1241 ///
1242 /// # Examples
1243 ///
1244 /// ```
1245 /// use magnus::{Error, Ruby};
1246 ///
1247 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1248 /// let s = ruby.str_new("🦀 café");
1249 /// assert_eq!(s.dump()?.to_string()?, r#""\u{1F980} caf\u00E9""#);
1250 ///
1251 /// Ok(())
1252 /// }
1253 /// # Ruby::init(example).unwrap()
1254 /// ```
1255 pub fn dump(self) -> Result<Self, Error> {
1256 protect(|| unsafe { RString::from_rb_value_unchecked(rb_str_dump(self.as_rb_value())) })
1257 }
1258
/// Returns whether `self` is a frozen interned string. Interned strings
/// are usually string literals in files with the
/// `# frozen_string_literal: true` 'magic comment'.
1262 ///
1263 /// # Examples
1264 ///
1265 /// ```
1266 /// use magnus::{eval, Error, RString, Ruby};
1267 ///
1268 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1269 /// let s: RString = eval!(
1270 /// ruby,
1271 /// r#"
1272 /// ## frozen_string_literal: true
1273 ///
1274 /// "example"
1275 /// "#
1276 /// )?;
1277 /// assert!(s.is_interned());
1278 ///
1279 /// Ok(())
1280 /// }
1281 /// # Ruby::init(example).unwrap()
1282 /// ```
1283 ///
1284 /// ```
1285 /// use magnus::{eval, Error, RString, Ruby};
1286 ///
1287 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1288 /// let s: RString = eval!(ruby, r#""example""#)?;
1289 /// assert!(!s.is_interned());
1290 ///
1291 /// Ok(())
1292 /// }
1293 /// # Ruby::init(example).unwrap()
1294 /// ```
1295 pub fn is_interned(self) -> bool {
1296 unsafe {
1297 self.r_basic_unchecked().as_ref().flags & ruby_rstring_flags::RSTRING_FSTR as VALUE != 0
1298 }
1299 }
1300
1301 /// Interns `self`, returning an interned string.
1302 ///
1303 /// Finds or creates an interned string, modifying `self` so that its
1304 /// parent is the returned string.
1305 ///
1306 /// Interned strings with the same value will use the same backing memory.
1307 ///
1308 /// # Examples
1309 ///
1310 /// ```
1311 /// use magnus::{rb_assert, Error, Ruby};
1312 ///
1313 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1314 /// let example = ruby.str_new("example");
1315 /// let interned = example.to_interned_str();
1316 /// rb_assert!("interned == example", interned, example);
1317 ///
1318 /// Ok(())
1319 /// }
1320 /// # Ruby::init(example).unwrap()
1321 /// ```
1322 #[cfg(any(ruby_gte_3_0, docsrs))]
1323 #[cfg_attr(docsrs, doc(cfg(ruby_gte_3_0)))]
1324 pub fn to_interned_str(self) -> RString {
1325 unsafe { RString::from_rb_value_unchecked(rb_str_to_interned_str(self.as_rb_value())) }
1326 }
1327
1328 /// Mutate `self`, adding `other` to the end. Errors if `self` and
1329 /// `other`'s encodings are not compatible.
1330 ///
1331 /// # Examples
1332 ///
1333 /// ```
1334 /// use magnus::{Error, Ruby};
1335 ///
1336 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1337 /// let a = ruby.str_new("foo");
1338 /// let b = ruby.str_new("bar");
1339 /// a.buf_append(b)?;
1340 /// assert_eq!(a.to_string()?, "foobar");
1341 ///
1342 /// Ok(())
1343 /// }
1344 /// # Ruby::init(example).unwrap()
1345 /// ```
1346 pub fn buf_append(self, other: Self) -> Result<(), Error> {
1347 protect(|| unsafe {
1348 Value::new(rb_str_buf_append(self.as_rb_value(), other.as_rb_value()))
1349 })?;
1350 Ok(())
1351 }
1352
1353 /// Mutate `self`, adding `buf` to the end.
1354 ///
/// Note: This ignores `self`'s encoding, and may result in `self`
1356 /// containing invalid bytes for its encoding. It's assumed this will more
1357 /// often be used with ASCII-8BIT (aka BINARY) encoded strings. See
1358 /// [`buf_new`](RString::buf_new) and [`from_slice`](RString::from_slice).
1359 ///
1360 /// # Examples
1361 ///
1362 /// ```
1363 /// use magnus::{Error, Ruby};
1364 ///
1365 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1366 /// let buf = ruby.str_buf_new(4096);
1367 /// buf.cat(&[102, 111, 111]);
1368 /// buf.cat("bar");
1369 /// assert_eq!(buf.to_string()?, "foobar");
1370 ///
1371 /// Ok(())
1372 /// }
1373 /// # Ruby::init(example).unwrap()
1374 /// ```
1375 pub fn cat<T: AsRef<[u8]>>(self, buf: T) {
1376 let buf = buf.as_ref();
1377 let len = buf.len();
1378 let ptr = buf.as_ptr();
1379 unsafe {
1380 rb_str_cat(self.as_rb_value(), ptr as *const c_char, len as c_long);
1381 }
1382 }
1383
1384 /// Replace the contents and encoding of `self` with those of `other`.
1385 ///
1386 /// # Examples
1387 ///
1388 /// ```
1389 /// use magnus::{Error, Ruby};
1390 ///
1391 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1392 /// let a = ruby.str_new("foo");
1393 /// let b = ruby.str_new("bar");
1394 /// a.replace(b)?;
1395 /// assert_eq!(a.to_string()?, "bar");
1396 ///
1397 /// Ok(())
1398 /// }
1399 /// # Ruby::init(example).unwrap()
1400 /// ```
1401 pub fn replace(self, other: Self) -> Result<(), Error> {
1402 protect(|| {
1403 unsafe { rb_str_replace(self.as_rb_value(), other.as_rb_value()) };
1404 Ruby::get_with(self).qnil()
1405 })?;
1406 Ok(())
1407 }
1408
1409 /// Modify `self` to share the same backing data as `other`.
1410 ///
1411 /// Both string objects will point at the same underlying data until one is
1412 /// modified, and only then will the data be duplicated.
1413 ///
1414 /// See also [`replace`](RString::replace) and
1415 /// [`new_shared`](RString::new_shared).
1416 ///
1417 /// # Examples
1418 ///
1419 /// ```
1420 /// use magnus::{Error, Ruby};
1421 ///
1422 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1423 /// let a = ruby.str_new("foo");
1424 /// let b = ruby.str_new("bar");
1425 /// a.shared_replace(b)?;
1426 /// assert_eq!(a.to_string()?, "bar");
1427 /// // mutating one doesn't mutate both
1428 /// b.cat("foo");
1429 /// assert_eq!(a.to_string()?, "bar");
1430 ///
1431 /// Ok(())
1432 /// }
1433 /// # Ruby::init(example).unwrap()
1434 /// ```
1435 pub fn shared_replace(self, other: Self) -> Result<(), Error> {
1436 protect(|| {
1437 unsafe { rb_str_shared_replace(self.as_rb_value(), other.as_rb_value()) };
1438 Ruby::get_with(self).qnil()
1439 })?;
1440 Ok(())
1441 }
1442
1443 /// Replace a portion of `self` with `other`.
1444 ///
1445 /// `beg` is the offset of the portion of `self` to replace. Negative
1446 /// values offset from the end of the string.
1447 /// `len` is the length of the portion of `self` to replace. It does not
1448 /// need to match the length of `other`, `self` will be expanded or
1449 /// contracted as needed to accommodate `other`.
1450 ///
1451 /// # Examples
1452 ///
1453 /// ```
1454 /// use magnus::{Error, Ruby};
1455 ///
1456 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1457 /// let s = ruby.str_new("foo");
1458 /// s.update(-1, 1, ruby.str_new("x"))?;
1459 /// assert_eq!(s.to_string()?, "fox");
1460 ///
1461 /// let s = ruby.str_new("splat");
1462 /// s.update(0, 3, ruby.str_new("b"))?;
1463 /// assert_eq!(s.to_string()?, "bat");
1464 ///
1465 /// let s = ruby.str_new("corncob");
1466 /// s.update(1, 5, ruby.str_new("ra"))?;
1467 /// assert_eq!(s.to_string()?, "crab");
1468 ///
1469 /// Ok(())
1470 /// }
1471 /// # Ruby::init(example).unwrap()
1472 /// ```
1473 pub fn update(self, beg: isize, len: usize, other: Self) -> Result<(), Error> {
1474 protect(|| {
1475 unsafe {
1476 rb_str_update(
1477 self.as_rb_value(),
1478 beg as c_long,
1479 len as c_long,
1480 other.as_rb_value(),
1481 )
1482 };
1483 Ruby::get_with(self).qnil()
1484 })?;
1485 Ok(())
1486 }
1487
1488 /// Create a new string by appending `other` to `self`.
1489 ///
1490 /// # Examples
1491 ///
1492 /// ```
1493 /// use magnus::{Error, Ruby};
1494 ///
1495 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1496 /// let a = ruby.str_new("foo");
1497 /// let b = ruby.str_new("bar");
1498 /// assert_eq!(a.plus(b)?.to_string()?, "foobar");
1499 /// assert_eq!(a.to_string()?, "foo");
1500 /// assert_eq!(b.to_string()?, "bar");
1501 ///
1502 /// Ok(())
1503 /// }
1504 /// # Ruby::init(example).unwrap()
1505 /// ```
1506 pub fn plus(self, other: Self) -> Result<Self, Error> {
1507 protect(|| unsafe {
1508 Self::from_rb_value_unchecked(rb_str_plus(self.as_rb_value(), other.as_rb_value()))
1509 })
1510 }
1511
1512 /// Create a new string by repeating `self` `num` times.
1513 ///
1514 /// # Examples
1515 ///
1516 /// ```
1517 /// use magnus::{Error, Ruby};
1518 ///
1519 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1520 /// assert_eq!(ruby.str_new("foo").times(3).to_string()?, "foofoofoo");
1521 ///
1522 /// Ok(())
1523 /// }
1524 /// # Ruby::init(example).unwrap()
1525 /// ```
1526 pub fn times(self, num: usize) -> Self {
1527 let num = Ruby::get_with(self).into_value(num);
1528 unsafe {
1529 Self::from_rb_value_unchecked(rb_str_times(self.as_rb_value(), num.as_rb_value()))
1530 }
1531 }
1532
1533 /// Shrink `self` by `len` bytes.
1534 ///
1535 /// # Examples
1536 ///
1537 /// ```
1538 /// use magnus::{Error, Ruby};
1539 ///
1540 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1541 /// let s = ruby.str_new("foobar");
1542 /// s.drop_bytes(3)?;
1543 /// assert_eq!(s.to_string()?, "bar");
1544 ///
1545 /// Ok(())
1546 /// }
1547 /// # Ruby::init(example).unwrap()
1548 /// ```
1549 pub fn drop_bytes(self, len: usize) -> Result<(), Error> {
1550 protect(|| {
1551 unsafe { rb_str_drop_bytes(self.as_rb_value(), len as c_long) };
1552 Ruby::get_with(self).qnil()
1553 })?;
1554 Ok(())
1555 }
1556
1557 /// Returns the number of bytes in `self`.
1558 ///
1559 /// See also [`length`](RString::length).
1560 ///
1561 /// # Examples
1562 ///
1563 /// ```
1564 /// use magnus::{Error, Ruby};
1565 ///
1566 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1567 /// let s = ruby.str_new("🦀 Hello, Ferris");
1568 /// assert_eq!(s.len(), 18);
1569 ///
1570 /// Ok(())
1571 /// }
1572 /// # Ruby::init(example).unwrap()
1573 /// ```
1574 pub fn len(self) -> usize {
1575 debug_assert_value!(self);
1576 unsafe { RSTRING_LEN(self.as_rb_value()) as usize }
1577 }
1578
1579 /// Returns the number of characters in `self`.
1580 ///
1581 /// See also [`len`](RString::len).
1582 ///
1583 /// # Examples
1584 ///
1585 /// ```
1586 /// use magnus::{Error, Ruby};
1587 ///
1588 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1589 /// let s = ruby.str_new("🦀 Hello, Ferris");
1590 /// assert_eq!(s.length(), 15);
1591 ///
1592 /// Ok(())
1593 /// }
1594 /// # Ruby::init(example).unwrap()
1595 /// ```
1596 pub fn length(self) -> usize {
1597 unsafe { rb_str_strlen(self.as_rb_value()) as usize }
1598 }
1599
1600 /// Returns the capacity of `self`.
1601 ///
1602 /// # Examples
1603 ///
1604 /// ```
1605 /// use magnus::{Error, Ruby};
1606 ///
1607 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1608 /// let s = ruby.str_with_capacity(9);
1609 /// s.cat("foo");
1610 /// assert_eq!(3, s.len());
1611 /// assert!(s.capacity() >= 9);
1612 ///
1613 /// Ok(())
1614 /// }
1615 /// # Ruby::init(example).unwrap()
1616 /// ```
1617 pub fn capacity(self) -> usize {
1618 unsafe { rb_str_capacity(self.as_rb_value()) as usize }
1619 }
1620
1621 /// Return whether self contains any characters or not.
1622 ///
1623 /// # Examples
1624 ///
1625 /// ```
1626 /// use magnus::{Error, Ruby};
1627 ///
1628 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1629 /// let s = ruby.str_new("");
1630 /// assert!(s.is_empty());
1631 ///
1632 /// Ok(())
1633 /// }
1634 /// # Ruby::init(example).unwrap()
1635 /// ```
1636 pub fn is_empty(self) -> bool {
1637 self.len() == 0
1638 }
1639
1640 /// Compares `self` with `other` to establish an ordering.
1641 ///
1642 /// # Examples
1643 ///
1644 /// ```
1645 /// use std::cmp::Ordering;
1646 ///
1647 /// use magnus::{Error, Ruby};
1648 ///
1649 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1650 /// let a = ruby.str_new("a");
1651 /// let b = ruby.str_new("b");
1652 /// assert_eq!(Ordering::Less, a.cmp(b));
1653 ///
1654 /// Ok(())
1655 /// }
1656 /// # Ruby::init(example).unwrap()
1657 /// ```
1658 ///
1659 /// Note that `std::cmp::Ordering` can be cast to `i{8,16,32,64,size}` to
1660 /// get the Ruby standard `-1`/`0`/`+1` for comparison results.
1661 ///
1662 /// ```
1663 /// assert_eq!(std::cmp::Ordering::Less as i64, -1);
1664 /// assert_eq!(std::cmp::Ordering::Equal as i64, 0);
1665 /// assert_eq!(std::cmp::Ordering::Greater as i64, 1);
1666 /// ```
1667 #[allow(clippy::should_implement_trait)]
1668 pub fn cmp(self, other: Self) -> Ordering {
1669 unsafe { rb_str_cmp(self.as_rb_value(), other.as_rb_value()) }.cmp(&0)
1670 }
1671
1672 /// Returns whether there is a total order of strings in the encodings of
1673 /// `self` and `other`.
1674 ///
1675 /// If this function returns `true` for `self` and `other` then the
1676 /// ordering returned from [`cmp`](RString::cmp) for those strings is
1677 /// 'correct'. If `false`, while stable, the ordering may not follow
1678 /// established rules.
1679 pub fn comparable(self, other: Self) -> bool {
1680 unsafe { rb_str_comparable(self.as_rb_value(), other.as_rb_value()) != 0 }
1681 }
1682
1683 /// Shorten `self` to `len`, adding "...".
1684 ///
1685 /// If `self` is shorter than `len` the returned value will be `self`.
1686 ///
1687 /// # Examples
1688 ///
1689 /// ```
1690 /// use magnus::{Error, Ruby};
1691 ///
1692 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1693 /// let s = ruby.str_new("foobarbaz");
1694 /// assert_eq!(s.ellipsize(6).to_string()?, "foo...");
1695 ///
1696 /// Ok(())
1697 /// }
1698 /// # Ruby::init(example).unwrap()
1699 /// ```
1700 pub fn ellipsize(self, len: usize) -> Self {
1701 unsafe {
1702 RString::from_rb_value_unchecked(rb_str_ellipsize(self.as_rb_value(), len as c_long))
1703 }
1704 }
1705
1706 /// Split `self` around the given delimiter.
1707 ///
1708 /// If `delim` is an empty string then `self` is split into characters.
1709 /// If `delim` is solely whitespace then `self` is split around whitespace,
1710 /// with leading, trailing, and runs of contiguous whitespace ignored.
1711 /// Otherwise, `self` is split around `delim`.
1712 ///
1713 /// # Examples
1714 ///
1715 /// ```
1716 /// use magnus::{prelude::*, Error, Ruby};
1717 ///
1718 /// fn example(ruby: &Ruby) -> Result<(), Error> {
1719 /// let s = ruby.str_new(" foo bar baz ");
1720 /// assert_eq!(
1721 /// Vec::<String>::try_convert(s.split("").as_value())?,
1722 /// vec![" ", "f", "o", "o", " ", " ", "b", "a", "r", " ", " ", "b", "a", "z", " "]
1723 /// );
1724 /// assert_eq!(
1725 /// Vec::<String>::try_convert(s.split(" ").as_value())?,
1726 /// vec!["foo", "bar", "baz"]
1727 /// );
1728 /// assert_eq!(
1729 /// Vec::<String>::try_convert(s.split(" bar ").as_value())?,
1730 /// vec![" foo ", " baz "]
1731 /// );
1732 ///
1733 /// Ok(())
1734 /// }
1735 /// # Ruby::init(example).unwrap()
1736 /// ```
1737 pub fn split(self, delim: &str) -> RArray {
1738 let delim = CString::new(delim).unwrap();
1739 unsafe { RArray::from_rb_value_unchecked(rb_str_split(self.as_rb_value(), delim.as_ptr())) }
1740 }
1741}
1742
1743impl fmt::Display for RString {
1744 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1745 write!(f, "{}", unsafe { self.to_s_infallible() })
1746 }
1747}
1748
1749impl fmt::Debug for RString {
1750 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1751 write!(f, "{}", self.inspect())
1752 }
1753}
1754
// Empty impl: `RString` uses the trait's provided items without overriding
// any of them.
impl EncodingCapable for RString {}
1756
1757impl io::Write for RString {
1758 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1759 let len = buf.len();
1760 self.cat(buf);
1761 Ok(len)
1762 }
1763
1764 fn flush(&mut self) -> io::Result<()> {
1765 Ok(())
1766 }
1767}
1768
1769/// Conversions from Rust types into [`RString`].
1770pub trait IntoRString: Sized {
1771 /// Convert `self` into [`RString`].
1772 ///
1773 /// # Panics
1774 ///
1775 /// Panics if called from a non-Ruby thread. See
1776 /// [`IntoRString::into_r_string_with`] for the non-panicking version.
1777 #[cfg_attr(
1778 not(feature = "old-api"),
1779 deprecated(note = "please use `IntoRString::into_r_string_with` instead")
1780 )]
1781 #[cfg_attr(docsrs, doc(cfg(feature = "old-api")))]
1782 #[inline]
1783 fn into_r_string(self) -> RString {
1784 self.into_r_string_with(&get_ruby!())
1785 }
1786
1787 /// Convert `self` into [`RString`].
1788 ///
1789 /// # Safety
1790 ///
1791 /// This method should only be called from a Ruby thread.
1792 unsafe fn into_r_string_unchecked(self) -> RString {
1793 self.into_r_string_with(&Ruby::get_unchecked())
1794 }
1795
1796 /// Convert `self` into [`RString`].
1797 fn into_r_string_with(self, handle: &Ruby) -> RString;
1798}
1799
1800impl IntoRString for RString {
1801 fn into_r_string_with(self, _: &Ruby) -> RString {
1802 self
1803 }
1804}
1805
1806impl IntoRString for &str {
1807 fn into_r_string_with(self, handle: &Ruby) -> RString {
1808 handle.str_new(self)
1809 }
1810}
1811
1812impl IntoRString for String {
1813 fn into_r_string_with(self, handle: &Ruby) -> RString {
1814 handle.str_new(&self)
1815 }
1816}
1817
1818#[cfg(unix)]
1819impl IntoRString for &Path {
1820 fn into_r_string_with(self, handle: &Ruby) -> RString {
1821 use std::os::unix::ffi::OsStrExt;
1822 handle.str_from_slice(self.as_os_str().as_bytes())
1823 }
1824}
1825
#[cfg(windows)]
impl IntoRString for &Path {
    /// Encodes the path as UTF-16LE when that encoding can be found, and
    /// falls back to a lossy UTF-8 string otherwise.
    fn into_r_string_with(self, handle: &Ruby) -> RString {
        use std::os::windows::ffi::OsStrExt;
        match handle.find_encoding("UTF-16LE") {
            Some(utf16) => {
                // Serialise each 16-bit unit as two little-endian bytes.
                let mut bytes: Vec<u8> = Vec::new();
                for unit in self.as_os_str().encode_wide() {
                    bytes.extend_from_slice(&unit.to_le_bytes());
                }
                handle.enc_str_new(bytes, utf16)
            }
            None => handle.str_new(self.to_string_lossy().as_ref()),
        }
    }
}
1842
#[cfg(not(any(unix, windows)))]
impl IntoRString for &Path {
    /// Uses a lossy UTF-8 rendering of the path.
    fn into_r_string_with(self, handle: &Ruby) -> RString {
        let lossy = self.to_string_lossy();
        handle.str_new(lossy.as_ref())
    }
}
1849
1850impl IntoRString for PathBuf {
1851 fn into_r_string_with(self, handle: &Ruby) -> RString {
1852 self.as_path().into_r_string_with(handle)
1853 }
1854}
1855
1856impl IntoValue for RString {
1857 #[inline]
1858 fn into_value_with(self, _: &Ruby) -> Value {
1859 self.0.get()
1860 }
1861}
1862
1863impl IntoValue for &str {
1864 #[inline]
1865 fn into_value_with(self, handle: &Ruby) -> Value {
1866 handle.str_new(self).into_value_with(handle)
1867 }
1868}
1869
1870unsafe impl IntoValueFromNative for &str {}
1871
#[cfg(feature = "bytes")]
impl IntoValue for bytes::Bytes {
    /// Passes the buffer's bytes to `Ruby::str_from_slice` and converts the
    /// resulting string to a `Value`.
    #[inline]
    fn into_value_with(self, handle: &Ruby) -> Value {
        let data: &[u8] = self.as_ref();
        handle.str_from_slice(data).into_value_with(handle)
    }
}
1879
1880impl IntoValue for String {
1881 #[inline]
1882 fn into_value_with(self, handle: &Ruby) -> Value {
1883 handle.str_new(self.as_str()).into_value_with(handle)
1884 }
1885}
1886
1887unsafe impl IntoValueFromNative for String {}
1888
1889impl IntoValue for char {
1890 #[inline]
1891 fn into_value_with(self, handle: &Ruby) -> Value {
1892 handle.str_from_char(self).into_value_with(handle)
1893 }
1894}
1895
1896unsafe impl IntoValueFromNative for char {}
1897
1898impl IntoValue for &Path {
1899 #[inline]
1900 fn into_value_with(self, handle: &Ruby) -> Value {
1901 self.into_r_string_with(handle).into_value_with(handle)
1902 }
1903}
1904
1905unsafe impl IntoValueFromNative for &Path {}
1906
1907impl IntoValue for PathBuf {
1908 #[inline]
1909 fn into_value_with(self, handle: &Ruby) -> Value {
1910 self.as_path()
1911 .into_r_string_with(handle)
1912 .into_value_with(handle)
1913 }
1914}
1915
1916unsafe impl IntoValueFromNative for PathBuf {}
1917
// Empty impl: `RString` uses the trait's provided items without overriding
// any of them.
impl Object for RString {}

// NOTE(review): `unsafe impl` — the safety contract lives on
// `private::ReprValue`, outside this view; confirm `RString` upholds it.
unsafe impl private::ReprValue for RString {}

// Empty impl: `RString` uses the trait's provided items without overriding
// any of them.
impl ReprValue for RString {}
1923
1924impl TryConvert for RString {
1925 fn try_convert(val: Value) -> Result<Self, Error> {
1926 match Self::from_value(val) {
1927 Some(i) => Ok(i),
1928 None => protect(|| {
1929 debug_assert_value!(val);
1930 unsafe { Self::from_rb_value_unchecked(rb_str_to_str(val.as_rb_value())) }
1931 }),
1932 }
1933 }
1934}
1935
1936/// An iterator over a Ruby string's codepoints.
1937pub struct Codepoints<'a> {
1938 slice: &'a [u8],
1939 encoding: RbEncoding,
1940}
1941
1942impl Iterator for Codepoints<'_> {
1943 type Item = Result<u32, Error>;
1944
1945 fn next(&mut self) -> Option<Self::Item> {
1946 if self.slice.is_empty() {
1947 return None;
1948 }
1949 match self.encoding.codepoint_len(self.slice) {
1950 Ok((codepoint, len)) => {
1951 self.slice = &self.slice[len..];
1952 Some(Ok(codepoint))
1953 }
1954 Err(e) => {
1955 self.slice = &self.slice[self.slice.len()..];
1956 Some(Err(e))
1957 }
1958 }
1959 }
1960}
1961
1962/// An iterator over a Ruby string's chars as slices of bytes.
1963pub struct CharBytes<'a> {
1964 slice: &'a [u8],
1965 encoding: RbEncoding,
1966}
1967
1968impl<'a> Iterator for CharBytes<'a> {
1969 type Item = &'a [u8];
1970
1971 fn next(&mut self) -> Option<Self::Item> {
1972 if self.slice.is_empty() {
1973 return None;
1974 }
1975 let len = self.encoding.mbclen(self.slice);
1976 let bytes = &self.slice[..len];
1977 self.slice = &self.slice[len..];
1978 Some(bytes)
1979 }
1980}
1981
1982/// Create a [`RString`] from a Rust str literal.
1983///
1984/// # Panics
1985///
1986/// Panics if called from a non-Ruby thread.
1987///
1988/// # Examples
1989///
1990/// ```
1991/// use magnus::{r_string, rb_assert, Error, Ruby};
1992///
1993/// fn example(ruby: &Ruby) -> Result<(), Error> {
1994/// let s = r_string!("Hello, world!");
1995/// rb_assert!(ruby, r#"s == "Hello, world!""#, s);
1996///
1997/// Ok(())
1998/// }
1999/// # Ruby::init(example).unwrap()
2000/// ```
#[macro_export]
macro_rules! r_string {
    // Without an explicit handle: fetch one (panicking on a non-Ruby thread)
    // and defer to the two-argument form.
    ($lit:expr) => {{
        $crate::r_string!($crate::Ruby::get().unwrap(), $lit)
    }};
    // With an explicit handle: append a NUL at compile time, then pass the
    // pointer together with the length *excluding* that NUL.
    ($ruby:expr, $lit:expr) => {{
        let nul_terminated: &'static str = concat!($lit, "\0");
        let len = nul_terminated.len() - 1;
        unsafe { $ruby.str_new_lit(nul_terminated.as_ptr() as *const _, len as _) }
    }};
}