Struct RbEncoding

Source

pub struct RbEncoding(/* private fields */);

Expand description

Ruby’s internal encoding type.

This type contains the data for an encoding, and is used with operations such as converting a string from one encoding to another, or reading a string character by character.

See Ruby for methods to get an RbEncoding.

Implementations§

Source §

impl RbEncoding

Source

pub fn ascii8bit() -> Self

Available on crate feature old-api only.

Returns the encoding that represents ASCII-8BIT a.k.a. binary.

§Panics

Panics if called from a non-Ruby thread. See Ruby::ascii8bit_encoding for the non-panicking version.

Source

pub fn utf8() -> Self

Available on crate feature old-api only.

Returns the encoding that represents UTF-8.

§Panics

Panics if called from a non-Ruby thread. See Ruby::utf8_encoding for the non-panicking version.

Source

pub fn usascii() -> Self

Available on crate feature old-api only.

Returns the encoding that represents US-ASCII.

§Panics

Panics if called from a non-Ruby thread. See Ruby::usascii_encoding for the non-panicking version.

Source

pub fn locale() -> Self

Available on crate feature old-api only.

Returns the encoding that represents the process’ current locale.

This is dynamic. If you change the process’ locale that should also change the return value of this function.

§Panics

Panics if called from a non-Ruby thread. See Ruby::locale_encoding for the non-panicking version.

Source

pub fn filesystem() -> Self

Available on crate feature old-api only.

Returns the filesystem encoding.

This is the encoding that Ruby expects data from the OS’ file system to be encoded as, such as directory names.

§Panics

Panics if called from a non-Ruby thread. See Ruby::filesystem_encoding for the non-panicking version.

Source

pub fn default_external() -> Self

Available on crate feature old-api only.

Returns the default external encoding.

This is the encoding used for anything out-of-process, such as reading from files or sockets.

§Panics

Panics if called from a non-Ruby thread. See Ruby::default_external_encoding for the non-panicking version.

Source

pub fn default_internal() -> Option<Self>

Available on crate feature old-api only.

Returns the default internal encoding.

If set, any out-of-process data is transcoded from the default external encoding to the default internal encoding.

§Panics

Panics if called from a non-Ruby thread. See Ruby::default_internal_encoding for the non-panicking version.

Source

pub fn find(name: &str) -> Option<Self>

Available on crate feature old-api only.

Returns the encoding whose name or alias is name.

§Panics

Panics if called from a non-Ruby thread. See Ruby::find_encoding for the non-panicking version.

§Examples

use magnus::encoding::RbEncoding;

assert_eq!(RbEncoding::find("UTF-8").unwrap().name(), "UTF-8");
assert_eq!(RbEncoding::find("BINARY").unwrap().name(), "ASCII-8BIT");

Source

pub fn name(&self) -> &str

Returns the canonical name of the encoding.

§Examples

use magnus::{Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert_eq!(ruby.utf8_encoding().name(), "UTF-8");
    assert_eq!(ruby.find_encoding("UTF-16").unwrap().name(), "UTF-16");

    Ok(())
}

§Panics

Panics if the name is not valid UTF-8. Encoding names are expected to be ASCII only.

Source

pub fn mbminlen(&self) -> usize

Returns the minimum number of bytes the encoding needs to represent a single character.

§Examples

use magnus::{Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert_eq!(ruby.usascii_encoding().mbminlen(), 1);
    assert_eq!(ruby.utf8_encoding().mbminlen(), 1);

    Ok(())
}

Source

pub fn mbmaxlen(&self) -> usize

Returns the maximum number of bytes the encoding may need to represent a single character.

§Examples

use magnus::{Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert_eq!(ruby.usascii_encoding().mbmaxlen(), 1);
    assert_eq!(ruby.utf8_encoding().mbmaxlen(), 4);

    Ok(())
}

Source

pub fn mbclen(&self, slice: &[u8]) -> usize

Returns the number of bytes of the first character in slice.

If the first byte of slice is mid way through a character this will return the number of bytes until the next character boundary.

If the slice ends before the last byte of the character this will return the number of bytes until the end of the slice.

See also fast_mbclen and precise_mbclen.

§Examples

use magnus::{
    encoding::{EncodingCapable, RbEncoding},
    Error, Ruby,
};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let s = ruby.str_new("🦀 café");
    let encoding: RbEncoding = s.enc_get().into();
    let mut chars = 0;

    unsafe {
        let mut bytes = s.as_slice();
        assert_eq!(bytes.len(), 10);

        while !bytes.is_empty() {
            chars += 1;
            let len = encoding.mbclen(bytes);
            bytes = &bytes[len..];
        }
    }

    assert_eq!(chars, 6);

    Ok(())
}

Source

pub fn fast_mbclen(&self, slice: &[u8]) -> usize

Returns the number of bytes of the first character in slice.

If the first byte of slice is mid way through a character this will return the number of bytes until the next character boundary.

If the slice ends before the last byte of the character this will return the theoretical number of bytes until the end of the character, which will be past the end of the slice. If the string has been read from an IO source this may indicate more data needs to be read.

§Examples

use magnus::{
    encoding::{EncodingCapable, RbEncoding},
    Error, Ruby,
};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let s = ruby.str_new("🦀 café");
    let encoding: RbEncoding = s.enc_get().into();
    let mut chars = 0;

    unsafe {
        let mut bytes = s.as_slice();
        assert_eq!(bytes.len(), 10);

        while !bytes.is_empty() {
            chars += 1;
            let len = encoding.fast_mbclen(bytes);
            bytes = &bytes[len..];
        }
    }

    assert_eq!(chars, 6);

    Ok(())
}

Source

pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen

Returns the number of bytes of the first character in slice.

§Examples

use magnus::{
    encoding::{EncodingCapable, MbcLen, RbEncoding},
    Error, Ruby,
};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let s = ruby.str_new("🦀 café");
    let encoding: RbEncoding = s.enc_get().into();
    let mut chars = 0;

    unsafe {
        let mut bytes = s.as_slice();
        assert_eq!(bytes.len(), 10);

        while !bytes.is_empty() {
            chars += 1;
            match encoding.precise_mbclen(bytes) {
                MbcLen::CharFound(len) => bytes = &bytes[len..],
                MbcLen::NeedMore(len) => panic!("Met end of string expecting {} bytes", len),
                MbcLen::Invalid => panic!("corrupted string"),
            }
        }
    }

    assert_eq!(chars, 6);

    Ok(())
}

Source

pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)>

If the first character in slice is an ASCII character, returns it and its encoded length in slice; otherwise returns None.

Typically the length will be 1, but some encodings such as UTF-16 will encode ASCII characters in 2 bytes.

§Examples

use magnus::{
    encoding::{EncodingCapable, RbEncoding},
    Error, Ruby,
};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let s = ruby.str_new("example");
    let encoding: RbEncoding = s.enc_get().into();
    let mut chars = Vec::new();

    unsafe {
        let mut bytes = s.as_slice();

        while !bytes.is_empty() {
            match encoding.ascget(bytes) {
                Some((char, len)) => {
                    chars.push(char);
                    bytes = &bytes[len..];
                }
                None => panic!("string not ASCII"),
            }
        }
    }

    assert_eq!(chars, [101, 120, 97, 109, 112, 108, 101]);

    Ok(())
}

Source

pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error>

Returns the codepoint and length in bytes of the first character in slice.

§Examples

use magnus::{
    encoding::{EncodingCapable, RbEncoding},
    Error, Ruby,
};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let s = ruby.str_new("🦀 café");
    let encoding: RbEncoding = s.enc_get().into();
    let mut codepoints = Vec::new();

    unsafe {
        let mut bytes = s.as_slice();

        while !bytes.is_empty() {
            let (codepoint, len) = encoding.codepoint_len(bytes)?;
            codepoints.push(codepoint);
            bytes = &bytes[len..];
        }
    }

    assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);

    Ok(())
}

Source

pub fn codelen(&self, code: u32) -> Result<usize, Error>

Returns the number of bytes required to represent the code point code in the encoding of self.

§Examples

use magnus::{Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert_eq!(ruby.utf8_encoding().codelen(97)?, 1);
    assert_eq!(ruby.utf8_encoding().codelen(129408)?, 4);

    Ok(())
}

Source

pub fn chr(&self, code: u32) -> Result<RString, Error>

Encode the codepoint code as a series of bytes in the encoding self and return the result as a Ruby string.

§Examples

use magnus::{eval, Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let c = ruby.usascii_encoding().chr(97)?;
    let res: bool = eval!(ruby, r#"c == "a""#, c)?;
    assert!(res);

    Ok(())
}

use magnus::{eval, Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    let c = ruby.utf8_encoding().chr(129408)?;
    let res: bool = eval!(ruby, r#"c == "🦀""#, c)?;
    assert!(res);

    Ok(())
}

Source

pub fn is_mbc_newline(&self, slice: &[u8]) -> bool

Returns true if the first character in slice is a newline in the encoding self, false otherwise.

§Examples

use magnus::{Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert!(ruby.utf8_encoding().is_mbc_newline(&[10]));
    assert!(!ruby.utf8_encoding().is_mbc_newline(&[32]));

    Ok(())
}

Source

pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool

Returns whether the given codepoint code is of the character type ctype in the encoding self.

§Examples

use magnus::{encoding::CType, Error, Ruby};

fn example(ruby: &Ruby) -> Result<(), Error> {
    assert!(ruby.utf8_encoding().is_code_ctype(9, CType::Space)); // "\t"
    assert!(ruby.utf8_encoding().is_code_ctype(32, CType::Space)); // " "
    assert!(!ruby.utf8_encoding().is_code_ctype(65, CType::Space)); // "A"
    assert!(ruby.utf8_encoding().is_code_ctype(65, CType::Alnum)); // "A"
    assert!(ruby.utf8_encoding().is_code_ctype(65, CType::Upper)); // "A"

    Ok(())
}