pub struct RbEncoding(/* private fields */);
Expand description
Ruby’s internal encoding type.
This type contains the data for an encoding, and is used with operations such as converting a string from one encoding to another, or reading a string character by character.
See Ruby for methods to get an RbEncoding.
Implementations§
Source§impl RbEncoding
impl RbEncoding
Sourcepub fn ascii8bit() -> Self
Available on crate feature old-api
only.
pub fn ascii8bit() -> Self
old-api only.
Returns the encoding that represents ASCII-8BIT, a.k.a. binary.
§Panics
Panics if called from a non-Ruby thread. See
Ruby::ascii8bit_encoding
for the non-panicking version.
Sourcepub fn utf8() -> Self
Available on crate feature old-api
only.
pub fn utf8() -> Self
old-api only.
Returns the encoding that represents UTF-8.
§Panics
Panics if called from a non-Ruby thread. See Ruby::utf8_encoding
for the non-panicking version.
Sourcepub fn usascii() -> Self
Available on crate feature old-api
only.
pub fn usascii() -> Self
old-api only.
Returns the encoding that represents US-ASCII.
§Panics
Panics if called from a non-Ruby thread. See Ruby::usascii_encoding
for the non-panicking version.
Sourcepub fn locale() -> Self
Available on crate feature old-api
only.
pub fn locale() -> Self
old-api only.
Returns the encoding that represents the process’ current locale.
This is dynamic. If you change the process’ locale that should also change the return value of this function.
§Panics
Panics if called from a non-Ruby thread. See Ruby::locale_encoding
for the non-panicking version.
Sourcepub fn filesystem() -> Self
Available on crate feature old-api
only.
pub fn filesystem() -> Self
old-api only.
Returns the filesystem encoding.
This is the encoding that Ruby expects data from the OS’ file system to be encoded as, such as directory names.
§Panics
Panics if called from a non-Ruby thread. See
Ruby::filesystem_encoding
for the non-panicking version.
Sourcepub fn default_external() -> Self
Available on crate feature old-api
only.
pub fn default_external() -> Self
old-api only.
Returns the default external encoding.
This is the encoding used for anything out-of-process, such as reading from files or sockets.
§Panics
Panics if called from a non-Ruby thread. See
Ruby::default_external_encoding
for the non-panicking version.
Sourcepub fn default_internal() -> Option<Self>
Available on crate feature old-api
only.
pub fn default_internal() -> Option<Self>
old-api only.
Returns the default internal encoding.
If set, any out-of-process data is transcoded from the default external encoding to the default internal encoding.
§Panics
Panics if called from a non-Ruby thread. See
Ruby::default_internal_encoding
for the non-panicking version.
Sourcepub fn find(name: &str) -> Option<Self>
Available on crate feature old-api
only.
pub fn find(name: &str) -> Option<Self>
old-api only.
Returns the encoding with the name or alias name.
§Panics
Panics if called from a non-Ruby thread. See Ruby::find_encoding
for the non-panicking version.
§Examples
use magnus::encoding::RbEncoding;
assert_eq!(RbEncoding::find("UTF-8").unwrap().name(), "UTF-8");
assert_eq!(RbEncoding::find("BINARY").unwrap().name(), "ASCII-8BIT");
Sourcepub fn name(&self) -> &str
pub fn name(&self) -> &str
Returns the canonical name of the encoding.
§Examples
use magnus::{Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert_eq!(ruby.utf8_encoding().name(), "UTF-8");
assert_eq!(ruby.find_encoding("UTF-16").unwrap().name(), "UTF-16");
Ok(())
}
§Panics
Panics if the name is not valid UTF-8. Encoding names are expected to be ASCII only.
Sourcepub fn mbminlen(&self) -> usize
pub fn mbminlen(&self) -> usize
Returns the minimum number of bytes the encoding needs to represent a single character.
§Examples
use magnus::{Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert_eq!(ruby.usascii_encoding().mbminlen(), 1);
assert_eq!(ruby.utf8_encoding().mbminlen(), 1);
Ok(())
}
Sourcepub fn mbmaxlen(&self) -> usize
pub fn mbmaxlen(&self) -> usize
Returns the maximum number of bytes the encoding may need to represent a single character.
§Examples
use magnus::{Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert_eq!(ruby.usascii_encoding().mbmaxlen(), 1);
assert_eq!(ruby.utf8_encoding().mbmaxlen(), 4);
Ok(())
}
Sourcepub fn mbclen(&self, slice: &[u8]) -> usize
pub fn mbclen(&self, slice: &[u8]) -> usize
Returns the number of bytes of the first character in slice
.
If the first byte of slice is mid way through a character this will return the number of bytes until the next character boundary.
If the slice ends before the last byte of the character this will return the number of bytes until the end of the slice.
See also fast_mbclen
and
precise_mbclen
.
§Examples
use magnus::{
encoding::{EncodingCapable, RbEncoding},
Error, Ruby,
};
fn example(ruby: &Ruby) -> Result<(), Error> {
let s = ruby.str_new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
let len = encoding.mbclen(bytes);
bytes = &bytes[len..];
}
}
assert_eq!(chars, 6);
Ok(())
}
Sourcepub fn fast_mbclen(&self, slice: &[u8]) -> usize
pub fn fast_mbclen(&self, slice: &[u8]) -> usize
Returns the number of bytes of the first character in slice
.
If the first byte of slice
is mid way through a character this will
return the number of bytes until the next character boundary.
If the slice ends before the last byte of the character this will return the theoretical number of bytes until the end of the character, which will be past the end of the slice. If the string has been read from an IO source this may indicate more data needs to be read.
See also mbclen
and
precise_mbclen
.
§Examples
use magnus::{
encoding::{EncodingCapable, RbEncoding},
Error, Ruby,
};
fn example(ruby: &Ruby) -> Result<(), Error> {
let s = ruby.str_new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
let len = encoding.fast_mbclen(bytes);
bytes = &bytes[len..];
}
}
assert_eq!(chars, 6);
Ok(())
}
Sourcepub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen
pub fn precise_mbclen(&self, slice: &[u8]) -> MbcLen
Returns the number of bytes of the first character in slice
.
See also mbclen
and
fast_mbclen
.
§Examples
use magnus::{
encoding::{EncodingCapable, MbcLen, RbEncoding},
Error, Ruby,
};
fn example(ruby: &Ruby) -> Result<(), Error> {
let s = ruby.str_new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = 0;
unsafe {
let mut bytes = s.as_slice();
assert_eq!(bytes.len(), 10);
while !bytes.is_empty() {
chars += 1;
match encoding.precise_mbclen(bytes) {
MbcLen::CharFound(len) => bytes = &bytes[len..],
MbcLen::NeedMore(len) => panic!("Met end of string expecting {} bytes", len),
MbcLen::Invalid => panic!("corrupted string"),
}
}
}
assert_eq!(chars, 6);
Ok(())
}
Sourcepub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)>
pub fn ascget(&self, slice: &[u8]) -> Option<(u8, usize)>
If the first character in slice is included in ASCII, returns it and its encoded length in slice; otherwise returns None.
Typically the length will be 1, but some encodings such as UTF-16 will encode ASCII characters in 2 bytes.
§Examples
use magnus::{
encoding::{EncodingCapable, RbEncoding},
Error, Ruby,
};
fn example(ruby: &Ruby) -> Result<(), Error> {
let s = ruby.str_new("example");
let encoding: RbEncoding = s.enc_get().into();
let mut chars = Vec::new();
unsafe {
let mut bytes = s.as_slice();
while !bytes.is_empty() {
match encoding.ascget(bytes) {
Some((char, len)) => {
chars.push(char);
bytes = &bytes[len..];
}
None => panic!("string not ASCII"),
}
}
}
assert_eq!(chars, [101, 120, 97, 109, 112, 108, 101]);
Ok(())
}
Sourcepub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error>
pub fn codepoint_len(&self, slice: &[u8]) -> Result<(u32, usize), Error>
Returns the codepoint and length in bytes of the first character in
slice
.
§Examples
use magnus::{
encoding::{EncodingCapable, RbEncoding},
Error, Ruby,
};
fn example(ruby: &Ruby) -> Result<(), Error> {
let s = ruby.str_new("🦀 café");
let encoding: RbEncoding = s.enc_get().into();
let mut codepoints = Vec::new();
unsafe {
let mut bytes = s.as_slice();
while !bytes.is_empty() {
let (codepoint, len) = encoding.codepoint_len(bytes)?;
codepoints.push(codepoint);
bytes = &bytes[len..];
}
}
assert_eq!(codepoints, [129408, 32, 99, 97, 102, 233]);
Ok(())
}
Sourcepub fn codelen(&self, code: u32) -> Result<usize, Error>
pub fn codelen(&self, code: u32) -> Result<usize, Error>
Returns the number of bytes required to represent the code point code
in the encoding of self
.
§Examples
use magnus::{Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert_eq!(ruby.utf8_encoding().codelen(97)?, 1);
assert_eq!(ruby.utf8_encoding().codelen(129408)?, 4);
Ok(())
}
Sourcepub fn chr(&self, code: u32) -> Result<RString, Error>
pub fn chr(&self, code: u32) -> Result<RString, Error>
Encode the codepoint code
as a series of bytes in the encoding self
and return the result as a Ruby string.
§Examples
use magnus::{eval, Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
let c = ruby.usascii_encoding().chr(97)?;
let res: bool = eval!(ruby, r#"c == "a""#, c)?;
assert!(res);
Ok(())
}
use magnus::{eval, Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
let c = ruby.utf8_encoding().chr(129408)?;
let res: bool = eval!(ruby, r#"c == "🦀""#, c)?;
assert!(res);
Ok(())
}
Sourcepub fn is_mbc_newline(&self, slice: &[u8]) -> bool
pub fn is_mbc_newline(&self, slice: &[u8]) -> bool
Returns true if the first character in slice is a newline in the encoding self, false otherwise.
§Examples
use magnus::{Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert!(ruby.utf8_encoding().is_mbc_newline(&[10]));
assert!(!ruby.utf8_encoding().is_mbc_newline(&[32]));
Ok(())
}
Sourcepub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool
pub fn is_code_ctype(&self, code: u32, ctype: CType) -> bool
Returns whether the given codepoint code is of the character type ctype in the encoding self.
§Examples
use magnus::{encoding::CType, Error, Ruby};
fn example(ruby: &Ruby) -> Result<(), Error> {
assert!(ruby.utf8_encoding().is_code_ctype(9, CType::Space)); // "\t"
assert!(ruby.utf8_encoding().is_code_ctype(32, CType::Space)); // " "
assert!(!ruby.utf8_encoding().is_code_ctype(65, CType::Space)); // "A"
assert!(ruby.utf8_encoding().is_code_ctype(65, CType::Alnum)); // "A"
assert!(ruby.utf8_encoding().is_code_ctype(65, CType::Upper)); // "A"
Ok(())
}