Thanks for visiting codestin.com
Credit goes to docs.rs

arrow2/array/boolean/
mod.rs

1use crate::{
2    bitmap::{
3        utils::{BitmapIter, ZipValidity},
4        Bitmap, MutableBitmap,
5    },
6    datatypes::{DataType, PhysicalType},
7    error::Error,
8    trusted_len::TrustedLen,
9};
10use either::Either;
11
12use super::Array;
13
14#[cfg(feature = "arrow")]
15mod data;
16mod ffi;
17pub(super) mod fmt;
18mod from;
19mod iterator;
20mod mutable;
21
22pub use iterator::*;
23pub use mutable::*;
24
25/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<bool>>`.
26/// It implements [`Array`].
27///
28/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`
29/// where:
30/// * the first item is the array's logical type
31/// * the second is the immutable values
32/// * the third is the immutable validity (whether a value is null or not as a bitmap).
33///
34/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].
35/// # Example
36/// ```
37/// use arrow2::array::BooleanArray;
38/// use arrow2::bitmap::Bitmap;
39/// use arrow2::buffer::Buffer;
40///
41/// let array = BooleanArray::from([Some(true), None, Some(false)]);
42/// assert_eq!(array.value(0), true);
43/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);
44/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);
45/// // the underlying representation
46/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));
47/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
48///
49/// ```
50#[derive(Clone)]
51pub struct BooleanArray {
52    data_type: DataType,
53    values: Bitmap,
54    validity: Option<Bitmap>,
55}
56
57impl BooleanArray {
58    /// The canonical method to create a [`BooleanArray`] out of low-end APIs.
59    /// # Errors
60    /// This function errors iff:
61    /// * The validity is not `None` and its length is different from `values`'s length
62    /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].
63    pub fn try_new(
64        data_type: DataType,
65        values: Bitmap,
66        validity: Option<Bitmap>,
67    ) -> Result<Self, Error> {
68        if validity
69            .as_ref()
70            .map_or(false, |validity| validity.len() != values.len())
71        {
72            return Err(Error::oos(
73                "validity mask length must match the number of values",
74            ));
75        }
76
77        if data_type.to_physical_type() != PhysicalType::Boolean {
78            return Err(Error::oos(
79                "BooleanArray can only be initialized with a DataType whose physical type is Boolean",
80            ));
81        }
82
83        Ok(Self {
84            data_type,
85            values,
86            validity,
87        })
88    }
89
90    /// Alias to `Self::try_new().unwrap()`
91    pub fn new(data_type: DataType, values: Bitmap, validity: Option<Bitmap>) -> Self {
92        Self::try_new(data_type, values, validity).unwrap()
93    }
94
95    /// Returns an iterator over the optional values of this [`BooleanArray`].
96    #[inline]
97    pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
98        ZipValidity::new_with_validity(self.values().iter(), self.validity())
99    }
100
101    /// Returns an iterator over the values of this [`BooleanArray`].
102    #[inline]
103    pub fn values_iter(&self) -> BitmapIter {
104        self.values().iter()
105    }
106
107    /// Returns the length of this array
108    #[inline]
109    pub fn len(&self) -> usize {
110        self.values.len()
111    }
112
113    /// The values [`Bitmap`].
114    /// Values on null slots are undetermined (they can be anything).
115    #[inline]
116    pub fn values(&self) -> &Bitmap {
117        &self.values
118    }
119
120    /// Returns the optional validity.
121    #[inline]
122    pub fn validity(&self) -> Option<&Bitmap> {
123        self.validity.as_ref()
124    }
125
126    /// Returns the arrays' [`DataType`].
127    #[inline]
128    pub fn data_type(&self) -> &DataType {
129        &self.data_type
130    }
131
132    /// Returns the value at index `i`
133    /// # Panic
134    /// This function panics iff `i >= self.len()`.
135    #[inline]
136    pub fn value(&self, i: usize) -> bool {
137        self.values.get_bit(i)
138    }
139
140    /// Returns the element at index `i` as bool
141    /// # Safety
142    /// Caller must be sure that `i < self.len()`
143    #[inline]
144    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
145        self.values.get_bit_unchecked(i)
146    }
147
148    /// Returns the element at index `i` or `None` if it is null
149    /// # Panics
150    /// iff `i >= self.len()`
151    #[inline]
152    pub fn get(&self, i: usize) -> Option<bool> {
153        if !self.is_null(i) {
154            // soundness: Array::is_null panics if i >= self.len
155            unsafe { Some(self.value_unchecked(i)) }
156        } else {
157            None
158        }
159    }
160
161    /// Slices this [`BooleanArray`].
162    /// # Implementation
163    /// This operation is `O(1)` as it amounts to increase up to two ref counts.
164    /// # Panic
165    /// This function panics iff `offset + length > self.len()`.
166    #[inline]
167    pub fn slice(&mut self, offset: usize, length: usize) {
168        assert!(
169            offset + length <= self.len(),
170            "the offset of the new Buffer cannot exceed the existing length"
171        );
172        unsafe { self.slice_unchecked(offset, length) }
173    }
174
175    /// Slices this [`BooleanArray`].
176    /// # Implementation
177    /// This operation is `O(1)` as it amounts to increase two ref counts.
178    /// # Safety
179    /// The caller must ensure that `offset + length <= self.len()`.
180    #[inline]
181    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
182        self.validity.as_mut().and_then(|bitmap| {
183            bitmap.slice_unchecked(offset, length);
184            (bitmap.unset_bits() > 0).then(|| bitmap)
185        });
186        self.values.slice_unchecked(offset, length);
187    }
188
189    impl_sliced!();
190    impl_mut_validity!();
191    impl_into_array!();
192
193    /// Returns a clone of this [`BooleanArray`] with new values.
194    /// # Panics
195    /// This function panics iff `values.len() != self.len()`.
196    #[must_use]
197    pub fn with_values(&self, values: Bitmap) -> Self {
198        let mut out = self.clone();
199        out.set_values(values);
200        out
201    }
202
203    /// Sets the values of this [`BooleanArray`].
204    /// # Panics
205    /// This function panics iff `values.len() != self.len()`.
206    pub fn set_values(&mut self, values: Bitmap) {
207        assert_eq!(
208            values.len(),
209            self.len(),
210            "values length must be equal to this arrays length"
211        );
212        self.values = values;
213    }
214
215    /// Applies a function `f` to the values of this array, cloning the values
216    /// iff they are being shared with others
217    ///
218    /// This is an API to use clone-on-write
219    /// # Implementation
220    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
221    /// if it is being shared (since it results in a `O(N)` memcopy).
222    /// # Panics
223    /// This function panics if the function modifies the length of the [`MutableBitmap`].
224    pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
225        let values = std::mem::take(&mut self.values);
226        let mut values = values.make_mut();
227        f(&mut values);
228        if let Some(validity) = &self.validity {
229            assert_eq!(validity.len(), values.len());
230        }
231        self.values = values.into();
232    }
233
234    /// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]
235    pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {
236        use Either::*;
237
238        if let Some(bitmap) = self.validity {
239            match bitmap.into_mut() {
240                Left(bitmap) => Left(BooleanArray::new(self.data_type, self.values, Some(bitmap))),
241                Right(mutable_bitmap) => match self.values.into_mut() {
242                    Left(immutable) => Left(BooleanArray::new(
243                        self.data_type,
244                        immutable,
245                        Some(mutable_bitmap.into()),
246                    )),
247                    Right(mutable) => Right(
248                        MutableBooleanArray::try_new(self.data_type, mutable, Some(mutable_bitmap))
249                            .unwrap(),
250                    ),
251                },
252            }
253        } else {
254            match self.values.into_mut() {
255                Left(immutable) => Left(BooleanArray::new(self.data_type, immutable, None)),
256                Right(mutable) => {
257                    Right(MutableBooleanArray::try_new(self.data_type, mutable, None).unwrap())
258                }
259            }
260        }
261    }
262
263    /// Returns a new empty [`BooleanArray`].
264    pub fn new_empty(data_type: DataType) -> Self {
265        Self::new(data_type, Bitmap::new(), None)
266    }
267
268    /// Returns a new [`BooleanArray`] whose all slots are null / `None`.
269    pub fn new_null(data_type: DataType, length: usize) -> Self {
270        let bitmap = Bitmap::new_zeroed(length);
271        Self::new(data_type, bitmap.clone(), Some(bitmap))
272    }
273
274    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
275    #[inline]
276    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
277        MutableBooleanArray::from_trusted_len_values_iter(iterator).into()
278    }
279
280    /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.
281    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
282    /// but this crate does not mark it as such.
283    /// # Safety
284    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
285    /// I.e. that `size_hint().1` correctly reports its length.
286    #[inline]
287    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(
288        iterator: I,
289    ) -> Self {
290        MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()
291    }
292
293    /// Creates a new [`BooleanArray`] from a slice of `bool`.
294    #[inline]
295    pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {
296        MutableBooleanArray::from_slice(slice).into()
297    }
298
299    /// Creates a [`BooleanArray`] from an iterator of trusted length.
300    /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len
301    /// but this crate does not mark it as such.
302    /// # Safety
303    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
304    /// I.e. that `size_hint().1` correctly reports its length.
305    #[inline]
306    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
307    where
308        P: std::borrow::Borrow<bool>,
309        I: Iterator<Item = Option<P>>,
310    {
311        MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()
312    }
313
314    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
315    #[inline]
316    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
317    where
318        P: std::borrow::Borrow<bool>,
319        I: TrustedLen<Item = Option<P>>,
320    {
321        MutableBooleanArray::from_trusted_len_iter(iterator).into()
322    }
323
324    /// Creates a [`BooleanArray`] from an falible iterator of trusted length.
325    /// # Safety
326    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
327    /// I.e. that `size_hint().1` correctly reports its length.
328    #[inline]
329    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
330    where
331        P: std::borrow::Borrow<bool>,
332        I: Iterator<Item = Result<Option<P>, E>>,
333    {
334        Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())
335    }
336
337    /// Creates a [`BooleanArray`] from a [`TrustedLen`].
338    #[inline]
339    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>
340    where
341        P: std::borrow::Borrow<bool>,
342        I: TrustedLen<Item = Result<Option<P>, E>>,
343    {
344        Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())
345    }
346
347    /// Returns its internal representation
348    #[must_use]
349    pub fn into_inner(self) -> (DataType, Bitmap, Option<Bitmap>) {
350        let Self {
351            data_type,
352            values,
353            validity,
354        } = self;
355        (data_type, values, validity)
356    }
357
358    /// Creates a `[BooleanArray]` from its internal representation.
359    /// This is the inverted from `[BooleanArray::into_inner]`
360    ///
361    /// # Safety
362    /// Callers must ensure all invariants of this struct are upheld.
363    pub unsafe fn from_inner_unchecked(
364        data_type: DataType,
365        values: Bitmap,
366        validity: Option<Bitmap>,
367    ) -> Self {
368        Self {
369            data_type,
370            values,
371            validity,
372        }
373    }
374}
375
376impl Array for BooleanArray {
377    impl_common_array!();
378
379    fn validity(&self) -> Option<&Bitmap> {
380        self.validity.as_ref()
381    }
382
383    #[inline]
384    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
385        Box::new(self.clone().with_validity(validity))
386    }
387}