Thanks to visit codestin.com
Credit goes to docs.rs

arrow2/array/binary/
mutable.rs

1use std::{iter::FromIterator, sync::Arc};
2
3use crate::{
4    array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush},
5    bitmap::{
6        utils::{BitmapIter, ZipValidity},
7        Bitmap, MutableBitmap,
8    },
9    datatypes::DataType,
10    error::{Error, Result},
11    offset::{Offset, Offsets},
12    trusted_len::TrustedLen,
13};
14
15use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
16use crate::array::physical_binary::*;
17
18/// The Arrow's equivalent to `Vec<Option<Vec<u8>>>`.
19/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.
20/// # Implementation
21/// This struct does not allocate a validity until one is required (i.e. push a null to it).
22#[derive(Debug, Clone)]
23pub struct MutableBinaryArray<O: Offset> {
24    values: MutableBinaryValuesArray<O>,
25    validity: Option<MutableBitmap>,
26}
27
28impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {
29    fn from(other: MutableBinaryArray<O>) -> Self {
30        let validity = other.validity.and_then(|x| {
31            let validity: Option<Bitmap> = x.into();
32            validity
33        });
34        let array: BinaryArray<O> = other.values.into();
35        array.with_validity(validity)
36    }
37}
38
39impl<O: Offset> Default for MutableBinaryArray<O> {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl<O: Offset> MutableBinaryArray<O> {
46    /// Creates a new empty [`MutableBinaryArray`].
47    /// # Implementation
48    /// This allocates a [`Vec`] of one element
49    pub fn new() -> Self {
50        Self::with_capacity(0)
51    }
52
53    /// Returns a [`MutableBinaryArray`] created from its internal representation.
54    ///
55    /// # Errors
56    /// This function returns an error iff:
57    /// * The last offset is not equal to the values' length.
58    /// * the validity's length is not equal to `offsets.len()`.
59    /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
60    /// # Implementation
61    /// This function is `O(1)`
62    pub fn try_new(
63        data_type: DataType,
64        offsets: Offsets<O>,
65        values: Vec<u8>,
66        validity: Option<MutableBitmap>,
67    ) -> Result<Self> {
68        let values = MutableBinaryValuesArray::try_new(data_type, offsets, values)?;
69
70        if validity
71            .as_ref()
72            .map_or(false, |validity| validity.len() != values.len())
73        {
74            return Err(Error::oos(
75                "validity's length must be equal to the number of values",
76            ));
77        }
78
79        Ok(Self { values, validity })
80    }
81
82    /// Creates a new [`MutableBinaryArray`] from a slice of optional `&[u8]`.
83    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
84    pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
85        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
86    }
87
88    fn default_data_type() -> DataType {
89        BinaryArray::<O>::default_data_type()
90    }
91
92    /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots.
93    pub fn with_capacity(capacity: usize) -> Self {
94        Self::with_capacities(capacity, 0)
95    }
96
97    /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots and values.
98    /// # Implementation
99    /// This does not allocate the validity.
100    pub fn with_capacities(capacity: usize, values: usize) -> Self {
101        Self {
102            values: MutableBinaryValuesArray::with_capacities(capacity, values),
103            validity: None,
104        }
105    }
106
107    /// Reserves `additional` elements and `additional_values` on the values buffer.
108    pub fn reserve(&mut self, additional: usize, additional_values: usize) {
109        self.values.reserve(additional, additional_values);
110        if let Some(x) = self.validity.as_mut() {
111            x.reserve(additional)
112        }
113    }
114
115    /// Pushes a new element to the array.
116    /// # Panic
117    /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
118    pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {
119        self.try_push(value).unwrap()
120    }
121
122    /// Pop the last entry from [`MutableBinaryArray`].
123    /// This function returns `None` iff this array is empty
124    pub fn pop(&mut self) -> Option<Vec<u8>> {
125        let value = self.values.pop()?;
126        self.validity
127            .as_mut()
128            .map(|x| x.pop()?.then(|| ()))
129            .unwrap_or_else(|| Some(()))
130            .map(|_| value)
131    }
132
133    fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(iter: I) -> Result<Self> {
134        let iterator = iter.into_iter();
135        let (lower, _) = iterator.size_hint();
136        let mut primitive = Self::with_capacity(lower);
137        for item in iterator {
138            primitive.try_push(item.as_ref())?
139        }
140        Ok(primitive)
141    }
142
143    fn init_validity(&mut self) {
144        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
145        validity.extend_constant(self.len(), true);
146        validity.set(self.len() - 1, false);
147        self.validity = Some(validity);
148    }
149
150    /// Converts itself into an [`Array`].
151    pub fn into_arc(self) -> Arc<dyn Array> {
152        let a: BinaryArray<O> = self.into();
153        Arc::new(a)
154    }
155
156    /// Shrinks the capacity of the [`MutableBinaryArray`] to fit its current length.
157    pub fn shrink_to_fit(&mut self) {
158        self.values.shrink_to_fit();
159        if let Some(validity) = &mut self.validity {
160            validity.shrink_to_fit()
161        }
162    }
163
164    impl_mutable_array_mut_validity!();
165}
166
167impl<O: Offset> MutableBinaryArray<O> {
168    /// returns its values.
169    pub fn values(&self) -> &Vec<u8> {
170        self.values.values()
171    }
172
173    /// returns its offsets.
174    pub fn offsets(&self) -> &Offsets<O> {
175        self.values.offsets()
176    }
177
178    /// Returns an iterator of `Option<&[u8]>`
179    pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<O>, BitmapIter> {
180        ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
181    }
182
183    /// Returns an iterator over the values of this array
184    pub fn values_iter(&self) -> MutableBinaryValuesIter<O> {
185        self.values.iter()
186    }
187}
188
189impl<O: Offset> MutableArray for MutableBinaryArray<O> {
190    fn len(&self) -> usize {
191        self.values.len()
192    }
193
194    fn validity(&self) -> Option<&MutableBitmap> {
195        self.validity.as_ref()
196    }
197
198    fn as_box(&mut self) -> Box<dyn Array> {
199        let array: BinaryArray<O> = std::mem::take(self).into();
200        array.boxed()
201    }
202
203    fn as_arc(&mut self) -> Arc<dyn Array> {
204        let array: BinaryArray<O> = std::mem::take(self).into();
205        array.arced()
206    }
207
208    fn data_type(&self) -> &DataType {
209        self.values.data_type()
210    }
211
212    fn as_any(&self) -> &dyn std::any::Any {
213        self
214    }
215
216    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
217        self
218    }
219
220    #[inline]
221    fn push_null(&mut self) {
222        self.push::<&[u8]>(None)
223    }
224
225    fn reserve(&mut self, additional: usize) {
226        self.reserve(additional, 0)
227    }
228
229    fn shrink_to_fit(&mut self) {
230        self.shrink_to_fit()
231    }
232}
233
234impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {
235    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
236        Self::try_from_iter(iter).unwrap()
237    }
238}
239
240impl<O: Offset> MutableBinaryArray<O> {
241    /// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
242    /// # Safety
243    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
244    /// I.e. that `size_hint().1` correctly reports its length.
245    #[inline]
246    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
247    where
248        P: AsRef<[u8]>,
249        I: Iterator<Item = Option<P>>,
250    {
251        let (validity, offsets, values) = trusted_len_unzip(iterator);
252
253        Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap()
254    }
255
256    /// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
257    #[inline]
258    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
259    where
260        P: AsRef<[u8]>,
261        I: TrustedLen<Item = Option<P>>,
262    {
263        // soundness: I is `TrustedLen`
264        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
265    }
266
267    /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
268    /// # Safety
269    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
270    /// I.e. that `size_hint().1` correctly reports its length.
271    #[inline]
272    pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(
273        iterator: I,
274    ) -> Self {
275        let (offsets, values) = trusted_len_values_iter(iterator);
276        Self::try_new(Self::default_data_type(), offsets, values, None).unwrap()
277    }
278
279    /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
280    #[inline]
281    pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
282        iterator: I,
283    ) -> Self {
284        // soundness: I is `TrustedLen`
285        unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
286    }
287
288    /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
289    /// # Safety
290    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
291    /// I.e. that `size_hint().1` correctly reports its length.
292    #[inline]
293    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
294        iterator: I,
295    ) -> std::result::Result<Self, E>
296    where
297        P: AsRef<[u8]>,
298        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
299    {
300        let iterator = iterator.into_iter();
301
302        // soundness: assumed trusted len
303        let (mut validity, offsets, values) = try_trusted_len_unzip(iterator)?;
304
305        if validity.as_mut().unwrap().unset_bits() == 0 {
306            validity = None;
307        }
308
309        Ok(Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap())
310    }
311
312    /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
313    #[inline]
314    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
315    where
316        P: AsRef<[u8]>,
317        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
318    {
319        // soundness: I: TrustedLen
320        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
321    }
322
323    /// Extends the [`MutableBinaryArray`] from an iterator of trusted length.
324    /// This differs from `extend_trusted_len` which accepts iterator of optional values.
325    #[inline]
326    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
327    where
328        P: AsRef<[u8]>,
329        I: TrustedLen<Item = P>,
330    {
331        // Safety: The iterator is `TrustedLen`
332        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
333    }
334
335    /// Extends the [`MutableBinaryArray`] from an iterator of values.
336    /// This differs from `extended_trusted_len` which accepts iterator of optional values.
337    #[inline]
338    pub fn extend_values<I, P>(&mut self, iterator: I)
339    where
340        P: AsRef<[u8]>,
341        I: Iterator<Item = P>,
342    {
343        let length = self.values.len();
344        self.values.extend(iterator);
345        let additional = self.values.len() - length;
346
347        if let Some(validity) = self.validity.as_mut() {
348            validity.extend_constant(additional, true);
349        }
350    }
351
352    /// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.
353    /// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
354    /// values.
355    /// # Safety
356    /// The `iterator` must be [`TrustedLen`]
357    #[inline]
358    pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
359    where
360        P: AsRef<[u8]>,
361        I: Iterator<Item = P>,
362    {
363        let length = self.values.len();
364        self.values.extend_trusted_len_unchecked(iterator);
365        let additional = self.values.len() - length;
366
367        if let Some(validity) = self.validity.as_mut() {
368            validity.extend_constant(additional, true);
369        }
370    }
371
372    /// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
373    #[inline]
374    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
375    where
376        P: AsRef<[u8]>,
377        I: TrustedLen<Item = Option<P>>,
378    {
379        // Safety: The iterator is `TrustedLen`
380        unsafe { self.extend_trusted_len_unchecked(iterator) }
381    }
382
383    /// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
384    /// # Safety
385    /// The `iterator` must be [`TrustedLen`]
386    #[inline]
387    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
388    where
389        P: AsRef<[u8]>,
390        I: Iterator<Item = Option<P>>,
391    {
392        if self.validity.is_none() {
393            let mut validity = MutableBitmap::new();
394            validity.extend_constant(self.len(), true);
395            self.validity = Some(validity);
396        }
397
398        self.values
399            .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
400    }
401
402    /// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`.
403    pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
404        let (offsets, values) = values_iter(iterator);
405        Self::try_new(Self::default_data_type(), offsets, values, None).unwrap()
406    }
407
408    /// Extend with a fallible iterator
409    pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
410    where
411        E: std::error::Error,
412        I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
413        T: AsRef<[u8]>,
414    {
415        let mut iter = iter.into_iter();
416        self.reserve(iter.size_hint().0, 0);
417        iter.try_for_each(|x| {
418            self.push(x?);
419            Ok(())
420        })
421    }
422}
423
424impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {
425    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
426        self.try_extend(iter).unwrap();
427    }
428}
429
430impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {
431    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> Result<()> {
432        let mut iter = iter.into_iter();
433        self.reserve(iter.size_hint().0, 0);
434        iter.try_for_each(|x| self.try_push(x))
435    }
436}
437
438impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
439    fn try_push(&mut self, value: Option<T>) -> Result<()> {
440        match value {
441            Some(value) => {
442                self.values.try_push(value.as_ref())?;
443
444                match &mut self.validity {
445                    Some(validity) => validity.push(true),
446                    None => {}
447                }
448            }
449            None => {
450                self.values.push("");
451                match &mut self.validity {
452                    Some(validity) => validity.push(false),
453                    None => self.init_validity(),
454                }
455            }
456        }
457        Ok(())
458    }
459}
460
461impl<O: Offset> PartialEq for MutableBinaryArray<O> {
462    fn eq(&self, other: &Self) -> bool {
463        self.iter().eq(other.iter())
464    }
465}
466
467impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
468    fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
469        extend_validity(self.len(), &mut self.validity, &other.validity);
470
471        self.values.try_extend_from_self(&other.values)
472    }
473}