Thanks for visiting codestin.com
Credit goes to docs.rs

arrow2/array/primitive/
mutable.rs

1use std::{iter::FromIterator, sync::Arc};
2
3use crate::array::physical_binary::extend_validity;
4use crate::array::TryExtendFromSelf;
5use crate::bitmap::Bitmap;
6use crate::{
7    array::{Array, MutableArray, TryExtend, TryPush},
8    bitmap::MutableBitmap,
9    datatypes::DataType,
10    error::Error,
11    trusted_len::TrustedLen,
12    types::NativeType,
13};
14
15use super::{check, PrimitiveArray};
16
17/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
18/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
19#[derive(Debug, Clone)]
20pub struct MutablePrimitiveArray<T: NativeType> {
21    data_type: DataType,
22    values: Vec<T>,
23    validity: Option<MutableBitmap>,
24}
25
26impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
27    fn from(other: MutablePrimitiveArray<T>) -> Self {
28        let validity = other.validity.and_then(|x| {
29            let bitmap: Bitmap = x.into();
30            if bitmap.unset_bits() == 0 {
31                None
32            } else {
33                Some(bitmap)
34            }
35        });
36
37        PrimitiveArray::<T>::new(other.data_type, other.values.into(), validity)
38    }
39}
40
41impl<T: NativeType, P: AsRef<[Option<T>]>> From<P> for MutablePrimitiveArray<T> {
42    fn from(slice: P) -> Self {
43        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
44    }
45}
46
47impl<T: NativeType> MutablePrimitiveArray<T> {
48    /// Creates a new empty [`MutablePrimitiveArray`].
49    pub fn new() -> Self {
50        Self::with_capacity(0)
51    }
52
53    /// Creates a new [`MutablePrimitiveArray`] with a capacity.
54    pub fn with_capacity(capacity: usize) -> Self {
55        Self::with_capacity_from(capacity, T::PRIMITIVE.into())
56    }
57
58    /// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components.
59    /// # Implementation
60    /// This function is `O(1)`.
61    ///
62    /// # Errors
63    /// This function errors iff:
64    /// * The validity is not `None` and its length is different from `values`'s length
65    /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`]
66    pub fn try_new(
67        data_type: DataType,
68        values: Vec<T>,
69        validity: Option<MutableBitmap>,
70    ) -> Result<Self, Error> {
71        check(&data_type, &values, validity.as_ref().map(|x| x.len()))?;
72        Ok(Self {
73            data_type,
74            values,
75            validity,
76        })
77    }
78
79    /// Extract the low-end APIs from the [`MutablePrimitiveArray`].
80    pub fn into_inner(self) -> (DataType, Vec<T>, Option<MutableBitmap>) {
81        (self.data_type, self.values, self.validity)
82    }
83
84    /// Applies a function `f` to the values of this array, cloning the values
85    /// iff they are being shared with others
86    ///
87    /// This is an API to use clone-on-write
88    /// # Implementation
89    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
90    /// if it is being shared (since it results in a `O(N)` memcopy).
91    /// # Panics
92    /// This function panics iff `f` panics
93    pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
94        f(&mut self.values);
95    }
96}
97
98impl<T: NativeType> Default for MutablePrimitiveArray<T> {
99    fn default() -> Self {
100        Self::new()
101    }
102}
103
104impl<T: NativeType> From<DataType> for MutablePrimitiveArray<T> {
105    fn from(data_type: DataType) -> Self {
106        assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE));
107        Self {
108            data_type,
109            values: Vec::<T>::new(),
110            validity: None,
111        }
112    }
113}
114
115impl<T: NativeType> MutablePrimitiveArray<T> {
116    /// Creates a new [`MutablePrimitiveArray`] from a capacity and [`DataType`].
117    pub fn with_capacity_from(capacity: usize, data_type: DataType) -> Self {
118        assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE));
119        Self {
120            data_type,
121            values: Vec::<T>::with_capacity(capacity),
122            validity: None,
123        }
124    }
125
126    /// Reserves `additional` entries.
127    pub fn reserve(&mut self, additional: usize) {
128        self.values.reserve(additional);
129        if let Some(x) = self.validity.as_mut() {
130            x.reserve(additional)
131        }
132    }
133
134    /// Adds a new value to the array.
135    #[inline]
136    pub fn push(&mut self, value: Option<T>) {
137        match value {
138            Some(value) => {
139                self.values.push(value);
140                match &mut self.validity {
141                    Some(validity) => validity.push(true),
142                    None => {}
143                }
144            }
145            None => {
146                self.values.push(T::default());
147                match &mut self.validity {
148                    Some(validity) => validity.push(false),
149                    None => {
150                        self.init_validity();
151                    }
152                }
153            }
154        }
155    }
156
157    /// Pop a value from the array.
158    /// Note if the values is empty, this method will return None.
159    pub fn pop(&mut self) -> Option<T> {
160        let value = self.values.pop()?;
161        self.validity
162            .as_mut()
163            .map(|x| x.pop()?.then(|| value))
164            .unwrap_or_else(|| Some(value))
165    }
166
167    /// Extends the [`MutablePrimitiveArray`] with a constant
168    #[inline]
169    pub fn extend_constant(&mut self, additional: usize, value: Option<T>) {
170        if let Some(value) = value {
171            self.values.resize(self.values.len() + additional, value);
172            if let Some(validity) = &mut self.validity {
173                validity.extend_constant(additional, true)
174            }
175        } else {
176            if let Some(validity) = &mut self.validity {
177                validity.extend_constant(additional, false)
178            } else {
179                let mut validity = MutableBitmap::with_capacity(self.values.capacity());
180                validity.extend_constant(self.len(), true);
181                validity.extend_constant(additional, false);
182                self.validity = Some(validity)
183            }
184            self.values
185                .resize(self.values.len() + additional, T::default());
186        }
187    }
188
189    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
190    #[inline]
191    pub fn extend_trusted_len<P, I>(&mut self, iterator: I)
192    where
193        P: std::borrow::Borrow<T>,
194        I: TrustedLen<Item = Option<P>>,
195    {
196        unsafe { self.extend_trusted_len_unchecked(iterator) }
197    }
198
199    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
200    /// # Safety
201    /// The iterator must be trusted len.
202    #[inline]
203    pub unsafe fn extend_trusted_len_unchecked<P, I>(&mut self, iterator: I)
204    where
205        P: std::borrow::Borrow<T>,
206        I: Iterator<Item = Option<P>>,
207    {
208        if let Some(validity) = self.validity.as_mut() {
209            extend_trusted_len_unzip(iterator, validity, &mut self.values)
210        } else {
211            let mut validity = MutableBitmap::new();
212            validity.extend_constant(self.len(), true);
213            extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
214            self.validity = Some(validity);
215        }
216    }
217    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
218    /// This differs from `extend_trusted_len` which accepts in iterator of optional values.
219    #[inline]
220    pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
221    where
222        I: TrustedLen<Item = T>,
223    {
224        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
225    }
226
227    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
228    /// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values.
229    /// # Safety
230    /// The iterator must be trusted len.
231    #[inline]
232    pub unsafe fn extend_trusted_len_values_unchecked<I>(&mut self, iterator: I)
233    where
234        I: Iterator<Item = T>,
235    {
236        self.values.extend(iterator);
237        self.update_all_valid();
238    }
239
240    #[inline]
241    /// Extends the [`MutablePrimitiveArray`] from a slice
242    pub fn extend_from_slice(&mut self, items: &[T]) {
243        self.values.extend_from_slice(items);
244        self.update_all_valid();
245    }
246
247    fn update_all_valid(&mut self) {
248        // get len before mutable borrow
249        let len = self.len();
250        if let Some(validity) = self.validity.as_mut() {
251            validity.extend_constant(len - validity.len(), true);
252        }
253    }
254
255    fn init_validity(&mut self) {
256        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
257        validity.extend_constant(self.len(), true);
258        validity.set(self.len() - 1, false);
259        self.validity = Some(validity)
260    }
261
262    /// Changes the arrays' [`DataType`], returning a new [`MutablePrimitiveArray`].
263    /// Use to change the logical type without changing the corresponding physical Type.
264    /// # Implementation
265    /// This operation is `O(1)`.
266    #[inline]
267    pub fn to(self, data_type: DataType) -> Self {
268        Self::try_new(data_type, self.values, self.validity).unwrap()
269    }
270
271    /// Converts itself into an [`Array`].
272    pub fn into_arc(self) -> Arc<dyn Array> {
273        let a: PrimitiveArray<T> = self.into();
274        Arc::new(a)
275    }
276
277    /// Shrinks the capacity of the [`MutablePrimitiveArray`] to fit its current length.
278    pub fn shrink_to_fit(&mut self) {
279        self.values.shrink_to_fit();
280        if let Some(validity) = &mut self.validity {
281            validity.shrink_to_fit()
282        }
283    }
284
285    /// Returns the capacity of this [`MutablePrimitiveArray`].
286    pub fn capacity(&self) -> usize {
287        self.values.capacity()
288    }
289}
290
291/// Accessors
292impl<T: NativeType> MutablePrimitiveArray<T> {
293    /// Returns its values.
294    pub fn values(&self) -> &Vec<T> {
295        &self.values
296    }
297
298    /// Returns a mutable slice of values.
299    pub fn values_mut_slice(&mut self) -> &mut [T] {
300        self.values.as_mut_slice()
301    }
302}
303
304/// Setters
305impl<T: NativeType> MutablePrimitiveArray<T> {
306    /// Sets position `index` to `value`.
307    /// Note that if it is the first time a null appears in this array,
308    /// this initializes the validity bitmap (`O(N)`).
309    /// # Panic
310    /// Panics iff index is larger than `self.len()`.
311    pub fn set(&mut self, index: usize, value: Option<T>) {
312        assert!(index < self.len());
313        // Safety:
314        // we just checked bounds
315        unsafe { self.set_unchecked(index, value) }
316    }
317
318    /// Sets position `index` to `value`.
319    /// Note that if it is the first time a null appears in this array,
320    /// this initializes the validity bitmap (`O(N)`).
321    /// # Safety
322    /// Caller must ensure `index < self.len()`
323    pub unsafe fn set_unchecked(&mut self, index: usize, value: Option<T>) {
324        *self.values.get_unchecked_mut(index) = value.unwrap_or_default();
325
326        if value.is_none() && self.validity.is_none() {
327            // When the validity is None, all elements so far are valid. When one of the elements is set fo null,
328            // the validity must be initialized.
329            let mut validity = MutableBitmap::new();
330            validity.extend_constant(self.len(), true);
331            self.validity = Some(validity);
332        }
333        if let Some(x) = self.validity.as_mut() {
334            x.set_unchecked(index, value.is_some())
335        }
336    }
337
338    /// Sets the validity.
339    /// # Panic
340    /// Panics iff the validity's len is not equal to the existing values' length.
341    pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
342        if let Some(validity) = &validity {
343            assert_eq!(self.values.len(), validity.len())
344        }
345        self.validity = validity;
346    }
347
348    /// Sets values.
349    /// # Panic
350    /// Panics iff the values' length is not equal to the existing validity's len.
351    pub fn set_values(&mut self, values: Vec<T>) {
352        assert_eq!(values.len(), self.values.len());
353        self.values = values;
354    }
355}
356
357impl<T: NativeType> Extend<Option<T>> for MutablePrimitiveArray<T> {
358    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
359        let iter = iter.into_iter();
360        self.reserve(iter.size_hint().0);
361        iter.for_each(|x| self.push(x))
362    }
363}
364
365impl<T: NativeType> TryExtend<Option<T>> for MutablePrimitiveArray<T> {
366    /// This is infalible and is implemented for consistency with all other types
367    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> Result<(), Error> {
368        self.extend(iter);
369        Ok(())
370    }
371}
372
373impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {
374    /// This is infalible and is implemented for consistency with all other types
375    fn try_push(&mut self, item: Option<T>) -> Result<(), Error> {
376        self.push(item);
377        Ok(())
378    }
379}
380
381impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {
382    fn len(&self) -> usize {
383        self.values.len()
384    }
385
386    fn validity(&self) -> Option<&MutableBitmap> {
387        self.validity.as_ref()
388    }
389
390    fn as_box(&mut self) -> Box<dyn Array> {
391        PrimitiveArray::new(
392            self.data_type.clone(),
393            std::mem::take(&mut self.values).into(),
394            std::mem::take(&mut self.validity).map(|x| x.into()),
395        )
396        .boxed()
397    }
398
399    fn as_arc(&mut self) -> Arc<dyn Array> {
400        PrimitiveArray::new(
401            self.data_type.clone(),
402            std::mem::take(&mut self.values).into(),
403            std::mem::take(&mut self.validity).map(|x| x.into()),
404        )
405        .arced()
406    }
407
408    fn data_type(&self) -> &DataType {
409        &self.data_type
410    }
411
412    fn as_any(&self) -> &dyn std::any::Any {
413        self
414    }
415
416    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
417        self
418    }
419
420    fn push_null(&mut self) {
421        self.push(None)
422    }
423
424    fn reserve(&mut self, additional: usize) {
425        self.reserve(additional)
426    }
427
428    fn shrink_to_fit(&mut self) {
429        self.shrink_to_fit()
430    }
431}
432
433impl<T: NativeType> MutablePrimitiveArray<T> {
434    /// Creates a [`MutablePrimitiveArray`] from a slice of values.
435    pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
436        Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
437    }
438
439    /// Creates a [`MutablePrimitiveArray`] from an iterator of trusted length.
440    /// # Safety
441    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
442    /// I.e. `size_hint().1` correctly reports its length.
443    #[inline]
444    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
445    where
446        P: std::borrow::Borrow<T>,
447        I: Iterator<Item = Option<P>>,
448    {
449        let (validity, values) = trusted_len_unzip(iterator);
450
451        Self {
452            data_type: T::PRIMITIVE.into(),
453            values,
454            validity,
455        }
456    }
457
458    /// Creates a [`MutablePrimitiveArray`] from a [`TrustedLen`].
459    #[inline]
460    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
461    where
462        P: std::borrow::Borrow<T>,
463        I: TrustedLen<Item = Option<P>>,
464    {
465        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
466    }
467
468    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
469    /// # Safety
470    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
471    /// I.e. that `size_hint().1` correctly reports its length.
472    #[inline]
473    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
474        iter: I,
475    ) -> std::result::Result<Self, E>
476    where
477        P: std::borrow::Borrow<T>,
478        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
479    {
480        let iterator = iter.into_iter();
481
482        let (validity, values) = try_trusted_len_unzip(iterator)?;
483
484        Ok(Self {
485            data_type: T::PRIMITIVE.into(),
486            values,
487            validity,
488        })
489    }
490
491    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
492    #[inline]
493    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
494    where
495        P: std::borrow::Borrow<T>,
496        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
497    {
498        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
499    }
500
501    /// Creates a new [`MutablePrimitiveArray`] out an iterator over values
502    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
503        Self {
504            data_type: T::PRIMITIVE.into(),
505            values: iter.collect(),
506            validity: None,
507        }
508    }
509
510    /// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values.
511    /// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
512    pub fn from_vec(values: Vec<T>) -> Self {
513        Self::try_new(T::PRIMITIVE.into(), values, None).unwrap()
514    }
515
516    /// Creates a new [`MutablePrimitiveArray`] from an iterator over values
517    /// # Safety
518    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
519    /// I.e. that `size_hint().1` correctly reports its length.
520    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
521        Self {
522            data_type: T::PRIMITIVE.into(),
523            values: iter.collect(),
524            validity: None,
525        }
526    }
527}
528
529impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>
530    for MutablePrimitiveArray<T>
531{
532    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
533        let iter = iter.into_iter();
534        let (lower, _) = iter.size_hint();
535
536        let mut validity = MutableBitmap::with_capacity(lower);
537
538        let values: Vec<T> = iter
539            .map(|item| {
540                if let Some(a) = item.borrow() {
541                    validity.push(true);
542                    *a
543                } else {
544                    validity.push(false);
545                    T::default()
546                }
547            })
548            .collect();
549
550        let validity = Some(validity);
551
552        Self {
553            data_type: T::PRIMITIVE.into(),
554            values,
555            validity,
556        }
557    }
558}
559
560/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
561/// The first buffer corresponds to a bitmap buffer, the second one
562/// corresponds to a values buffer.
563/// # Safety
564/// The caller must ensure that `iterator` is `TrustedLen`.
565#[inline]
566pub(crate) unsafe fn extend_trusted_len_unzip<I, P, T>(
567    iterator: I,
568    validity: &mut MutableBitmap,
569    buffer: &mut Vec<T>,
570) where
571    T: NativeType,
572    P: std::borrow::Borrow<T>,
573    I: Iterator<Item = Option<P>>,
574{
575    let (_, upper) = iterator.size_hint();
576    let additional = upper.expect("trusted_len_unzip requires an upper limit");
577
578    validity.reserve(additional);
579    let values = iterator.map(|item| {
580        if let Some(item) = item {
581            validity.push_unchecked(true);
582            *item.borrow()
583        } else {
584            validity.push_unchecked(false);
585            T::default()
586        }
587    });
588    buffer.extend(values);
589}
590
591/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
592/// The first buffer corresponds to a bitmap buffer, the second one
593/// corresponds to a values buffer.
594/// # Safety
595/// The caller must ensure that `iterator` is `TrustedLen`.
596#[inline]
597pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Option<MutableBitmap>, Vec<T>)
598where
599    T: NativeType,
600    P: std::borrow::Borrow<T>,
601    I: Iterator<Item = Option<P>>,
602{
603    let mut validity = MutableBitmap::new();
604    let mut buffer = Vec::<T>::new();
605
606    extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);
607
608    let validity = Some(validity);
609
610    (validity, buffer)
611}
612
613/// # Safety
614/// The caller must ensure that `iterator` is `TrustedLen`.
615#[inline]
616pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, T>(
617    iterator: I,
618) -> std::result::Result<(Option<MutableBitmap>, Vec<T>), E>
619where
620    T: NativeType,
621    P: std::borrow::Borrow<T>,
622    I: Iterator<Item = std::result::Result<Option<P>, E>>,
623{
624    let (_, upper) = iterator.size_hint();
625    let len = upper.expect("trusted_len_unzip requires an upper limit");
626
627    let mut null = MutableBitmap::with_capacity(len);
628    let mut buffer = Vec::<T>::with_capacity(len);
629
630    let mut dst = buffer.as_mut_ptr();
631    for item in iterator {
632        let item = if let Some(item) = item? {
633            null.push(true);
634            *item.borrow()
635        } else {
636            null.push(false);
637            T::default()
638        };
639        std::ptr::write(dst, item);
640        dst = dst.add(1);
641    }
642    assert_eq!(
643        dst.offset_from(buffer.as_ptr()) as usize,
644        len,
645        "Trusted iterator length was not accurately reported"
646    );
647    buffer.set_len(len);
648    null.set_len(len);
649
650    let validity = Some(null);
651
652    Ok((validity, buffer))
653}
654
655impl<T: NativeType> PartialEq for MutablePrimitiveArray<T> {
656    fn eq(&self, other: &Self) -> bool {
657        self.iter().eq(other.iter())
658    }
659}
660
661impl<T: NativeType> TryExtendFromSelf for MutablePrimitiveArray<T> {
662    fn try_extend_from_self(&mut self, other: &Self) -> Result<(), Error> {
663        extend_validity(self.len(), &mut self.validity, &other.validity);
664
665        let slice = other.values.as_slice();
666        self.values.extend_from_slice(slice);
667        Ok(())
668    }
669}