1use crate::{
2 bitmap::{
3 utils::{BitmapIter, ZipValidity},
4 Bitmap,
5 },
6 buffer::Buffer,
7 datatypes::DataType,
8 error::Error,
9 offset::{Offset, Offsets, OffsetsBuffer},
10 trusted_len::TrustedLen,
11};
12
13use either::Either;
14
15use super::{specification::try_check_offsets_bounds, Array, GenericBinaryArray};
16
17mod ffi;
18pub(super) mod fmt;
19mod iterator;
20pub use iterator::*;
21mod from;
22mod mutable_values;
23pub use mutable_values::*;
24mod mutable;
25pub use mutable::*;
26
27#[cfg(feature = "arrow")]
28mod data;
29
/// An immutable array of optional, variable-length byte strings.
///
/// Slot `i` is the byte range of `values` delimited by the `i`-th start/end pair of
/// `offsets`; `validity`, when present, marks which slots are null.
#[derive(Clone)]
pub struct BinaryArray<O: Offset> {
    /// Logical type of the array; its physical type must match the one of
    /// `default_data_type()` (enforced by `try_new`).
    data_type: DataType,
    /// Start/end positions of every slot inside `values`.
    offsets: OffsetsBuffer<O>,
    /// Concatenated bytes of all slots.
    values: Buffer<u8>,
    /// Optional validity bitmap; when present its length equals the number of slots
    /// (enforced by `try_new`).
    validity: Option<Bitmap>,
}
67
68impl<O: Offset> BinaryArray<O> {
69 pub fn try_new(
79 data_type: DataType,
80 offsets: OffsetsBuffer<O>,
81 values: Buffer<u8>,
82 validity: Option<Bitmap>,
83 ) -> Result<Self, Error> {
84 try_check_offsets_bounds(&offsets, values.len())?;
85
86 if validity
87 .as_ref()
88 .map_or(false, |validity| validity.len() != offsets.len_proxy())
89 {
90 return Err(Error::oos(
91 "validity mask length must match the number of values",
92 ));
93 }
94
95 if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
96 return Err(Error::oos(
97 "BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
98 ));
99 }
100
101 Ok(Self {
102 data_type,
103 offsets,
104 values,
105 validity,
106 })
107 }
108
109 pub fn from_slice<T: AsRef<[u8]>, P: AsRef<[T]>>(slice: P) -> Self {
111 Self::from_trusted_len_values_iter(slice.as_ref().iter())
112 }
113
114 pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
117 MutableBinaryArray::<O>::from(slice).into()
118 }
119
120 pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
122 ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
123 }
124
125 pub fn values_iter(&self) -> BinaryValueIter<O> {
127 BinaryValueIter::new(self)
128 }
129
130 #[inline]
132 pub fn len(&self) -> usize {
133 self.offsets.len_proxy()
134 }
135
136 #[inline]
140 pub fn value(&self, i: usize) -> &[u8] {
141 assert!(i < self.len());
142 unsafe { self.value_unchecked(i) }
143 }
144
145 #[inline]
149 pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
150 let (start, end) = self.offsets.start_end_unchecked(i);
152
153 self.values.get_unchecked(start..end)
155 }
156
157 #[inline]
161 pub fn get(&self, i: usize) -> Option<&[u8]> {
162 if !self.is_null(i) {
163 unsafe { Some(self.value_unchecked(i)) }
165 } else {
166 None
167 }
168 }
169
170 #[inline]
172 pub fn data_type(&self) -> &DataType {
173 &self.data_type
174 }
175
176 #[inline]
178 pub fn values(&self) -> &Buffer<u8> {
179 &self.values
180 }
181
182 #[inline]
184 pub fn offsets(&self) -> &OffsetsBuffer<O> {
185 &self.offsets
186 }
187
188 #[inline]
190 pub fn validity(&self) -> Option<&Bitmap> {
191 self.validity.as_ref()
192 }
193
194 pub fn slice(&mut self, offset: usize, length: usize) {
200 assert!(
201 offset + length <= self.len(),
202 "the offset of the new Buffer cannot exceed the existing length"
203 );
204 unsafe { self.slice_unchecked(offset, length) }
205 }
206
207 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
213 self.validity.as_mut().and_then(|bitmap| {
214 bitmap.slice_unchecked(offset, length);
215 (bitmap.unset_bits() > 0).then(|| bitmap)
216 });
217 self.offsets.slice_unchecked(offset, length + 1);
218 }
219
// NOTE(review): these macros are defined elsewhere in the crate. Judging by their names
// they presumably generate the owned slicing helpers, the validity setters (the by-value
// `with_validity` used by the `Array` impl) and the boxed-array conversions — confirm
// against the macro definitions.
impl_sliced!();
impl_mut_validity!();
impl_into_array!();
223
224 #[must_use]
226 pub fn into_inner(self) -> (DataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
227 let Self {
228 data_type,
229 offsets,
230 values,
231 validity,
232 } = self;
233 (data_type, offsets, values, validity)
234 }
235
236 #[must_use]
238 pub fn into_mut(self) -> Either<Self, MutableBinaryArray<O>> {
239 use Either::*;
240 if let Some(bitmap) = self.validity {
241 match bitmap.into_mut() {
242 Left(bitmap) => Left(BinaryArray::new(
244 self.data_type,
245 self.offsets,
246 self.values,
247 Some(bitmap),
248 )),
249 Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) {
250 (Left(values), Left(offsets)) => Left(BinaryArray::new(
251 self.data_type,
252 offsets,
253 values,
254 Some(mutable_bitmap.into()),
255 )),
256 (Left(values), Right(offsets)) => Left(BinaryArray::new(
257 self.data_type,
258 offsets.into(),
259 values,
260 Some(mutable_bitmap.into()),
261 )),
262 (Right(values), Left(offsets)) => Left(BinaryArray::new(
263 self.data_type,
264 offsets,
265 values.into(),
266 Some(mutable_bitmap.into()),
267 )),
268 (Right(values), Right(offsets)) => Right(
269 MutableBinaryArray::try_new(
270 self.data_type,
271 offsets,
272 values,
273 Some(mutable_bitmap),
274 )
275 .unwrap(),
276 ),
277 },
278 }
279 } else {
280 match (self.values.into_mut(), self.offsets.into_mut()) {
281 (Left(values), Left(offsets)) => {
282 Left(BinaryArray::new(self.data_type, offsets, values, None))
283 }
284 (Left(values), Right(offsets)) => Left(BinaryArray::new(
285 self.data_type,
286 offsets.into(),
287 values,
288 None,
289 )),
290 (Right(values), Left(offsets)) => Left(BinaryArray::new(
291 self.data_type,
292 offsets,
293 values.into(),
294 None,
295 )),
296 (Right(values), Right(offsets)) => Right(
297 MutableBinaryArray::try_new(self.data_type, offsets, values, None).unwrap(),
298 ),
299 }
300 }
301 }
302
303 pub fn new_empty(data_type: DataType) -> Self {
305 Self::new(data_type, OffsetsBuffer::new(), Buffer::new(), None)
306 }
307
308 #[inline]
310 pub fn new_null(data_type: DataType, length: usize) -> Self {
311 Self::new(
312 data_type,
313 Offsets::new_zeroed(length).into(),
314 Buffer::new(),
315 Some(Bitmap::new_zeroed(length)),
316 )
317 }
318
319 pub fn default_data_type() -> DataType {
321 if O::IS_LARGE {
322 DataType::LargeBinary
323 } else {
324 DataType::Binary
325 }
326 }
327
328 pub fn new(
330 data_type: DataType,
331 offsets: OffsetsBuffer<O>,
332 values: Buffer<u8>,
333 validity: Option<Bitmap>,
334 ) -> Self {
335 Self::try_new(data_type, offsets, values, validity).unwrap()
336 }
337
338 #[inline]
342 pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
343 iterator: I,
344 ) -> Self {
345 MutableBinaryArray::<O>::from_trusted_len_values_iter(iterator).into()
346 }
347
348 pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
352 MutableBinaryArray::<O>::from_iter_values(iterator).into()
353 }
354
355 #[inline]
360 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
361 where
362 P: AsRef<[u8]>,
363 I: Iterator<Item = Option<P>>,
364 {
365 MutableBinaryArray::<O>::from_trusted_len_iter_unchecked(iterator).into()
366 }
367
368 #[inline]
370 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
371 where
372 P: AsRef<[u8]>,
373 I: TrustedLen<Item = Option<P>>,
374 {
375 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
377 }
378
379 #[inline]
384 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>
385 where
386 P: AsRef<[u8]>,
387 I: IntoIterator<Item = Result<Option<P>, E>>,
388 {
389 MutableBinaryArray::<O>::try_from_trusted_len_iter_unchecked(iterator).map(|x| x.into())
390 }
391
392 #[inline]
394 pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Self, E>
395 where
396 P: AsRef<[u8]>,
397 I: TrustedLen<Item = Result<Option<P>, E>>,
398 {
399 unsafe { Self::try_from_trusted_len_iter_unchecked(iter) }
401 }
402}
403
404impl<O: Offset> Array for BinaryArray<O> {
405 impl_common_array!();
406
407 fn validity(&self) -> Option<&Bitmap> {
408 self.validity.as_ref()
409 }
410
411 #[inline]
412 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
413 Box::new(self.clone().with_validity(validity))
414 }
415}
416
417unsafe impl<O: Offset> GenericBinaryArray<O> for BinaryArray<O> {
418 #[inline]
419 fn values(&self) -> &[u8] {
420 self.values()
421 }
422
423 #[inline]
424 fn offsets(&self) -> &[O] {
425 self.offsets().buffer()
426 }
427}