Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 12f76e5

Browse files
committed
Add support for union types (Schema, Value). The serde integration is still to be done
1 parent e5ca80b commit 12f76e5

8 files changed

Lines changed: 100 additions & 68 deletions

File tree

src/de.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,15 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> {
172172
V: Visitor<'de>,
173173
{
174174
match *self.input {
175-
Value::Union(ref inner) if inner.as_ref() == &Value::Null => visitor.visit_none(),
176-
Value::Union(ref inner) => visitor.visit_some(&mut Deserializer::new(inner)),
175+
/*
176+
* https://avro.apache.org/docs/current/spec.html#Unions
177+
* | Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null
178+
*
179+
* Although that is not a guarantee (that the schema will be defined as ["null", {"type": ...}]: null-first),
180+
* this can be used as a guideline to choose variant-index=0 for the None value.
181+
*/
182+
Value::Union(0, ref inner) if inner.as_ref() == &Value::Null => visitor.visit_none(),
183+
Value::Union(1, ref inner) => visitor.visit_some(&mut Deserializer::new(inner)),
177184
_ => Err(Error::custom("not a union")),
178185
}
179186
}

src/decode.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,10 @@ pub fn decode<R: Read>(schema: &Schema, reader: &mut R) -> Result<Value, Error>
119119
Ok(Value::Map(items))
120120
},
121121
Schema::Union(ref inner) => {
122-
let index = zag_i64(reader)?;
122+
let index = zag_i64(reader)? as usize;
123123
let variants = inner.variants();
124124
match variants.get(index as usize) {
125-
Some(variant) => decode(variant, reader).map(|x| Value::Union(Box::new(x))),
125+
Some(variant) => decode(variant, reader).map(|x| Value::Union(index, Box::new(x))),
126126
None => Err(DecodeError::new("Union index out of bounds").into()),
127127
}
128128
},

src/encode.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,13 @@ pub fn encode_ref(value: &Value, schema: &Schema, buffer: &mut Vec<u8>) {
5454
},
5555
Value::Fixed(_, bytes) => buffer.extend(bytes),
5656
Value::Enum(i, _) => encode_int(*i, buffer),
57-
Value::Union(item) => {
57+
Value::Union(idx, item) => {
5858
if let Schema::Union(ref inner) = *schema {
59-
// Find the schema that is matched here. Due to validation, this should always
60-
// return a value.
61-
let (idx, inner_schema) = inner
62-
.find_schema(item)
63-
.expect("Invalid Union validation occurred");
64-
encode_long(idx as i64, buffer);
59+
let variant_idx = *idx;
60+
let inner_schema = inner
61+
.variant_schema(variant_idx)
62+
.expect(format!("Invalid variant index: {:?}", variant_idx).as_ref());
63+
encode_long(variant_idx as i64, buffer);
6564
encode_ref(&*item, inner_schema, buffer);
6665
}
6766
},

src/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ mod tests {
344344

345345
assert_eq!(
346346
from_avro_datum(&schema, &mut encoded, None).unwrap(),
347-
Value::Union(Box::new(Value::Long(0)))
347+
Value::Union(1, Box::new(Value::Long(0)))
348348
);
349349
}
350350

src/schema.rs

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Logic for parsing and interacting with schemas in Avro format.
22
use std::borrow::Cow;
3-
use std::collections::HashMap;
3+
use std::collections::{HashMap, HashSet};
44
use std::fmt;
55

66
use digest::Digest;
@@ -161,7 +161,7 @@ impl<'a> From<&'a types::Value> for SchemaKind {
161161
types::Value::String(_) => SchemaKind::String,
162162
types::Value::Array(_) => SchemaKind::Array,
163163
types::Value::Map(_) => SchemaKind::Map,
164-
types::Value::Union(_) => SchemaKind::Union,
164+
types::Value::Union(_, _) => SchemaKind::Union,
165165
types::Value::Record(_) => SchemaKind::Record,
166166
types::Value::Enum(_, _) => SchemaKind::Enum,
167167
types::Value::Fixed(_, _) => SchemaKind::Fixed,
@@ -316,32 +316,39 @@ impl RecordField {
316316
#[derive(Debug, Clone)]
317317
pub struct UnionSchema {
318318
schemas: Vec<Schema>,
319-
// Used to ensure uniqueness of schema inputs, and provide constant time finding of the
320-
// schema index given a value.
321-
// **NOTE** that this approach does not work for named types, and will have to be modified
322-
// to support that. A simple solution is to also keep a mapping of the names used.
323-
variant_index: HashMap<SchemaKind, usize>,
324319
}
325320

326321
impl UnionSchema {
327322
pub(crate) fn new(schemas: Vec<Schema>) -> Result<Self, Error> {
328-
let mut vindex = HashMap::new();
329-
for (i, schema) in schemas.iter().enumerate() {
330-
if let Schema::Union(_) = schema {
331-
Err(ParseSchemaError::new(
332-
"Unions may not directly contain a union",
333-
))?;
334-
}
335-
let kind = SchemaKind::from(schema);
336-
if vindex.insert(kind, i).is_some() {
337-
Err(ParseSchemaError::new(
338-
"Unions cannot contain duplicate types",
339-
))?;
323+
let mut prim_index = HashSet::<SchemaKind>::new();
324+
let mut record_index = HashSet::<String>::new();
325+
326+
for (variant_index, schema) in schemas.iter().enumerate() {
327+
match schema {
328+
Schema::Union(_) =>
329+
Err(ParseSchemaError::new(
330+
format!("Unions may not directly contain a union (variant-index: {})", variant_index),
331+
))?,
332+
Schema::Record { name, .. } =>
333+
if !record_index.insert(name.fullname(None).clone()) {
334+
Err(ParseSchemaError::new(
335+
format!("Union cannot have several record-variants with the same record-name (variant-index: {})", variant_index),
336+
))?
337+
},
338+
339+
primitive => {
340+
let schema_kind = SchemaKind::from(primitive);
341+
if !prim_index.insert(schema_kind) {
342+
Err(ParseSchemaError::new(
343+
format!("Unions cannot contain duplicate primitive types(variant-index: {})", variant_index),
344+
))?
345+
}
346+
},
340347
}
341348
}
349+
342350
Ok(UnionSchema {
343351
schemas,
344-
variant_index: vindex,
345352
})
346353
}
347354

@@ -355,14 +362,8 @@ impl UnionSchema {
355362
!self.schemas.is_empty() && self.schemas[0] == Schema::Null
356363
}
357364

358-
/// Optionally returns a reference to the schema matched by this value, as well as its position
359-
/// within this enum.
360-
pub fn find_schema(&self, value: &crate::types::Value) -> Option<(usize, &Schema)> {
361-
let kind = SchemaKind::from(value);
362-
self.variant_index
363-
.get(&kind)
364-
.cloned()
365-
.map(|i| (i, &self.schemas[i]))
365+
pub fn variant_schema(&self, variant_index: usize) -> Option<&Schema> {
366+
self.schemas.get(variant_index)
366367
}
367368
}
368369

src/types.rs

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::u8;
66
use failure::Error;
77
use serde_json::Value as JsonValue;
88

9-
use crate::schema::{RecordField, Schema, SchemaKind, UnionSchema};
9+
use crate::schema::{RecordField, Schema, UnionSchema};
1010

1111
/// Describes errors happened while performing schema resolution on Avro data.
1212
#[derive(Fail, Debug)]
@@ -54,7 +54,7 @@ pub enum Value {
5454
/// reading values.
5555
Enum(i32, String),
5656
/// An `union` Avro value.
57-
Union(Box<Value>),
57+
Union(usize, Box<Value>),
5858
/// An `array` Avro value.
5959
Array(Vec<Value>),
6060
/// A `map` Avro value.
@@ -121,11 +121,18 @@ where
121121
T: ToAvro,
122122
{
123123
fn avro(self) -> Value {
124-
let v = match self {
125-
Some(v) => T::avro(v),
126-
None => Value::Null,
124+
let (i, v) = match self {
125+
Some(v) => (1, T::avro(v)),
126+
None => (0, Value::Null),
127127
};
128-
Value::Union(Box::new(v))
128+
/*
129+
* https://avro.apache.org/docs/current/spec.html#Unions
130+
* | Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null
131+
*
132+
* Although that is not a guarantee (that the schema will be defined as ["null", {"type": ...}]: null-first),
133+
* this can be used as a guideline to choose variant-index=0 for the None value.
134+
*/
135+
Value::Union(i, Box::new(v))
129136
}
130137
}
131138

@@ -271,8 +278,14 @@ impl Value {
271278
.map(|ref symbol| symbol == &s)
272279
.unwrap_or(false),
273280
// (&Value::Union(None), &Schema::Union(_)) => true,
274-
(&Value::Union(ref value), &Schema::Union(ref inner)) => {
275-
inner.find_schema(value).is_some()
281+
(&Value::Union(variant_idx, ref variant_value), &Schema::Union(ref inner)) => {
282+
match inner.variant_schema(variant_idx) {
283+
None =>
284+
// Is an invalid variant-index a schema validation error, or a failure worth panicking over?
285+
false,
286+
Some(variant_schema) =>
287+
variant_value.validate(variant_schema),
288+
}
276289
},
277290
(&Value::Array(ref items), &Schema::Array(ref inner)) => {
278291
items.iter().all(|item| item.validate(inner))
@@ -297,8 +310,14 @@ impl Value {
297310
/// See [Schema Resolution](https://avro.apache.org/docs/current/spec.html#Schema+Resolution)
298311
/// in the Avro specification for the full set of rules of schema
299312
/// resolution.
300-
pub fn resolve(mut self, schema: &Schema) -> Result<Self, Error> {
301-
// Check if this schema is a union, and if the reader schema is not.
313+
pub fn resolve(/*mut */self, schema: &Schema) -> Result<Self, Error> {
314+
/*
315+
// RGafiyatullin:
316+
// I am convinced that a VariantValue does not conform to UnionSchema(vec![ VariantSchema ]) under any circumstances;
317+
// Union(0, VariantValue) on the other hand does.
318+
// Therefore there is no need to extract inner-value of a Union
319+
// ---
320+
// // Check if this schema is a union, and if the reader schema is not.
302321
if SchemaKind::from(&self) == SchemaKind::Union
303322
&& SchemaKind::from(schema) != SchemaKind::Union
304323
{
@@ -309,6 +328,7 @@ impl Value {
309328
};
310329
self = v;
311330
}
331+
*/
312332
match *schema {
313333
Schema::Null => self.resolve_null(),
314334
Schema::Boolean => self.resolve_boolean(),
@@ -462,17 +482,22 @@ impl Value {
462482
}
463483

464484
fn resolve_union(self, schema: &UnionSchema) -> Result<Self, Error> {
465-
let v = match self {
466-
// Both are unions case.
467-
Value::Union(v) => *v,
468-
// Reader is a union, but writer is not.
469-
v => v,
470-
};
471-
// Find the first match in the reader schema.
472-
let (_, inner) = schema
473-
.find_schema(&v)
474-
.ok_or_else(|| SchemaResolutionError::new("Could not find matching type in union"))?;
475-
v.resolve(inner)
485+
match self {
486+
Value::Union(variant_index, v) => {
487+
let variant_value = *v;
488+
let variant_schema =
489+
schema
490+
.variant_schema(variant_index)
491+
.ok_or_else(
492+
|| SchemaResolutionError::new(
493+
format!("Invalid variant index: {:?}", variant_index))
494+
)?;
495+
variant_value.resolve(variant_schema)
496+
},
497+
_ =>
498+
Err(SchemaResolutionError::new(
499+
"Attempt to resolve schema for non-union type via UnionSchema"))?
500+
}
476501
}
477502

478503
fn resolve_array(self, schema: &Schema) -> Result<Self, Error> {
@@ -570,22 +595,22 @@ mod tests {
570595
(Value::Int(42), Schema::Int, true),
571596
(Value::Int(42), Schema::Boolean, false),
572597
(
573-
Value::Union(Box::new(Value::Null)),
598+
Value::Union(0, Box::new(Value::Null)),
574599
Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()),
575600
true,
576601
),
577602
(
578-
Value::Union(Box::new(Value::Int(42))),
603+
Value::Union(1, Box::new(Value::Int(42))),
579604
Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()),
580605
true,
581606
),
582607
(
583-
Value::Union(Box::new(Value::Null)),
608+
Value::Union(0, Box::new(Value::Null)),
584609
Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int]).unwrap()),
585610
false,
586611
),
587612
(
588-
Value::Union(Box::new(Value::Int(42))),
613+
Value::Union(3, Box::new(Value::Int(42))),
589614
Schema::Union(
590615
UnionSchema::new(vec![
591616
Schema::Null,

src/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ mod tests {
382382
#[test]
383383
fn test_union() {
384384
let schema = Schema::parse_str(UNION_SCHEMA).unwrap();
385-
let union = Value::Union(Box::new(Value::Long(3)));
385+
let union = Value::Union(1, Box::new(Value::Long(3)));
386386

387387
let mut expected = Vec::new();
388388
zig_i64(1, &mut expected);

tests/validate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ lazy_static! {
4545
map.avro()
4646
}),
4747
(
48-
r#"["string", "null", "long"]"#,
49-
Value::Union(Box::new(Value::Null))
48+
r#"["null", "string", "long"]"#,
49+
Value::Union(0, Box::new(Value::Null))
5050
),
5151
(
5252
r#"{"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}"#,

0 commit comments

Comments
 (0)