apache_avro/schema/
union.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::AvroResult;
19use crate::error::Details;
20use crate::schema::{Name, Namespace, ResolvedSchema, Schema, SchemaKind};
21use crate::types;
22use std::borrow::Borrow;
23use std::collections::{BTreeMap, HashMap, HashSet};
24use std::fmt::Debug;
25
/// A description of a Union schema
#[derive(Debug, Clone)]
pub struct UnionSchema {
    /// The schemas that make up this union
    pub(crate) schemas: Vec<Schema>,
    // Maps the `SchemaKind` of a variant to that variant's position in `schemas`.
    // It is populated by `UnionSchema::new`, which also relies on it to reject duplicate
    // unnamed schemas, and it lets a lookup by value kind skip scanning every variant.
    // **NOTE** a kind identifies an unnamed variant uniquely, but several named variants may
    // share one kind, so this index cannot tell such variants apart by itself.
    variant_index: BTreeMap<SchemaKind, usize>,
}
37
38impl UnionSchema {
39    /// Creates a new UnionSchema from a vector of schemas.
40    ///
41    /// # Errors
42    /// Will return an error if `schemas` has duplicate unnamed schemas or if `schemas`
43    /// contains a union.
44    pub fn new(schemas: Vec<Schema>) -> AvroResult<Self> {
45        let mut named_schemas: HashSet<&Name> = HashSet::default();
46        let mut vindex = BTreeMap::new();
47        for (i, schema) in schemas.iter().enumerate() {
48            if let Schema::Union(_) = schema {
49                return Err(Details::GetNestedUnion.into());
50            } else if !schema.is_named() && vindex.insert(SchemaKind::from(schema), i).is_some() {
51                return Err(Details::GetUnionDuplicate.into());
52            } else if schema.is_named() {
53                let name = schema.name().unwrap();
54                if !named_schemas.insert(name) {
55                    return Err(Details::GetUnionDuplicateNamedSchemas(name.to_string()).into());
56                }
57                vindex.insert(SchemaKind::from(schema), i);
58            }
59        }
60        Ok(UnionSchema {
61            schemas,
62            variant_index: vindex,
63        })
64    }
65
66    /// Returns a slice to all variants of this schema.
67    pub fn variants(&self) -> &[Schema] {
68        &self.schemas
69    }
70
71    /// Returns true if the any of the variants of this `UnionSchema` is `Null`.
72    pub fn is_nullable(&self) -> bool {
73        self.schemas.iter().any(|x| matches!(x, Schema::Null))
74    }
75
76    /// Optionally returns a reference to the schema matched by this value, as well as its position
77    /// within this union.
78    ///
79    /// Extra arguments:
80    /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas would be used to resolve references.
81    pub fn find_schema_with_known_schemata<S: Borrow<Schema> + Debug>(
82        &self,
83        value: &types::Value,
84        known_schemata: Option<&HashMap<Name, S>>,
85        enclosing_namespace: &Namespace,
86    ) -> Option<(usize, &Schema)> {
87        let schema_kind = SchemaKind::from(value);
88        if let Some(&i) = self.variant_index.get(&schema_kind) {
89            // fast path
90            Some((i, &self.schemas[i]))
91        } else {
92            // slow path (required for matching logical or named types)
93
94            // first collect what schemas we already know
95            let mut collected_names: HashMap<Name, &Schema> = known_schemata
96                .map(|names| {
97                    names
98                        .iter()
99                        .map(|(name, schema)| (name.clone(), schema.borrow()))
100                        .collect()
101                })
102                .unwrap_or_default();
103
104            self.schemas.iter().enumerate().find(|(_, schema)| {
105                let resolved_schema = ResolvedSchema::new_with_known_schemata(
106                    vec![*schema],
107                    enclosing_namespace,
108                    &collected_names,
109                )
110                .expect("Schema didn't successfully parse");
111                let resolved_names = resolved_schema.names_ref;
112
113                // extend known schemas with just resolved names
114                collected_names.extend(resolved_names);
115                let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
116
117                value
118                    .clone()
119                    .resolve_internal(schema, &collected_names, namespace, &None)
120                    .is_ok()
121            })
122        }
123    }
124}
125
126// No need to compare variant_index, it is derivative of schemas.
127impl PartialEq for UnionSchema {
128    fn eq(&self, other: &UnionSchema) -> bool {
129        self.schemas.eq(&other.schemas)
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::{Details, Error};
    use crate::schema::RecordSchema;
    use apache_avro_test_helper::TestResult;

    /// A union built from distinct unnamed schemas exposes them in insertion order.
    #[test]
    fn avro_rs_402_new_union_schema() -> TestResult {
        let expected = vec![Schema::Int, Schema::String];
        let union_schema = UnionSchema::new(expected.clone())?;

        assert_eq!(union_schema.variants(), expected.as_slice());

        Ok(())
    }

    /// Two named schemas sharing one name are rejected, and the error carries that name.
    #[test]
    fn avro_rs_402_new_union_schema_duplicate_names() -> TestResult {
        let first = Schema::Record(RecordSchema::builder().try_name("Same_name")?.build());
        let second = Schema::Record(RecordSchema::builder().try_name("Same_name")?.build());
        let expected_name = Name::new("Same_name")?.to_string();

        match UnionSchema::new(vec![first, second]).map_err(Error::into_details) {
            Err(Details::GetUnionDuplicateNamedSchemas(name)) => {
                assert_eq!(name, expected_name);
            }
            err => panic!("Expected GetUnionDuplicateNamedSchemas error, got: {err:?}"),
        }

        Ok(())
    }
}