apache_avro/
validator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{schema::Namespace, AvroResult, Error};
19use log::debug;
20use regex_lite::Regex;
21use std::sync::OnceLock;
22
/// A validator that validates names and namespaces according to the Avro specification.
///
/// This unit type is the default implementation of every validator trait in this
/// module ([SchemaNameValidator], [SchemaNamespaceValidator], [EnumSymbolNameValidator]
/// and [RecordFieldNameValidator]). Each implementation matches the input against the
/// regular expression returned by the corresponding trait's `regex` method.
struct SpecificationValidator;
25
26/// A trait that validates schema names.
27/// To register a custom one use [set_schema_name_validator].
28pub trait SchemaNameValidator: Send + Sync {
29    /// Returns the regex used to validate the schema name
30    /// according to the Avro specification.
31    fn regex(&self) -> &'static Regex {
32        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
33        SCHEMA_NAME_ONCE.get_or_init(|| {
34            Regex::new(
35                // An optional namespace (with optional dots) followed by a name without any dots in it.
36                r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
37            )
38                .unwrap()
39        })
40    }
41
42    /// Validates the schema name and returns the name and the optional namespace,
43    /// or [Error::InvalidSchemaName] if it is invalid.
44    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>;
45}
46
47impl SchemaNameValidator for SpecificationValidator {
48    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
49        let regex = SchemaNameValidator::regex(self);
50        let caps = regex
51            .captures(schema_name)
52            .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?;
53        Ok((
54            caps["name"].to_string(),
55            caps.name("namespace").map(|s| s.as_str().to_string()),
56        ))
57    }
58}
59
60static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> = OnceLock::new();
61
/// Sets a custom schema name validator.
///
/// Returns a unit if the registration was successful. If a validator has already
/// been registered, the registration fails and the rejected `validator` (the one
/// passed to this call) is handed back in the `Err` variant.
///
/// **Note**: This function must be called before any schema is parsed. The first
/// schema name validation registers the default validator if none has been set,
/// and the registration is one time only!
pub fn set_schema_name_validator(
    validator: Box<dyn SchemaNameValidator + Send + Sync>,
) -> Result<(), Box<dyn SchemaNameValidator + Send + Sync>> {
    // Logged before the attempt, i.e. also when the registration is rejected.
    debug!("Setting a custom schema name validator.");
    NAME_VALIDATOR_ONCE.set(validator)
}
75
76pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> {
77    NAME_VALIDATOR_ONCE
78        .get_or_init(|| {
79            debug!("Going to use the default name validator.");
80            Box::new(SpecificationValidator)
81        })
82        .validate(schema_name)
83}
84
85/// A trait that validates schema namespaces.
86/// To register a custom one use [set_schema_namespace_validator].
87pub trait SchemaNamespaceValidator: Send + Sync {
88    /// Returns the regex used to validate the schema namespace
89    /// according to the Avro specification.
90    fn regex(&self) -> &'static Regex {
91        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
92        NAMESPACE_ONCE.get_or_init(|| {
93            Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
94        })
95    }
96
97    /// Validates the schema namespace or [Error::InvalidNamespace] if it is invalid.
98    fn validate(&self, namespace: &str) -> AvroResult<()>;
99}
100
101impl SchemaNamespaceValidator for SpecificationValidator {
102    fn validate(&self, ns: &str) -> AvroResult<()> {
103        let regex = SchemaNamespaceValidator::regex(self);
104        if !regex.is_match(ns) {
105            Err(Error::InvalidNamespace(ns.to_string(), regex.as_str()))
106        } else {
107            Ok(())
108        }
109    }
110}
111
112static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Send + Sync>> =
113    OnceLock::new();
114
115/// Sets a custom schema namespace validator.
116///
117/// Returns a unit if the registration was successful or the already
118/// registered validator if the registration failed.
119///
120/// **Note**: This function must be called before parsing any schema because this will
121/// register the default validator and the registration is one time only!
122pub fn set_schema_namespace_validator(
123    validator: Box<dyn SchemaNamespaceValidator + Send + Sync>,
124) -> Result<(), Box<dyn SchemaNamespaceValidator + Send + Sync>> {
125    NAMESPACE_VALIDATOR_ONCE.set(validator)
126}
127
128pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
129    NAMESPACE_VALIDATOR_ONCE
130        .get_or_init(|| {
131            debug!("Going to use the default namespace validator.");
132            Box::new(SpecificationValidator)
133        })
134        .validate(ns)
135}
136
137/// A trait that validates enum symbol names.
138/// To register a custom one use [set_enum_symbol_name_validator].
139pub trait EnumSymbolNameValidator: Send + Sync {
140    /// Returns the regex used to validate the symbols of enum schema
141    /// according to the Avro specification.
142    fn regex(&self) -> &'static Regex {
143        static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
144        ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
145    }
146
147    /// Validates the symbols of an Enum schema name and returns nothing (unit),
148    /// or [Error::EnumSymbolName] if it is invalid.
149    fn validate(&self, name: &str) -> AvroResult<()>;
150}
151
152impl EnumSymbolNameValidator for SpecificationValidator {
153    fn validate(&self, symbol: &str) -> AvroResult<()> {
154        let regex = EnumSymbolNameValidator::regex(self);
155        if !regex.is_match(symbol) {
156            return Err(Error::EnumSymbolName(symbol.to_string()));
157        }
158
159        Ok(())
160    }
161}
162
163static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator + Send + Sync>> =
164    OnceLock::new();
165
166/// Sets a custom enum symbol name validator.
167///
168/// Returns a unit if the registration was successful or the already
169/// registered validator if the registration failed.
170///
171/// **Note**: This function must be called before parsing any schema because this will
172/// register the default validator and the registration is one time only!
173pub fn set_enum_symbol_name_validator(
174    validator: Box<dyn EnumSymbolNameValidator + Send + Sync>,
175) -> Result<(), Box<dyn EnumSymbolNameValidator + Send + Sync>> {
176    ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator)
177}
178
179pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> {
180    ENUM_SYMBOL_NAME_VALIDATOR_ONCE
181        .get_or_init(|| {
182            debug!("Going to use the default enum symbol name validator.");
183            Box::new(SpecificationValidator)
184        })
185        .validate(symbol)
186}
187
188/// A trait that validates record field names.
189/// To register a custom one use [set_record_field_name_validator].
190pub trait RecordFieldNameValidator: Send + Sync {
191    /// Returns the regex used to validate the record field names
192    /// according to the Avro specification.
193    fn regex(&self) -> &'static Regex {
194        static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
195        FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
196    }
197
198    /// Validates the record field's names and returns nothing (unit),
199    /// or [Error::FieldName] if it is invalid.
200    fn validate(&self, name: &str) -> AvroResult<()>;
201}
202
203impl RecordFieldNameValidator for SpecificationValidator {
204    fn validate(&self, field_name: &str) -> AvroResult<()> {
205        let regex = RecordFieldNameValidator::regex(self);
206        if !regex.is_match(field_name) {
207            return Err(Error::FieldName(field_name.to_string()));
208        }
209
210        Ok(())
211    }
212}
213
214static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidator + Send + Sync>> =
215    OnceLock::new();
216
217/// Sets a custom record field name validator.
218///
219/// Returns a unit if the registration was successful or the already
220/// registered validator if the registration failed.
221///
222/// **Note**: This function must be called before parsing any schema because this will
223/// register the default validator and the registration is one time only!
224pub fn set_record_field_name_validator(
225    validator: Box<dyn RecordFieldNameValidator + Send + Sync>,
226) -> Result<(), Box<dyn RecordFieldNameValidator + Send + Sync>> {
227    RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator)
228}
229
230pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> {
231    RECORD_FIELD_NAME_VALIDATOR_ONCE
232        .get_or_init(|| {
233            debug!("Going to use the default record field name validator.");
234            Box::new(SpecificationValidator)
235        })
236        .validate(field_name)
237}
238
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Name;
    use apache_avro_test_helper::TestResult;

    // The default schema name validator accepts a plain name.
    #[test]
    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
        validate_schema_name("example")?;
        Ok(())
    }

    // The default schema name validator rejects a name containing a dash.
    #[test]
    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_schema_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // Fully qualified names whose namespace part is malformed must be rejected with
    // `Error::InvalidSchemaName`, carrying the input and the default pattern.
    #[test]
    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult {
        let validator = SpecificationValidator;

        // A segment starting with a digit, then an empty segment.
        for full_name in ["ns.0.record1", "ns..record1"] {
            let name = Name::new(full_name);
            assert!(name.is_err());

            let expected = Error::InvalidSchemaName(
                full_name.to_string(),
                SchemaNameValidator::regex(&validator).as_str(),
            )
            .to_string();
            let err = name.err().map(|e| e.to_string()).unwrap();
            pretty_assertions::assert_eq!(expected, err);
        }
        Ok(())
    }

    // The default namespace validator accepts a dotted namespace.
    #[test]
    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
        validate_namespace("com.example")?;
        Ok(())
    }

    // The default namespace validator rejects a namespace containing a dash.
    #[test]
    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_namespace("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // The default enum symbol validator accepts a plain identifier.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult {
        validate_enum_symbol_name("spades")?;
        Ok(())
    }

    // The default enum symbol validator rejects a symbol containing a dash.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult {
        let outcome = validate_enum_symbol_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // The default record field validator accepts a plain identifier.
    #[test]
    fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult {
        validate_record_field_name("test")?;
        Ok(())
    }

    // The default record field validator rejects a name containing a dash.
    #[test]
    fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult {
        let outcome = validate_record_field_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }
}