apache_avro/
validator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! # Custom name validation
19//!
20//! By default, the library follows the rules specified in the [Avro specification](https://avro.apache.org/docs/++version++/specification/#names).
21//!
22//! Some of the other Apache Avro language SDKs are more flexible in their name validation. For
23//! interoperability with those SDKs, the library provides a way to customize the name validation.
24//!
25//! ```
26//! # use apache_avro::{AvroResult, validator::{SchemaNameValidator, set_schema_name_validator}};
27//! # use regex_lite::Regex;
28//! # use std::sync::OnceLock;
29//! struct DontAllowNamespaces;
30//!
31//! impl SchemaNameValidator for DontAllowNamespaces {
32//!     fn regex(&self) -> &'static Regex {
33//!         static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
34//!         SCHEMA_NAME_ONCE.get_or_init(|| {
35//!             Regex::new(
36//!                 // Disallows any namespace. By naming the group `name`, the default
37//!                 // implementation of `SchemaNameValidator::validate` can be reused.
38//!                 r"^(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
39//!             ).expect("Regex is valid")
40//!         })
41//!     }
42//! }
43//!
44//! // don't parse any schema before registering the custom validator(s)!
45//!
46//! if set_schema_name_validator(Box::new(DontAllowNamespaces)).is_err() {
47//!     // `.unwrap()` doesn't work as the return type does not implement `Debug`
48//!     panic!("There was already a schema validator configured")
49//! }
50//!
51//! // ... use the library
52//! ```
53//!
//! The same approach can be applied to the validation of schema namespaces, enum symbols and record field names.
55//!
//! **Note**: a validator can be set only once during the lifetime of the application!
//! If the application parses a schema before setting a validator, the default validator will be
//! registered and used instead!
59
60use crate::{AvroResult, error::Details};
61use log::debug;
62use regex_lite::Regex;
63use std::sync::OnceLock;
64
/// The default validator, which validates names and namespaces according to the Avro
/// specification.
///
/// It implements every validator trait of this module with an empty `impl` block, so the
/// default `regex` and `validate` methods of those traits are what it uses.
struct SpecificationValidator;
67
68/// A trait that validates schema names.
69///
70/// To register a custom one use [`set_schema_name_validator`].
71pub trait SchemaNameValidator: Send + Sync {
72    /// The regex used to validate the schema name.
73    ///
74    /// When the name part of the full name is provided as a capture group named `name`, the
75    /// default implementation of [`Self::validate`] can be used.
76    ///
77    /// The default implementation uses the Avro specified regex.
78    fn regex(&self) -> &'static Regex {
79        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
80        SCHEMA_NAME_ONCE.get_or_init(|| {
81            Regex::new(
82                // An optional namespace (with optional dots) followed by a name without any dots in it.
83                r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
84            )
85                .unwrap()
86        })
87    }
88
89    /// Validates the schema name and returns the start byte of the name.
90    ///
91    /// Requires that the implementation of [`Self::regex`] provides a capture group named `name`
92    /// that captures the name part of the full name.
93    ///
94    /// Should return [`Details::InvalidSchemaName`] if it is invalid.
95    fn validate(&self, schema_name: &str) -> AvroResult<usize> {
96        let regex = SchemaNameValidator::regex(self);
97        let caps = regex
98            .captures(schema_name)
99            .ok_or_else(|| Details::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?;
100        Ok(caps
101            .name("name")
102            .ok_or(Details::InvalidSchemaNameValidatorImplementation)?
103            .start())
104    }
105}
106
// The specification validator relies entirely on the trait's default methods.
impl SchemaNameValidator for SpecificationValidator {}

/// The schema name validator in use.
///
/// It is initialized at most once: either explicitly by `set_schema_name_validator` or
/// lazily by `validate_schema_name`, which falls back to `SpecificationValidator`.
static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> = OnceLock::new();
110
/// Sets a custom schema name validator.
///
/// **Note**: This function must be called before parsing any schema, because parsing
/// registers the default validator and the registration happens only once!
///
/// # Errors
///
/// Returns `Err(validator)`, handing the rejected validator back to the caller, if a
/// validator is already configured.
pub fn set_schema_name_validator(
    validator: Box<dyn SchemaNameValidator + Send + Sync>,
) -> Result<(), Box<dyn SchemaNameValidator + Send + Sync>> {
    // Logged before the attempt, so the message appears even if the registration is rejected.
    debug!("Setting a custom schema name validator.");
    NAME_VALIDATOR_ONCE.set(validator)
}
123
124pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<usize> {
125    NAME_VALIDATOR_ONCE
126        .get_or_init(|| {
127            debug!("Going to use the default name validator.");
128            Box::new(SpecificationValidator)
129        })
130        .validate(schema_name)
131}
132
133/// A trait that validates schema namespaces.
134///
135/// To register a custom one use [`set_schema_namespace_validator`].
136pub trait SchemaNamespaceValidator: Send + Sync {
137    /// The regex used to validate the schema namespace.
138    ///
139    /// The default implementation uses the Avro specified regex.
140    fn regex(&self) -> &'static Regex {
141        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
142        NAMESPACE_ONCE.get_or_init(|| {
143            Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
144        })
145    }
146
147    /// Validates a schema namespace.
148    ///
149    /// Should return [`Details::InvalidNamespace`] if it is invalid.
150    fn validate(&self, namespace: &str) -> AvroResult<()> {
151        let regex = SchemaNamespaceValidator::regex(self);
152        if !regex.is_match(namespace) {
153            Err(Details::InvalidNamespace(namespace.to_string(), regex.as_str()).into())
154        } else {
155            Ok(())
156        }
157    }
158}
159
// The specification validator relies entirely on the trait's default methods.
impl SchemaNamespaceValidator for SpecificationValidator {}

/// The schema namespace validator in use.
///
/// It is initialized at most once: either explicitly by `set_schema_namespace_validator`
/// or lazily by `validate_namespace`, which falls back to `SpecificationValidator`.
static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Send + Sync>> =
    OnceLock::new();
164
165/// Sets a custom schema namespace validator.
166///
167/// Returns `Err(validator)` if a validator is already configured.
168///
169/// **Note**: This function must be called before parsing any schema because this will
170/// register the default validator and the registration is one time only!
171pub fn set_schema_namespace_validator(
172    validator: Box<dyn SchemaNamespaceValidator + Send + Sync>,
173) -> Result<(), Box<dyn SchemaNamespaceValidator + Send + Sync>> {
174    NAMESPACE_VALIDATOR_ONCE.set(validator)
175}
176
177pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
178    NAMESPACE_VALIDATOR_ONCE
179        .get_or_init(|| {
180            debug!("Going to use the default namespace validator.");
181            Box::new(SpecificationValidator)
182        })
183        .validate(ns)
184}
185
186/// A trait that validates enum symbol names.
187///
188/// To register a custom one use [`set_enum_symbol_name_validator`].
189pub trait EnumSymbolNameValidator: Send + Sync {
190    /// The regex used to validate the symbols of enums.
191    ///
192    /// The default implementation uses the Avro specified regex.
193    fn regex(&self) -> &'static Regex {
194        static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
195        ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
196    }
197
198    /// Validate the symbol of an enum.
199    ///
200    /// Should return [`Details::EnumSymbolName`] if it is invalid.
201    fn validate(&self, symbol: &str) -> AvroResult<()> {
202        let regex = EnumSymbolNameValidator::regex(self);
203        if !regex.is_match(symbol) {
204            return Err(Details::EnumSymbolName(symbol.to_string()).into());
205        }
206
207        Ok(())
208    }
209}
210
// The specification validator relies entirely on the trait's default methods.
impl EnumSymbolNameValidator for SpecificationValidator {}

/// The enum symbol name validator in use.
///
/// It is initialized at most once: either explicitly by `set_enum_symbol_name_validator`
/// or lazily by `validate_enum_symbol_name`, which falls back to `SpecificationValidator`.
static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator + Send + Sync>> =
    OnceLock::new();
215
216/// Sets a custom enum symbol name validator.
217///
218/// Returns `Err(validator)` if a validator is already configured.
219///
220/// **Note**: This function must be called before parsing any schema because this will
221/// register the default validator and the registration is one time only!
222pub fn set_enum_symbol_name_validator(
223    validator: Box<dyn EnumSymbolNameValidator + Send + Sync>,
224) -> Result<(), Box<dyn EnumSymbolNameValidator + Send + Sync>> {
225    ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator)
226}
227
228pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> {
229    ENUM_SYMBOL_NAME_VALIDATOR_ONCE
230        .get_or_init(|| {
231            debug!("Going to use the default enum symbol name validator.");
232            Box::new(SpecificationValidator)
233        })
234        .validate(symbol)
235}
236
237/// A trait that validates record field names.
238///
239/// To register a custom one use [`set_record_field_name_validator`].
240pub trait RecordFieldNameValidator: Send + Sync {
241    /// The regex used to validate the record field names.
242    ///
243    /// The default implementation uses the Avro specified regex.
244    fn regex(&self) -> &'static Regex {
245        static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
246        FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
247    }
248
249    /// Validate the name of a record field.
250    ///
251    /// Should return [`Details::FieldName`] if it is invalid.
252    fn validate(&self, field_name: &str) -> AvroResult<()> {
253        let regex = RecordFieldNameValidator::regex(self);
254        if !regex.is_match(field_name) {
255            return Err(Details::FieldName(field_name.to_string()).into());
256        }
257
258        Ok(())
259    }
260}
261
// The specification validator relies entirely on the trait's default methods.
impl RecordFieldNameValidator for SpecificationValidator {}

/// The record field name validator in use.
///
/// It is initialized at most once: either explicitly by `set_record_field_name_validator`
/// or lazily by `validate_record_field_name`, which falls back to `SpecificationValidator`.
static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidator + Send + Sync>> =
    OnceLock::new();
266
267/// Sets a custom record field name validator.
268///
269/// Returns `Err(validator)` if a validator is already configured.
270///
271/// **Note**: This function must be called before parsing any schema because this will
272/// register the default validator and the registration is one time only!
273pub fn set_record_field_name_validator(
274    validator: Box<dyn RecordFieldNameValidator + Send + Sync>,
275) -> Result<(), Box<dyn RecordFieldNameValidator + Send + Sync>> {
276    RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator)
277}
278
279pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> {
280    RECORD_FIELD_NAME_VALIDATOR_ONCE
281        .get_or_init(|| {
282            debug!("Going to use the default record field name validator.");
283            Box::new(SpecificationValidator)
284        })
285        .validate(field_name)
286}
287
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Name;
    use apache_avro_test_helper::TestResult;

    /// Asserts that `Name::new` rejects `full_name` with the `InvalidSchemaName` error
    /// built from the regex of the specification validator.
    fn assert_rejected_as_invalid_schema_name(full_name: &str) {
        let parsed = Name::new(full_name);
        assert!(parsed.is_err());

        let validator = SpecificationValidator;
        let expected = Details::InvalidSchemaName(
            full_name.to_string(),
            SchemaNameValidator::regex(&validator).as_str(),
        )
        .to_string();
        let actual = parsed.map_err(|e| e.to_string()).err().unwrap();
        pretty_assertions::assert_eq!(expected, actual);
    }

    #[test]
    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
        validate_schema_name("example")?;
        Ok(())
    }

    #[test]
    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_schema_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult {
        // A namespace segment starting with a digit, then an empty namespace segment.
        for full_name in ["ns.0.record1", "ns..record1"] {
            assert_rejected_as_invalid_schema_name(full_name);
        }
        Ok(())
    }

    #[test]
    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
        validate_namespace("com.example")?;
        Ok(())
    }

    #[test]
    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_namespace("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult {
        validate_enum_symbol_name("spades")?;
        Ok(())
    }

    #[test]
    fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult {
        let outcome = validate_enum_symbol_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult {
        validate_record_field_name("test")?;
        Ok(())
    }

    #[test]
    fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult {
        let outcome = validate_record_field_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }
}