apache_avro/
validator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! # Custom name validation
19//!
20//! By default, the library follows the rules specified in the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names).
21//!
22//! Some of the other Apache Avro language SDKs are more flexible in their name validation. For
23//! interoperability with those SDKs, the library provides a way to customize the name validation.
24//!
25//! ```
26//! use apache_avro::AvroResult;
27//! use apache_avro::schema::Namespace;
28//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator};
29//!
30//! struct MyCustomValidator;
31//!
32//! impl SchemaNameValidator for MyCustomValidator {
33//!     fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> {
34//!         todo!()
35//!     }
36//! }
37//!
38//! // don't parse any schema before registering the custom validator(s)!
39//!
40//! if set_schema_name_validator(Box::new(MyCustomValidator)).is_err() {
41//!     // `.unwrap()` doesn't work as the return type does not implement `Debug`
42//!     panic!("There was already a schema validator configured")
43//! }
44//!
45//! // ... use the library
46//! ```
47//!
//! The same approach applies to validating schema namespaces, enum symbols and record field names.
49//!
//! **Note**: the library allows setting each validator only once per application lifetime!
//! If the application parses a schema before a custom validator is set, the default validator
//! is registered and used instead!
53
54use crate::{AvroResult, error::Details, schema::Namespace};
55use log::debug;
56use regex_lite::Regex;
57use std::sync::OnceLock;
58
/// A validator that validates names and namespaces according to the Avro specification.
///
/// This unit struct implements every validator trait in this module
/// ([`SchemaNameValidator`], [`SchemaNamespaceValidator`], [`EnumSymbolNameValidator`] and
/// [`RecordFieldNameValidator`]) and is the validator registered by default when no custom
/// one has been set.
struct SpecificationValidator;
61
62/// A trait that validates schema names.
63///
64/// To register a custom one use [`set_schema_name_validator`].
65pub trait SchemaNameValidator: Send + Sync {
66    /// The regex used to validate the schema name.
67    ///
68    /// The default implementation uses the Avro specified regex.
69    fn regex(&self) -> &'static Regex {
70        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
71        SCHEMA_NAME_ONCE.get_or_init(|| {
72            Regex::new(
73                // An optional namespace (with optional dots) followed by a name without any dots in it.
74                r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
75            )
76                .unwrap()
77        })
78    }
79
80    /// Validates the schema name and returns the name and the optional namespace.
81    ///
82    /// Should return [`Details::InvalidSchemaName`] if it is invalid.
83    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>;
84}
85
86impl SchemaNameValidator for SpecificationValidator {
87    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
88        let regex = SchemaNameValidator::regex(self);
89        let caps = regex
90            .captures(schema_name)
91            .ok_or_else(|| Details::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?;
92        Ok((
93            caps["name"].to_string(),
94            caps.name("namespace").map(|s| s.as_str().to_string()),
95        ))
96    }
97}
98
// Process-wide slot for the schema name validator. It is filled at most once: either
// explicitly through `set_schema_name_validator`, or lazily with `SpecificationValidator`
// the first time `validate_schema_name` runs.
static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> = OnceLock::new();
100
/// Sets a custom schema name validator.
///
/// A debug message is logged on every call, before the registration is attempted.
///
/// # Errors
///
/// Returns `Err(validator)`, handing the rejected validator back to the caller, if a
/// validator is already configured.
///
/// **Note**: Call this function before parsing any schema. Validating a schema name without
/// a configured validator registers the default one, and the registration is one time only!
pub fn set_schema_name_validator(
    validator: Box<dyn SchemaNameValidator + Send + Sync>,
) -> Result<(), Box<dyn SchemaNameValidator + Send + Sync>> {
    debug!("Setting a custom schema name validator.");
    NAME_VALIDATOR_ONCE.set(validator)
}
113
114pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> {
115    NAME_VALIDATOR_ONCE
116        .get_or_init(|| {
117            debug!("Going to use the default name validator.");
118            Box::new(SpecificationValidator)
119        })
120        .validate(schema_name)
121}
122
123/// A trait that validates schema namespaces.
124///
125/// To register a custom one use [`set_schema_namespace_validator`].
126pub trait SchemaNamespaceValidator: Send + Sync {
127    /// The regex used to validate the schema namespace.
128    ///
129    /// The default implementation uses the Avro specified regex.
130    fn regex(&self) -> &'static Regex {
131        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
132        NAMESPACE_ONCE.get_or_init(|| {
133            Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
134        })
135    }
136
137    /// Validates a schema namespace.
138    ///
139    /// Should return [`Details::InvalidNamespace`] if it is invalid.
140    fn validate(&self, namespace: &str) -> AvroResult<()>;
141}
142
143impl SchemaNamespaceValidator for SpecificationValidator {
144    fn validate(&self, ns: &str) -> AvroResult<()> {
145        let regex = SchemaNamespaceValidator::regex(self);
146        if !regex.is_match(ns) {
147            Err(Details::InvalidNamespace(ns.to_string(), regex.as_str()).into())
148        } else {
149            Ok(())
150        }
151    }
152}
153
// Process-wide slot for the schema namespace validator. It is filled at most once: either
// explicitly through `set_schema_namespace_validator`, or lazily with
// `SpecificationValidator` the first time `validate_namespace` runs.
static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Send + Sync>> =
    OnceLock::new();
156
157/// Sets a custom schema namespace validator.
158///
159/// Returns `Err(validator)` if a validator is already configured.
160///
161/// **Note**: This function must be called before parsing any schema because this will
162/// register the default validator and the registration is one time only!
163pub fn set_schema_namespace_validator(
164    validator: Box<dyn SchemaNamespaceValidator + Send + Sync>,
165) -> Result<(), Box<dyn SchemaNamespaceValidator + Send + Sync>> {
166    NAMESPACE_VALIDATOR_ONCE.set(validator)
167}
168
169pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
170    NAMESPACE_VALIDATOR_ONCE
171        .get_or_init(|| {
172            debug!("Going to use the default namespace validator.");
173            Box::new(SpecificationValidator)
174        })
175        .validate(ns)
176}
177
178/// A trait that validates enum symbol names.
179///
180/// To register a custom one use [`set_enum_symbol_name_validator`].
181pub trait EnumSymbolNameValidator: Send + Sync {
182    /// The regex used to validate the symbols of enums.
183    ///
184    /// The default implementation uses the Avro specified regex.
185    fn regex(&self) -> &'static Regex {
186        static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
187        ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
188    }
189
190    /// Validate the symbol of an enum.
191    ///
192    /// Should return [`Details::EnumSymbolName`] if it is invalid.
193    fn validate(&self, name: &str) -> AvroResult<()>;
194}
195
196impl EnumSymbolNameValidator for SpecificationValidator {
197    fn validate(&self, symbol: &str) -> AvroResult<()> {
198        let regex = EnumSymbolNameValidator::regex(self);
199        if !regex.is_match(symbol) {
200            return Err(Details::EnumSymbolName(symbol.to_string()).into());
201        }
202
203        Ok(())
204    }
205}
206
// Process-wide slot for the enum symbol name validator. It is filled at most once: either
// explicitly through `set_enum_symbol_name_validator`, or lazily with
// `SpecificationValidator` the first time `validate_enum_symbol_name` runs.
static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator + Send + Sync>> =
    OnceLock::new();
209
210/// Sets a custom enum symbol name validator.
211///
212/// Returns `Err(validator)` if a validator is already configured.
213///
214/// **Note**: This function must be called before parsing any schema because this will
215/// register the default validator and the registration is one time only!
216pub fn set_enum_symbol_name_validator(
217    validator: Box<dyn EnumSymbolNameValidator + Send + Sync>,
218) -> Result<(), Box<dyn EnumSymbolNameValidator + Send + Sync>> {
219    ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator)
220}
221
222pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> {
223    ENUM_SYMBOL_NAME_VALIDATOR_ONCE
224        .get_or_init(|| {
225            debug!("Going to use the default enum symbol name validator.");
226            Box::new(SpecificationValidator)
227        })
228        .validate(symbol)
229}
230
231/// A trait that validates record field names.
232///
233/// To register a custom one use [`set_record_field_name_validator`].
234pub trait RecordFieldNameValidator: Send + Sync {
235    /// The regex used to validate the record field names.
236    ///
237    /// The default implementation uses the Avro specified regex.
238    fn regex(&self) -> &'static Regex {
239        static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
240        FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
241    }
242
243    /// Validate the name of a record field.
244    ///
245    /// Should return [`Details::FieldName`] if it is invalid.
246    fn validate(&self, name: &str) -> AvroResult<()>;
247}
248
249impl RecordFieldNameValidator for SpecificationValidator {
250    fn validate(&self, field_name: &str) -> AvroResult<()> {
251        let regex = RecordFieldNameValidator::regex(self);
252        if !regex.is_match(field_name) {
253            return Err(Details::FieldName(field_name.to_string()).into());
254        }
255
256        Ok(())
257    }
258}
259
// Process-wide slot for the record field name validator. It is filled at most once: either
// explicitly through `set_record_field_name_validator`, or lazily with
// `SpecificationValidator` the first time `validate_record_field_name` runs.
static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidator + Send + Sync>> =
    OnceLock::new();
262
263/// Sets a custom record field name validator.
264///
265/// Returns `Err(validator)` if a validator is already configured.
266///
267/// **Note**: This function must be called before parsing any schema because this will
268/// register the default validator and the registration is one time only!
269pub fn set_record_field_name_validator(
270    validator: Box<dyn RecordFieldNameValidator + Send + Sync>,
271) -> Result<(), Box<dyn RecordFieldNameValidator + Send + Sync>> {
272    RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator)
273}
274
275pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> {
276    RECORD_FIELD_NAME_VALIDATOR_ONCE
277        .get_or_init(|| {
278            debug!("Going to use the default record field name validator.");
279            Box::new(SpecificationValidator)
280        })
281        .validate(field_name)
282}
283
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Name;
    use apache_avro_test_helper::TestResult;

    // --- schema name validator -------------------------------------------------------------

    /// A plain, unqualified name is accepted by the schema name validator.
    #[test]
    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
        validate_schema_name("example")?;
        Ok(())
    }

    /// A dash is rejected by the schema name validator.
    #[test]
    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_schema_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    /// Fully qualified names whose namespace part is malformed must be rejected with an
    /// `InvalidSchemaName` error that reports the name and the regex in use.
    #[test]
    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult {
        let validator = SpecificationValidator;

        // A namespace segment starting with a digit, then an empty namespace segment.
        for full_name in ["ns.0.record1", "ns..record1"] {
            let parsed = Name::new(full_name);
            assert!(parsed.is_err());

            let expected = Details::InvalidSchemaName(
                full_name.to_string(),
                SchemaNameValidator::regex(&validator).as_str(),
            )
            .to_string();
            let actual = parsed.err().map(|e| e.to_string()).unwrap();
            pretty_assertions::assert_eq!(expected, actual);
        }

        Ok(())
    }

    // --- schema namespace validator --------------------------------------------------------

    /// A dotted namespace is accepted by the namespace validator.
    #[test]
    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
        validate_namespace("com.example")?;
        Ok(())
    }

    /// A dash is rejected by the namespace validator.
    #[test]
    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_namespace("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // --- enum symbol name validator --------------------------------------------------------

    /// A plain identifier is accepted as an enum symbol.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult {
        validate_enum_symbol_name("spades")?;
        Ok(())
    }

    /// A dash is rejected in an enum symbol.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult {
        let outcome = validate_enum_symbol_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // --- record field name validator -------------------------------------------------------

    /// A plain identifier is accepted as a record field name.
    #[test]
    fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult {
        validate_record_field_name("test")?;
        Ok(())
    }

    /// A dash is rejected in a record field name.
    #[test]
    fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult {
        let outcome = validate_record_field_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }
}