Skip to main content

apache_avro/serde/
derive.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{
19    borrow::Cow,
20    collections::{HashMap, HashSet},
21};
22
23use crate::{
24    Schema,
25    schema::{FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema},
26};
27
28/// Trait for types that serve as an Avro data model.
29///
30/// **Do not implement directly!** Either derive it or implement [`AvroSchemaComponent`] to get this trait
31/// through a blanket implementation.
32///
33/// ## Deriving `AvroSchema`
34///
35/// Using the custom derive requires that you enable the `"derive"` cargo
36/// feature in your `Cargo.toml`:
37///
38/// ```toml
39/// [dependencies]
40/// apache-avro = { version = "..", features = ["derive"] }
41/// ```
42///
43/// Then, you add the `#[derive(AvroSchema)]` annotation to your `struct` and
44/// `enum` type definition:
45///
46/// ```
47/// # use serde::{Serialize, Deserialize};
48/// # use apache_avro::AvroSchema;
49/// #[derive(AvroSchema, Serialize, Deserialize)]
50/// pub struct Foo {
51///     bar: Vec<Bar>,
52/// }
53///
54/// #[derive(AvroSchema, Serialize, Deserialize)]
55/// pub enum Bar {
56///     Spam,
57///     Maps
58/// }
59/// ```
60///
61/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
62/// through the blanket implementation for `T: AvroSchemaComponent`.
63///
64/// When deriving `struct`s, every member must also implement `AvroSchemaComponent`.
65///
66/// ## Changing the generated schema
67///
68/// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema.
69/// It will also check for compatibility between the various attributes.
70///
71/// #### Container attributes
72///
73///  - `#[serde(rename = "name")]`
74///
75// TODO: Should we check if `name` contains any dots? As that would imply a namespace
76///    Set the `name` of the schema to the given string. Defaults to the name of the type.
77///
78///  - `#[avro(namespace = "some.name.space")]`
79///
80///    Set the `namespace` of the schema. This will be the relative namespace if the schema is included
81///    in another schema.
82///
83///  - `#[avro(doc = "Some documentation")]`
84///
85///    Set the `doc` attribute of the schema. Defaults to the documentation of the type.
86///
87///  - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
88///
89///    Provide the default value for this type when it is used in a field.
90///
91///  - `#[avro(alias = "name")]`
92///
93///    Set the `alias` attribute of the schema. Can be specified multiple times.
94///
95///  - `#[serde(rename_all = "camelCase")]`
96///
97///    Rename all the fields or variants in the schema to follow the given case convention. The possible values
98///    are `"lowercase"`, `"UPPERCASE"`, `"PascalCase"`, `"camelCase"`, `"snake_case"`, `"kebab-case"`,
99///    `"SCREAMING_SNAKE_CASE"`, `"SCREAMING-KEBAB-CASE"`.
100///
101///  - `#[serde(transparent)]`
102///
103///    Use the schema of the inner field directly. Is only allowed on structs with only one unskipped field.
104///
105///
106/// #### Variant attributes
107///
108///  - `#[serde(rename = "name")]`
109///
110///    Rename the variant to the given name.
111///
112///
113/// #### Field attributes
114///
115///  - `#[serde(rename = "name")]`
116///
117///    Rename the field name to the given name.
118///
119///  - `#[avro(doc = "Some documentation")]`
120///
121///    Set the `doc` attribute of the field. Defaults to the documentation of the field.
122///
123///  - `#[avro(default = ..)]`
124///
125///    Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
126///    to get the default value for a type. To remove the `default` attribute for a field, set `default` to `false`: `#[avro(default = false)]`.
127///
128///    To override or set a default value, provide a JSON string:
129///
130///      - Null: `#[avro(default = "null")]`
131///      - Boolean: `#[avro(default = "true")]`.
132///      - Number: `#[avro(default = "42")]` or `#[avro(default = "42.5")]`
133///      - String: `#[avro(default = r#""String needs extra quotes""#)]`.
134///      - Array: `#[avro(default = r#"["One", "Two", "Three"]"#)]`.
135///      - Object: `#[avro(default = r#"{"One": 1}"#)]`.
136///
137///    See [the specification](https://avro.apache.org/docs/++version++/specification/#schema-record)
138///    for details on how to map a type to a JSON value.
139///
140///  - `#[serde(alias = "name")]`
141///
142///    Set the `alias` attribute of the field. Can be specified multiple times.
143///
144///  - `#[serde(flatten)]`
145///
146///    Flatten the content of this field into the container it is defined in.
147///
148///  - `#[serde(skip)]`
149///
150///    Do not include this field in the schema.
151///
152///  - `#[serde(skip_serializing)]`
153///
154///    When combined with `#[serde(skip_deserializing)]`, don't include this field in the schema.
155///    Otherwise, it will be included in the schema and the `#[avro(default)]` attribute **must** be
156///    set. That value will be used for serializing.
157///
158///  - `#[serde(skip_serializing_if)]`
159///
160///    Conditionally use the value of the field or the value provided by `#[avro(default)]`. The
161///    `#[avro(default)]` attribute **must** be set.
162///
163///  - `#[avro(with)]` and `#[serde(with = "module")]`
164///
165///    Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types).
166///
167/// #### Incompatible Serde attributes
168///
169/// The derive macro is compatible with most Serde attributes, but it is incompatible with
170/// the following attributes:
171///
172/// - Container attributes
173///     - `tag`
174///     - `content`
175///     - `untagged`
176///     - `variant_identifier`
177///     - `field_identifier`
178///     - `remote`
179///     - `rename_all(serialize = "..", deserialize = "..")` where `serialize` != `deserialize`
180/// - Variant attributes
181///     - `other`
182///     - `untagged`
183/// - Field attributes
184///     - `getter`
185///
186/// ## Working with foreign types
187///
188/// Most foreign types won't have a [`AvroSchema`] implementation. This crate implements it only
189/// for built-in types and [`uuid::Uuid`].
190///
191/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]`
192/// attribute can be used to get the schema for those fields. It can be used in two ways:
193///
194/// 1. In combination with `#[serde(with = "path::to::module")]`
195///
196///    To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Schema`
197///    and `fn get_record_fields_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Option<Vec<RecordField>>` in the module provided
198///    to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those
199///    functions.
200///
201/// 2. By providing a function directly, `#[avro(with = some_fn)]`.
202///
203///    To get the schema, it will call the function provided. It must have the signature
204///    `fn(&mut HashSet<Name>, NamespaceRef) -> Schema`. When this is used for a `transparent` struct, the
205///    default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used.
206///    This is only recommended for primitive types, as the default implementation cannot be efficiently
207///    implemented for complex types.
208///
209pub trait AvroSchema {
210    /// Construct the full schema that represents this type.
211    ///
212    /// The returned schema is fully independent and contains only `Schema::Ref` to named types defined
213    /// earlier in the schema.
214    fn get_schema() -> Schema;
215}
216
217/// Trait for types that serve as fully defined components inside an Avro data model.
218///
219/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when the `derive` feature is enabled.
220///
221/// # Implementation guide
222///
223/// ### Implementation for returning primitive types
224/// When the schema you want to return is a primitive type (a type without a name), the function
225/// arguments can be ignored.
226///
227/// For example, you have a custom integer type:
228/// ```
229/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
230/// # use std::collections::HashSet;
231/// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods
232/// pub struct U24([u8; 3]);
233/// impl AvroSchemaComponent for U24 {
234///     fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
235///         Schema::Int
236///     }
237///
238///     fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
239///         None // A Schema::Int is not a Schema::Record so there are no fields to return
240///     }
241///
242///     fn field_default() -> Option<serde_json::Value> {
243///         // Zero as default value. Can also be None if you don't want to provide a default value
244///         Some(0u8.into())
245///     }
246///}
247/// ```
248///
249/// ### Passthrough implementation
250///
251/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
252/// pass through the arguments to the inner type:
253/// ```
254/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
255/// # use serde::{Serialize, Deserialize};
256/// # use std::collections::HashSet;
257/// #[derive(Serialize, Deserialize)]
258/// #[serde(transparent)] // This attribute is important for all passthrough implementations!
259/// pub struct Transparent<T>(T);
260/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
261///     fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
262///         T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
263///     }
264///
265///     fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
266///         T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
267///     }
268///
269///     fn field_default() -> Option<serde_json::Value> {
270///         T::field_default()
271///     }
272///}
273/// ```
274///
275/// ### Implementation for complex types
276/// When the schema you want to return is a complex type (a type with a name), special care has to
277/// be taken to avoid duplicate type definitions and getting the correct namespace.
278///
279/// Things to keep in mind:
280///  - If the fully qualified name already exists, return a [`Schema::Ref`]
281///  - Use the `AvroSchemaComponent` implementations to get the schemas for the subtypes
282///  - The ordering of fields in the schema **must** match with the ordering in Serde
283///  - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
284///    with backtracking and a lot of cloning.
285///      - Even if your schema is not a record, still implement the function and just return `None`
286///  - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
287///
288/// ```
289/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField, RecordSchema}};
290/// # use serde::{Serialize, Deserialize};
291/// # use std::{time::Duration, collections::HashSet};
292/// pub struct Foo {
293///     one: String,
294///     two: i32,
295///     three: Option<Duration>
296/// }
297///
298/// impl AvroSchemaComponent for Foo {
299///     fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
300///         // Create the fully qualified name for your type given the enclosing namespace
301///         let name = Name::new_with_enclosing_namespace("Foo", enclosing_namespace).expect("Name is valid");
302///         if named_schemas.contains(&name) {
303///             Schema::Ref { name }
304///         } else {
305///             let enclosing_namespace = name.namespace();
306///             // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion.
307///             named_schemas.insert(name.clone());
308///             let schema = Schema::Record(RecordSchema::builder()
309///                 .name(name.clone())
310///                 .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!"))
311///                 .build()
312///             );
313///             schema
314///         }
315///     }
316///
317///     fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
318///         Some(vec![
319///             RecordField::builder()
320///                 .name("one")
321///                 .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace))
322///                 .build(),
323///             RecordField::builder()
324///                 .name("two")
325///                 .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace))
326///                 .build(),
327///             RecordField::builder()
328///                 .name("three")
329///                 .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace))
330///                 .build(),
331///         ])
332///     }
333///
334///     fn field_default() -> Option<serde_json::Value> {
335///         // This type does not provide a default value
336///         None
337///     }
338///}
339/// ```
340pub trait AvroSchemaComponent {
341    /// Get the schema for this component
342    fn get_schema_in_ctxt(
343        named_schemas: &mut HashSet<Name>,
344        enclosing_namespace: NamespaceRef,
345    ) -> Schema;
346
347    /// Get the fields of this schema if it is a record.
348    ///
349    /// This returns `None` if the schema is not a record.
350    ///
351    /// The default implementation has to do a lot of extra work, so it is strongly recommended to
352    /// implement this function when manually implementing this trait.
353    fn get_record_fields_in_ctxt(
354        named_schemas: &mut HashSet<Name>,
355        enclosing_namespace: NamespaceRef,
356    ) -> Option<Vec<RecordField>> {
357        get_record_fields_in_ctxt(named_schemas, enclosing_namespace, Self::get_schema_in_ctxt)
358    }
359
360    /// The default value of this type when used for a record field.
361    ///
362    /// `None` means no default value, which is also the default implementation.
363    ///
364    /// Implementations of this trait provided by this crate return `None` except for `Option<T>`
365    /// which returns `Some(serde_json::Value::Null)`.
366    fn field_default() -> Option<serde_json::Value> {
367        None
368    }
369}
370
371/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
372///
373/// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]`
374#[doc(hidden)]
375pub fn get_record_fields_in_ctxt(
376    named_schemas: &mut HashSet<Name>,
377    enclosing_namespace: NamespaceRef,
378    schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema,
379) -> Option<Vec<RecordField>> {
380    let mut record = match schema_fn(named_schemas, enclosing_namespace) {
381        Schema::Record(record) => record,
382        Schema::Ref { name } => {
383            // This schema already exists in `named_schemas` so temporarily remove it so we can
384            // get the actual schema.
385            assert!(
386                named_schemas.remove(&name),
387                "Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}"
388            );
389            // Get the schema
390            let schema = schema_fn(named_schemas, enclosing_namespace);
391            // Reinsert the old value
392            named_schemas.insert(name);
393
394            // Now check if we actually got a record and return the fields if that is the case
395            let Schema::Record(record) = schema else {
396                return None;
397            };
398            return Some(record.fields);
399        }
400        _ => return None,
401    };
402    // This schema did not yet exist in `named_schemas`, so we need to remove it if and only if
403    // it isn't used somewhere in the schema (recursive type).
404
405    // Find the first Schema::Ref that has the target name
406    fn find_first_ref<'a>(schema: &'a mut Schema, target: &Name) -> Option<&'a mut Schema> {
407        match schema {
408            Schema::Ref { name } if name == target => Some(schema),
409            Schema::Array(array) => find_first_ref(&mut array.items, target),
410            Schema::Map(map) => find_first_ref(&mut map.types, target),
411            Schema::Union(union) => {
412                for schema in &mut union.schemas {
413                    if let Some(schema) = find_first_ref(schema, target) {
414                        return Some(schema);
415                    }
416                }
417                None
418            }
419            Schema::Record(record) => {
420                assert_ne!(
421                    &record.name, target,
422                    "Only expecting a Ref named {target:?}"
423                );
424                for field in &mut record.fields {
425                    if let Some(schema) = find_first_ref(&mut field.schema, target) {
426                        return Some(schema);
427                    }
428                }
429                None
430            }
431            _ => None,
432        }
433    }
434
435    // Prepare the fields for the new record. All named types will become references.
436    let new_fields = record
437        .fields
438        .iter()
439        .map(|field| RecordField {
440            name: field.name.clone(),
441            doc: field.doc.clone(),
442            aliases: field.aliases.clone(),
443            default: field.default.clone(),
444            schema: if field.schema.is_named() {
445                Schema::Ref {
446                    name: field.schema.name().expect("Schema is named").clone(),
447                }
448            } else {
449                field.schema.clone()
450            },
451            custom_attributes: field.custom_attributes.clone(),
452        })
453        .collect();
454
455    // Remove the name in case it is not used
456    named_schemas.remove(&record.name);
457
458    // Find the first reference to this schema so we can replace it with the actual schema
459    for field in &mut record.fields {
460        if let Some(schema) = find_first_ref(&mut field.schema, &record.name) {
461            let new_schema = RecordSchema {
462                name: record.name,
463                aliases: record.aliases,
464                doc: record.doc,
465                fields: new_fields,
466                lookup: record.lookup,
467                attributes: record.attributes,
468            };
469
470            let name = match std::mem::replace(schema, Schema::Record(new_schema)) {
471                Schema::Ref { name } => name,
472                schema => {
473                    panic!("Only expected `Schema::Ref` from `find_first_ref`, got: {schema:?}")
474                }
475            };
476
477            // The schema is used, so reinsert it
478            named_schemas.insert(name.clone());
479
480            break;
481        }
482    }
483
484    Some(record.fields)
485}
486
487impl<T> AvroSchema for T
488where
489    T: AvroSchemaComponent + ?Sized,
490{
491    fn get_schema() -> Schema {
492        T::get_schema_in_ctxt(&mut HashSet::default(), None)
493    }
494}
495
496macro_rules! impl_schema (
497    ($type:ty, $variant_constructor:expr) => (
498        impl AvroSchemaComponent for $type {
499            fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
500                $variant_constructor
501            }
502
503            fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
504                None
505            }
506        }
507    );
508);
509
510impl_schema!(bool, Schema::Boolean);
511impl_schema!(i8, Schema::Int);
512impl_schema!(i16, Schema::Int);
513impl_schema!(i32, Schema::Int);
514impl_schema!(i64, Schema::Long);
515impl_schema!(u8, Schema::Int);
516impl_schema!(u16, Schema::Int);
517impl_schema!(u32, Schema::Long);
518impl_schema!(f32, Schema::Float);
519impl_schema!(f64, Schema::Double);
520impl_schema!(String, Schema::String);
521impl_schema!(str, Schema::String);
522impl_schema!(char, Schema::String);
523impl_schema!((), Schema::Null);
524
525macro_rules! impl_passthrough_schema (
526    ($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => (
527        impl<T: AvroSchemaComponent $(+ $bound)* + ?Sized> AvroSchemaComponent for $type {
528            fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
529                T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
530            }
531
532            fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
533                T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
534            }
535
536            fn field_default() -> Option<serde_json::Value> {
537                T::field_default()
538            }
539        }
540    );
541);
542
543impl_passthrough_schema!(&T where T: AvroSchemaComponent + ?Sized);
544impl_passthrough_schema!(&mut T where T: AvroSchemaComponent + ?Sized);
545impl_passthrough_schema!(Box<T> where T: AvroSchemaComponent + ?Sized);
546impl_passthrough_schema!(Cow<'_, T> where T: AvroSchemaComponent + ?Sized + ToOwned);
547impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Sized);
548
549macro_rules! impl_array_schema (
550    ($type:ty where T: AvroSchemaComponent) => (
551        impl<T: AvroSchemaComponent> AvroSchemaComponent for $type {
552            fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
553                Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
554            }
555
556            fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
557                None
558            }
559        }
560    );
561);
562
563impl_array_schema!([T] where T: AvroSchemaComponent);
564impl_array_schema!(Vec<T> where T: AvroSchemaComponent);
565
566impl<T> AvroSchemaComponent for HashMap<String, T>
567where
568    T: AvroSchemaComponent,
569{
570    fn get_schema_in_ctxt(
571        named_schemas: &mut HashSet<Name>,
572        enclosing_namespace: NamespaceRef,
573    ) -> Schema {
574        Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
575    }
576
577    fn get_record_fields_in_ctxt(
578        _: &mut HashSet<Name>,
579        _: NamespaceRef,
580    ) -> Option<Vec<RecordField>> {
581        None
582    }
583}
584
585impl<T> AvroSchemaComponent for Option<T>
586where
587    T: AvroSchemaComponent,
588{
589    fn get_schema_in_ctxt(
590        named_schemas: &mut HashSet<Name>,
591        enclosing_namespace: NamespaceRef,
592    ) -> Schema {
593        let variants = vec![
594            Schema::Null,
595            T::get_schema_in_ctxt(named_schemas, enclosing_namespace),
596        ];
597
598        Schema::Union(
599            UnionSchema::new(variants).expect("Option<T> must produce a valid (non-nested) union"),
600        )
601    }
602
603    fn get_record_fields_in_ctxt(
604        _: &mut HashSet<Name>,
605        _: NamespaceRef,
606    ) -> Option<Vec<RecordField>> {
607        None
608    }
609
610    fn field_default() -> Option<serde_json::Value> {
611        Some(serde_json::Value::Null)
612    }
613}
614
615impl AvroSchemaComponent for core::time::Duration {
616    /// The schema is [`Schema::Record`] with the name `Duration`.
617    ///
618    /// It has two fields:
619    /// - `secs` with the schema `Schema::Fixed(name: "u64", size: 8)`
620    /// - `nanos` with the schema `Schema::Long`
621    fn get_schema_in_ctxt(
622        named_schemas: &mut HashSet<Name>,
623        enclosing_namespace: NamespaceRef,
624    ) -> Schema {
625        let name = Name::new_with_enclosing_namespace("Duration", enclosing_namespace)
626            .expect("Name is valid");
627        if named_schemas.contains(&name) {
628            Schema::Ref { name }
629        } else {
630            named_schemas.insert(name.clone());
631            Schema::record(name)
632                .fields(
633                    Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
634                        .expect("Unreachable!"),
635                )
636                .build()
637        }
638    }
639
640    fn get_record_fields_in_ctxt(
641        named_schemas: &mut HashSet<Name>,
642        enclosing_namespace: NamespaceRef,
643    ) -> Option<Vec<RecordField>> {
644        Some(vec![
645            // Secs is an u64
646            RecordField::builder()
647                .name("secs")
648                .schema(u64::get_schema_in_ctxt(named_schemas, enclosing_namespace))
649                .build(),
650            // Nanos is an u32
651            RecordField::builder()
652                .name("nanos")
653                .schema(Schema::Long)
654                .build(),
655        ])
656    }
657}
658
659impl AvroSchemaComponent for uuid::Uuid {
660    /// The schema is [`Schema::Uuid`] with the name `uuid`.
661    ///
662    /// The underlying schema is [`Schema::Fixed`] with a size of 16.
663    ///
664    /// If you're using `human_readable: true` you need to override this schema with a `Schema::String`.
665    fn get_schema_in_ctxt(
666        named_schemas: &mut HashSet<Name>,
667        enclosing_namespace: NamespaceRef,
668    ) -> Schema {
669        let name =
670            Name::new_with_enclosing_namespace("uuid", enclosing_namespace).expect("Name is valid");
671        if named_schemas.contains(&name) {
672            Schema::Ref { name }
673        } else {
674            let schema = Schema::Uuid(UuidSchema::Fixed(FixedSchema {
675                name: name.clone(),
676                aliases: None,
677                doc: None,
678                size: 16,
679                attributes: Default::default(),
680            }));
681            named_schemas.insert(name);
682            schema
683        }
684    }
685
686    fn get_record_fields_in_ctxt(
687        _: &mut HashSet<Name>,
688        _: NamespaceRef,
689    ) -> Option<Vec<RecordField>> {
690        None
691    }
692}
693
694impl AvroSchemaComponent for u64 {
695    /// The schema is [`Schema::Fixed`] of size 8 with the name `u64`.
696    fn get_schema_in_ctxt(
697        named_schemas: &mut HashSet<Name>,
698        enclosing_namespace: NamespaceRef,
699    ) -> Schema {
700        let name =
701            Name::new_with_enclosing_namespace("u64", enclosing_namespace).expect("Name is valid");
702        if named_schemas.contains(&name) {
703            Schema::Ref { name }
704        } else {
705            let schema = Schema::Fixed(FixedSchema {
706                name: name.clone(),
707                aliases: None,
708                doc: None,
709                size: 8,
710                attributes: Default::default(),
711            });
712            named_schemas.insert(name);
713            schema
714        }
715    }
716
717    fn get_record_fields_in_ctxt(
718        _: &mut HashSet<Name>,
719        _: NamespaceRef,
720    ) -> Option<Vec<RecordField>> {
721        None
722    }
723}
724
725impl AvroSchemaComponent for u128 {
726    /// The schema is [`Schema::Fixed`] of size 16 with the name `u128`.
727    fn get_schema_in_ctxt(
728        named_schemas: &mut HashSet<Name>,
729        enclosing_namespace: NamespaceRef,
730    ) -> Schema {
731        let name =
732            Name::new_with_enclosing_namespace("u128", enclosing_namespace).expect("Name is valid");
733        if named_schemas.contains(&name) {
734            Schema::Ref { name }
735        } else {
736            let schema = Schema::Fixed(FixedSchema {
737                name: name.clone(),
738                aliases: None,
739                doc: None,
740                size: 16,
741                attributes: Default::default(),
742            });
743            named_schemas.insert(name);
744            schema
745        }
746    }
747
748    fn get_record_fields_in_ctxt(
749        _: &mut HashSet<Name>,
750        _: NamespaceRef,
751    ) -> Option<Vec<RecordField>> {
752        None
753    }
754}
755
756impl AvroSchemaComponent for i128 {
757    /// The schema is [`Schema::Fixed`] of size 16 with the name `i128`.
758    fn get_schema_in_ctxt(
759        named_schemas: &mut HashSet<Name>,
760        enclosing_namespace: NamespaceRef,
761    ) -> Schema {
762        let name =
763            Name::new_with_enclosing_namespace("i128", enclosing_namespace).expect("Name is valid");
764        if named_schemas.contains(&name) {
765            Schema::Ref { name }
766        } else {
767            let schema = Schema::Fixed(FixedSchema {
768                name: name.clone(),
769                aliases: None,
770                doc: None,
771                size: 16,
772                attributes: Default::default(),
773            });
774            named_schemas.insert(name);
775            schema
776        }
777    }
778
779    fn get_record_fields_in_ctxt(
780        _: &mut HashSet<Name>,
781        _: NamespaceRef,
782    ) -> Option<Vec<RecordField>> {
783        None
784    }
785}
786
787/// Schema definition for `[T; N]`
788///
789/// Schema is defined as follows:
790/// - 0-sized arrays: [`Schema::Null`]
791/// - 1-sized arrays: `T::get_schema_in_ctxt`
792/// - N-sized arrays: [`Schema::Record`] with a field for every index
793///
794/// If you need or want a [`Schema::Array`], [`Schema::Bytes`], or [`Schema::Fixed`] instead,
795/// use [`apache_avro::serde::array`], [`apache_avro::serde::bytes`], or [`apache_avro::serde::fixed`] respectively.
796///
797/// [`apache_avro::serde::array`]: crate::serde::array
798/// [`apache_avro::serde::bytes`]: crate::serde::bytes
799/// [`apache_avro::serde::fixed`]: crate::serde::fixed
800impl<const N: usize, T: AvroSchemaComponent> AvroSchemaComponent for [T; N] {
801    fn get_schema_in_ctxt(
802        named_schemas: &mut HashSet<Name>,
803        enclosing_namespace: NamespaceRef,
804    ) -> Schema {
805        if N == 0 {
806            Schema::Null
807        } else if N == 1 {
808            T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
809        } else {
810            let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
811            let name = Name::new_with_enclosing_namespace(
812                format!("A{N}_{}", t_schema.unique_normalized_name()),
813                enclosing_namespace,
814            )
815            .expect("Name is valid");
816            if named_schemas.contains(&name) {
817                Schema::Ref { name }
818            } else {
819                named_schemas.insert(name.clone());
820
821                let t_default = T::field_default();
822                // If T is a named schema or contains named schemas, they'll now be a reference.
823                let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
824                let fields = std::iter::once(
825                    RecordField::builder()
826                        .name("field_0".to_string())
827                        .schema(t_schema)
828                        .maybe_default(t_default.clone())
829                        .build(),
830                )
831                .chain((1..N).map(|n| {
832                    RecordField::builder()
833                        .name(format!("field_{n}"))
834                        .schema(t_ref.clone())
835                        .maybe_default(t_default.clone())
836                        .build()
837                }))
838                .collect();
839
840                Schema::record(name).fields(fields).build()
841            }
842        }
843    }
844
845    fn get_record_fields_in_ctxt(
846        named_schemas: &mut HashSet<Name>,
847        enclosing_namespace: NamespaceRef,
848    ) -> Option<Vec<RecordField>> {
849        if N == 0 {
850            None
851        } else if N == 1 {
852            T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
853        } else {
854            let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
855            let t_default = T::field_default();
856            // If T is a named schema or contains named schemas, they'll now be a reference.
857            let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
858            let fields = std::iter::once(
859                RecordField::builder()
860                    .name("field_0".to_string())
861                    .schema(t_schema)
862                    .maybe_default(t_default.clone())
863                    .build(),
864            )
865            .chain((1..N).map(|n| {
866                RecordField::builder()
867                    .name(format!("field_{n}"))
868                    .schema(t_ref.clone())
869                    .maybe_default(t_default.clone())
870                    .build()
871            }))
872            .collect();
873            Some(fields)
874        }
875    }
876
877    /// `None` for 0-sized and N-sized arrays, `T::field_default` for 1-sized arrays
878    fn field_default() -> Option<serde_json::Value> {
879        if N == 1 { T::field_default() } else { None }
880    }
881}
882
883/// Schema definition for `(T₁, T₂, …, Tₙ)`.
884///
885/// Implemented for tuples of up to 16 elements.
886///
887/// Schema is defined as follows:
888/// - 1-tuple: `T::get_schema_in_ctxt`
889/// - N-tuple: [`Schema::Record`] with a field for every element
890#[cfg_attr(docsrs, doc(fake_variadic))]
891impl<T: AvroSchemaComponent> AvroSchemaComponent for (T,) {
892    fn get_schema_in_ctxt(
893        named_schemas: &mut HashSet<Name>,
894        enclosing_namespace: NamespaceRef,
895    ) -> Schema {
896        T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
897    }
898
899    fn get_record_fields_in_ctxt(
900        named_schemas: &mut HashSet<Name>,
901        enclosing_namespace: NamespaceRef,
902    ) -> Option<Vec<RecordField>> {
903        T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
904    }
905
906    /// `None` for N-tuples, `T::field_default()` for 1-tuple.
907    fn field_default() -> Option<serde_json::Value> {
908        T::field_default()
909    }
910}
911
912macro_rules! tuple_impls {
913    ($($len:expr => ($($name:ident)+))+) => {
914        $(
915            #[cfg_attr(docsrs, doc(hidden))]
916            impl<$($name: AvroSchemaComponent),+> AvroSchemaComponent for ($($name),+) {
917                fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
918                    let schemas: [Schema; $len] = [$($name::get_schema_in_ctxt(named_schemas, enclosing_namespace)),+];
919
920                    let mut name = format!("T{}", $len);
921                    for schema in &schemas {
922                        name.push('_');
923                        name.push_str(&schema.unique_normalized_name());
924                    }
925                    let name = Name::new_with_enclosing_namespace(name, enclosing_namespace).expect("Name is valid");
926
927                    if named_schemas.contains(&name) {
928                        Schema::Ref { name }
929                    } else {
930                        named_schemas.insert(name.clone());
931
932                        let defaults: [Option<serde_json::Value>; $len] = [$($name::field_default()),+];
933
934                        let fields = schemas.into_iter().zip(defaults.into_iter()).enumerate().map(|(n, (schema, default))| {
935                            RecordField::builder()
936                                .name(format!("field_{n}"))
937                                .schema(schema)
938                                .maybe_default(default)
939                                .build()
940                        }).collect();
941
942                        Schema::record(name).fields(fields).build()
943                    }
944                }
945            }
946        )+
947    }
948}
949
950tuple_impls! {
951    2 => (T0 T1)
952    3 => (T0 T1 T2)
953    4 => (T0 T1 T2 T3)
954    5 => (T0 T1 T2 T3 T4)
955    6 => (T0 T1 T2 T3 T4 T5)
956    7 => (T0 T1 T2 T3 T4 T5 T6)
957    8 => (T0 T1 T2 T3 T4 T5 T6 T7)
958    9 => (T0 T1 T2 T3 T4 T5 T6 T7 T8)
959    10 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9)
960    11 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10)
961    12 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11)
962    13 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12)
963    14 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13)
964    15 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14)
965    16 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15)
966}
967
#[cfg(test)]
mod tests {
    use apache_avro_test_helper::TestResult;

    use crate::{
        AvroSchema, Schema,
        reader::datum::GenericDatumReader,
        schema::{FixedSchema, Name},
        writer::datum::GenericDatumWriter,
    };

    /// The unsized `str` type maps to an Avro string.
    #[test]
    fn avro_rs_401_str() -> TestResult {
        assert_eq!(str::get_schema(), Schema::String);

        Ok(())
    }

    /// Shared and exclusive references are transparent for schema purposes.
    #[test]
    fn avro_rs_401_references() -> TestResult {
        let via_shared_ref = <&str>::get_schema();
        let via_exclusive_ref = <&mut str>::get_schema();

        for actual in [via_shared_ref, via_exclusive_ref] {
            assert_eq!(actual, Schema::String);
        }

        Ok(())
    }

    /// An unsized slice of `u8` is an array of ints.
    #[test]
    fn avro_rs_401_slice() -> TestResult {
        let actual = <[u8]>::get_schema();
        let expected = Schema::array(Schema::Int).build();
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `Option<&[u8]>` is a union of null and an array of ints.
    #[test]
    fn avro_rs_401_option_ref_slice_array() -> TestResult {
        let actual = <Option<&[u8]>>::get_schema();
        let expected = Schema::union(vec![Schema::Null, Schema::array(Schema::Int).build()])?;
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `char` maps to an Avro string.
    #[test]
    fn avro_rs_414_char() -> TestResult {
        assert_eq!(char::get_schema(), Schema::String);

        Ok(())
    }

    /// `u64` maps to an 8-byte fixed named `u64`.
    #[test]
    fn avro_rs_414_u64() -> TestResult {
        let actual = u64::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("u64")?,
            aliases: None,
            doc: None,
            size: 8,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `i128` maps to a 16-byte fixed named `i128`.
    #[test]
    fn avro_rs_414_i128() -> TestResult {
        let actual = i128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("i128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `u128` maps to a 16-byte fixed named `u128`.
    #[test]
    fn avro_rs_414_u128() -> TestResult {
        let actual = u128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("u128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// The unit type maps to Avro null.
    #[test]
    fn avro_rs_486_unit() -> TestResult {
        assert_eq!(<()>::get_schema(), Schema::Null);

        Ok(())
    }

    /// `Option<()>` would be a union of null and null, which must be rejected.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions cannot contain duplicate types, found at least two Null }"
    )]
    fn avro_rs_489_some_unit() {
        let _ = <Option<()>>::get_schema();
    }

    /// `Option<Option<_>>` would nest a union directly in a union, which must be rejected.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions may not directly contain a union }"
    )]
    fn avro_rs_489_option_option() {
        let _ = <Option<Option<i32>>>::get_schema();
    }

    /// `std::time::Duration` has the expected record schema and survives a write/read round trip
    /// at both ends of its range.
    #[test]
    fn avro_rs_512_std_time_duration() -> TestResult {
        use std::time::Duration;

        let expected_schema = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "Duration",
            "fields": [
                { "name": "secs", "type": {"type": "fixed", "name": "u64", "size": 8} },
                { "name": "nanos", "type": "long" }
            ]
        }"#,
        )?;
        let samples = [Duration::ZERO, Duration::MAX];
        assert_eq!(expected_schema, Duration::get_schema());

        // Encode every sample before the reader is constructed.
        let writer = GenericDatumWriter::builder(&expected_schema).build()?;
        let encoded = [
            writer.write_ser_to_vec(&samples[0])?,
            writer.write_ser_to_vec(&samples[1])?,
        ];

        let reader = GenericDatumReader::builder(&expected_schema).build()?;
        for (original, bytes) in samples.iter().zip(&encoded) {
            let decoded: Duration = reader.read_deser(&mut &bytes[..])?;
            assert_eq!(*original, decoded);
        }
        Ok(())
    }

    /// Zero-length arrays are null regardless of the element type.
    #[test]
    fn avro_rs_512_0_array() -> TestResult {
        let actual_schemas = [
            <[String; 0]>::get_schema(),
            <[(); 0]>::get_schema(),
            <[bool; 0]>::get_schema(),
        ];
        for actual in actual_schemas {
            assert_eq!(Schema::Null, actual);
        }
        Ok(())
    }

    /// One-element arrays take the schema of their element type.
    #[test]
    fn avro_rs_512_1_array() -> TestResult {
        let cases = [
            (Schema::String, <[String; 1]>::get_schema()),
            (Schema::Null, <[(); 1]>::get_schema()),
            (Schema::Boolean, <[bool; 1]>::get_schema()),
        ];
        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// Longer arrays become a record with one field per index.
    #[test]
    fn avro_rs_512_n_array() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A5_s",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "string" },
                { "name": "field_2", "type": "string" },
                { "name": "field_3", "type": "string" },
                { "name": "field_4", "type": "string" }
            ]
        }"#,
        )?;
        let actual = <[String; 5]>::get_schema();

        assert_eq!(expected, actual);
        Ok(())
    }

    /// With a named element type, only the first field carries the definition; later fields
    /// refer to it by name.
    #[test]
    fn avro_rs_512_n_array_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A2_u2_n_r4_uuid",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "uuid"}], "default": null },
                { "name": "field_1", "type": ["null", "uuid"], "default": null }
            ]
        }"#,
        )?;
        let actual = <[Option<uuid::Uuid>; 2]>::get_schema();

        assert_eq!(expected, actual);
        Ok(())
    }

    /// One-element tuples take the schema of their element type.
    #[test]
    fn avro_rs_512_1_tuple() -> TestResult {
        let cases = [
            (Schema::String, <(String,)>::get_schema()),
            (Schema::Null, <((),)>::get_schema()),
            (Schema::Boolean, <(bool,)>::get_schema()),
        ];
        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// Longer tuples become a record with one field per element.
    #[test]
    fn avro_rs_512_n_tuple() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T5_s_i_l_B_n",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "int" },
                { "name": "field_2", "type": "long" },
                { "name": "field_3", "type": "boolean" },
                { "name": "field_4", "type": "null" }
            ]
        }"#,
        )?;
        let actual = <(String, i32, i64, bool, ())>::get_schema();

        assert_eq!(expected, actual);
        Ok(())
    }

    /// A named type that appears in several tuple elements is defined once and referenced after.
    #[test]
    fn avro_rs_512_n_tuple_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T3_u2_n_r4_uuid_r4_uuid_s",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "uuid"}], "default": null },
                { "name": "field_1", "type": "uuid" },
                { "name": "field_2", "type": "string" }
            ]
        }"#,
        )?;
        let actual = <(Option<uuid::Uuid>, uuid::Uuid, String)>::get_schema();

        assert_eq!(expected, actual);
        Ok(())
    }
}