Skip to main content

apache_avro/serde/
derive.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{
19    borrow::Cow,
20    collections::{HashMap, HashSet},
21};
22
23use crate::{
24    Schema,
25    schema::{FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema},
26};
27
28/// Trait for types that serve as an Avro data model.
29///
30/// **Do not implement directly!** Either derive it or implement [`AvroSchemaComponent`] to get this trait
31/// through a blanket implementation.
32///
33/// ## Deriving `AvroSchema`
34///
35/// Using the custom derive requires that you enable the `"derive"` cargo
36/// feature in your `Cargo.toml`:
37///
38/// ```toml
39/// [dependencies]
40/// apache-avro = { version = "..", features = ["derive"] }
41/// ```
42///
43/// Then, you add the `#[derive(AvroSchema)]` annotation to your `struct` and
44/// `enum` type definition:
45///
46/// ```
47/// # use serde::{Serialize, Deserialize};
48/// # use apache_avro::AvroSchema;
49/// #[derive(AvroSchema, Serialize, Deserialize)]
50/// pub struct Foo {
51///     bar: Vec<Bar>,
52/// }
53///
54/// #[derive(AvroSchema, Serialize, Deserialize)]
55/// pub enum Bar {
56///     Spam,
57///     Maps
58/// }
59/// ```
60///
61/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
62/// through the blanket implementation for `T: AvroSchemaComponent`.
63///
64/// When deriving `struct`s, every member must also implement `AvroSchemaComponent`.
65///
66/// ## Changing the generated schema
67///
68/// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema.
69/// It will also check for compatibility between the various attributes.
70///
71/// #### Container attributes
72///
73///  - `#[serde(rename = "name")]`
74///
75// TODO: Should we check if `name` contains any dots? As that would imply a namespace
76///    Set the `name` of the schema to the given string. Defaults to the name of the type.
77///
78///  - `#[avro(namespace = "some.name.space")]`
79///
80///    Set the `namespace` of the schema. This will be the relative namespace if the schema is included
81///    in another schema.
82///
83///  - `#[avro(doc = "Some documentation")]`
84///
85///    Set the `doc` attribute of the schema. Defaults to the documentation of the type.
86///
87///  - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
88///
89///    Provide the default value for this type when it is used in a field.
90///
91///  - `#[avro(alias = "name")]`
92///
93///    Set the `alias` attribute of the schema. Can be specified multiple times.
94///
95///  - `#[serde(rename_all = "camelCase")]`
96///
97///    Rename all the fields or variants in the schema to follow the given case convention. The possible values
98///    are `"lowercase"`, `"UPPERCASE"`, `"PascalCase"`, `"camelCase"`, `"snake_case"`, `"kebab-case"`,
99///    `"SCREAMING_SNAKE_CASE"`, `"SCREAMING-KEBAB-CASE"`.
100///
101///  - `#[serde(transparent)]`
102///
103///    Use the schema of the inner field directly. Is only allowed on structs with only one unskipped field.
104///
105///
106/// #### Variant attributes
107///
108///  - `#[serde(rename = "name")]`
109///
110///    Rename the variant to the given name.
111///
112///
113/// #### Field attributes
114///
115///  - `#[serde(rename = "name")]`
116///
117///    Rename the field name to the given name.
118///
119///  - `#[avro(doc = "Some documentation")]`
120///
121///    Set the `doc` attribute of the field. Defaults to the documentation of the field.
122///
123///  - `#[avro(default = ..)]`
124///
125///    Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
126///    to get the default value for a type. To remove the `default` attribute for a field, set `default` to `false`: `#[avro(default = false)]`.
127///
128///    To override or set a default value, provide a JSON string:
129///
130///      - Null: `#[avro(default = "null")]`
131///      - Boolean: `#[avro(default = "true")]`.
132///      - Number: `#[avro(default = "42")]` or `#[avro(default = "42.5")]`
133///      - String: `#[avro(default = r#""String needs extra quotes""#)]`.
134///      - Array: `#[avro(default = r#"["One", "Two", "Three"]"#)]`.
135///      - Object: `#[avro(default = r#"{"One": 1}"#)]`.
136///
137///    See [the specification](https://avro.apache.org/docs/++version++/specification/#schema-record)
138///    for details on how to map a type to a JSON value.
139///
140///  - `#[serde(alias = "name")]`
141///
142///    Set the `alias` attribute of the field. Can be specified multiple times.
143///
144///  - `#[serde(flatten)]`
145///
146///    Flatten the content of this field into the container it is defined in.
147///
148///  - `#[serde(skip)]`
149///
150///    Do not include this field in the schema.
151///
152///  - `#[serde(skip_serializing)]`
153///
154///    When combined with `#[serde(skip_deserializing)]`, don't include this field in the schema.
155///    Otherwise, it will be included in the schema and the `#[avro(default)]` attribute **must** be
156///    set. That value will be used for serializing.
157///
158///  - `#[serde(skip_serializing_if)]`
159///
160///    Conditionally use the value of the field or the value provided by `#[avro(default)]`. The
161///    `#[avro(default)]` attribute **must** be set.
162///
163///  - `#[avro(with)]` and `#[serde(with = "module")]`
164///
165///    Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types).
166///
167/// #### Incompatible Serde attributes
168///
169/// The derive macro is compatible with most Serde attributes, but it is incompatible with
170/// the following attributes:
171///
172/// - Container attributes
173///     - `tag`
174///     - `content`
175///     - `untagged`
176///     - `variant_identifier`
177///     - `field_identifier`
178///     - `remote`
179///     - `rename_all(serialize = "..", deserialize = "..")` where `serialize` != `deserialize`
180/// - Variant attributes
181///     - `other`
182///     - `untagged`
183/// - Field attributes
184///     - `getter`
185///
186/// ## Working with foreign types
187///
/// Most foreign types won't have an [`AvroSchema`] implementation. This crate implements it only
189/// for built-in types and [`uuid::Uuid`].
190///
/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]`
192/// attribute can be used to get the schema for those fields. It can be used in two ways:
193///
/// 1. In combination with `#[serde(with = "path::to::module")]`
195///
196///    To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Schema`
197///    and `fn get_record_fields_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Option<Vec<RecordField>>` in the module provided
198///    to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those
199///    functions.
200///
201/// 2. By providing a function directly, `#[avro(with = some_fn)]`.
202///
203///    To get the schema, it will call the function provided. It must have the signature
204///    `fn(&mut HashSet<Name>, NamespaceRef) -> Schema`. When this is used for a `transparent` struct, the
205///    default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used.
206///    This is only recommended for primitive types, as the default implementation cannot be efficiently
207///    implemented for complex types.
208///
pub trait AvroSchema {
    /// Construct the full schema that represents this type.
    ///
    /// The returned schema is fully independent and contains only `Schema::Ref` to named types defined
    /// earlier in the schema.
    ///
    /// This is an associated function without a receiver, so it is called on the type
    /// itself (for example `T::get_schema()`), not on a value.
    fn get_schema() -> Schema;
}
216
217/// Trait for types that serve as fully defined components inside an Avro data model.
218///
219/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when the `derive` feature is enabled.
220///
221/// # Implementation guide
222///
223/// ### Implementation for returning primitive types
224/// When the schema you want to return is a primitive type (a type without a name), the function
225/// arguments can be ignored.
226///
227/// For example, you have a custom integer type:
228/// ```
229/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
230/// # use std::collections::HashSet;
231/// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods
232/// pub struct U24([u8; 3]);
233/// impl AvroSchemaComponent for U24 {
234///     fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
235///         Schema::Int
236///     }
237///
238///     fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
239///         None // A Schema::Int is not a Schema::Record so there are no fields to return
240///     }
241///
242///     fn field_default() -> Option<serde_json::Value> {
243///         // Zero as default value. Can also be None if you don't want to provide a default value
244///         Some(0u8.into())
245///     }
246///}
247/// ```
248///
249/// ### Passthrough implementation
250///
/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
252/// pass through the arguments to the inner type:
253/// ```
254/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
255/// # use serde::{Serialize, Deserialize};
256/// # use std::collections::HashSet;
257/// #[derive(Serialize, Deserialize)]
258/// #[serde(transparent)] // This attribute is important for all passthrough implementations!
259/// pub struct Transparent<T>(T);
260/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
261///     fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
262///         T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
263///     }
264///
265///     fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
266///         T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
267///     }
268///
269///     fn field_default() -> Option<serde_json::Value> {
270///         T::field_default()
271///     }
272///}
273/// ```
274///
275/// ### Implementation for complex types
276/// When the schema you want to return is a complex type (a type with a name), special care has to
277/// be taken to avoid duplicate type definitions and getting the correct namespace.
278///
279/// Things to keep in mind:
280///  - If the fully qualified name already exists, return a [`Schema::Ref`]
281///  - Use the `AvroSchemaComponent` implementations to get the schemas for the subtypes
282///  - The ordering of fields in the schema **must** match with the ordering in Serde
283///  - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
284///    with backtracking and a lot of cloning.
285///      - Even if your schema is not a record, still implement the function and just return `None`
286///  - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
287///
288/// ```
289/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField, RecordSchema}};
290/// # use serde::{Serialize, Deserialize};
291/// # use std::{time::Duration, collections::HashSet};
292/// pub struct Foo {
293///     one: String,
294///     two: i32,
295///     three: Option<Duration>
296/// }
297///
298/// impl AvroSchemaComponent for Foo {
299///     fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
300///         // Create the fully qualified name for your type given the enclosing namespace
301///         let name = Name::new_with_enclosing_namespace("Foo", enclosing_namespace).expect("Name is valid");
302///         if named_schemas.contains(&name) {
303///             Schema::Ref { name }
304///         } else {
305///             let enclosing_namespace = name.namespace();
306///             // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion.
307///             named_schemas.insert(name.clone());
308///             let schema = Schema::Record(RecordSchema::builder()
309///                 .name(name.clone())
310///                 .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!"))
311///                 .build()
312///             );
313///             schema
314///         }
315///     }
316///
317///     fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
318///         Some(vec![
319///             RecordField::builder()
320///                 .name("one")
321///                 .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace))
322///                 .build(),
323///             RecordField::builder()
324///                 .name("two")
325///                 .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace))
326///                 .build(),
327///             RecordField::builder()
328///                 .name("three")
329///                 .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace))
330///                 .build(),
331///         ])
332///     }
333///
334///     fn field_default() -> Option<serde_json::Value> {
335///         // This type does not provide a default value
336///         None
337///     }
338///}
339/// ```
pub trait AvroSchemaComponent {
    /// Get the schema for this component
    ///
    /// `named_schemas` is the set of names of the named schemas that are already known, and
    /// `enclosing_namespace` is the namespace of the schema this component is embedded in.
    fn get_schema_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Schema;

    /// Get the fields of this schema if it is a record.
    ///
    /// This returns `None` if the schema is not a record.
    ///
    /// The default implementation has to do a lot of extra work, so it is strongly recommended to
    /// implement this function when manually implementing this trait.
    fn get_record_fields_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Option<Vec<RecordField>> {
        // Delegate to the free function of the same name in this module, using this type's
        // `get_schema_in_ctxt` as the source of the schema.
        get_record_fields_in_ctxt(named_schemas, enclosing_namespace, Self::get_schema_in_ctxt)
    }

    /// The default value of this type when used for a record field.
    ///
    /// `None` means no default value, which is also the default implementation.
    ///
    /// Implementations of this trait provided by this crate return `None` except for `Option<T>`
    /// which returns `Some(serde_json::Value::Null)`.
    fn field_default() -> Option<serde_json::Value> {
        None
    }
}
370
/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
///
/// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]`
///
/// `schema_fn` is called with `named_schemas` and `enclosing_namespace` to obtain the schema.
/// It is called a second time when the first call only yields a [`Schema::Ref`].
///
/// Returns `Some(fields)` when the schema obtained from `schema_fn` is a [`Schema::Record`],
/// and `None` otherwise.
///
/// # Panics
///
/// - If `schema_fn` returns a [`Schema::Ref`] whose name is not present in `named_schemas`.
/// - If, while searching the fields for a self-reference, a nested [`Schema::Record`] with the
///   same name as the record itself is found (only a `Schema::Ref` is expected there).
#[doc(hidden)]
pub fn get_record_fields_in_ctxt(
    named_schemas: &mut HashSet<Name>,
    enclosing_namespace: NamespaceRef,
    schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema,
) -> Option<Vec<RecordField>> {
    let mut record = match schema_fn(named_schemas, enclosing_namespace) {
        // The name was not known yet, so the full record definition was produced.
        Schema::Record(record) => record,
        Schema::Ref { name } => {
            // This schema already exists in `named_schemas` so temporarily remove it so we can
            // get the actual schema.
            assert!(
                named_schemas.remove(&name),
                "Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}"
            );
            // Get the schema
            let schema = schema_fn(named_schemas, enclosing_namespace);
            // Reinsert the old value
            named_schemas.insert(name);

            // Now check if we actually got a record and return the fields if that is the case
            let Schema::Record(record) = schema else {
                return None;
            };
            return Some(record.fields);
        }
        // Any other schema kind has no record fields.
        _ => return None,
    };
    // This schema did not yet exist in `named_schemas`, so we need to remove it if and only if
    // it isn't used somewhere in the schema (recursive type).

    // Find the first Schema::Ref that has the target name
    //
    // The search descends into arrays, maps, unions and records (in field order) and returns
    // a mutable reference to the matching `Schema::Ref` so the caller can replace it in place.
    fn find_first_ref<'a>(schema: &'a mut Schema, target: &Name) -> Option<&'a mut Schema> {
        match schema {
            Schema::Ref { name } if name == target => Some(schema),
            Schema::Array(array) => find_first_ref(&mut array.items, target),
            Schema::Map(map) => find_first_ref(&mut map.types, target),
            Schema::Union(union) => {
                for schema in &mut union.schemas {
                    if let Some(schema) = find_first_ref(schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            Schema::Record(record) => {
                // A full definition with the target name is not expected inside the fields.
                assert_ne!(
                    &record.name, target,
                    "Only expecting a Ref named {target:?}"
                );
                for field in &mut record.fields {
                    if let Some(schema) = find_first_ref(&mut field.schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            // All remaining schema kinds are not searched.
            _ => None,
        }
    }

    // Prepare the fields for the new record. All named types will become references.
    // Only the top-level schema of each field is checked with `is_named()`; other schemas are
    // cloned unchanged.
    let new_fields = record
        .fields
        .iter()
        .map(|field| RecordField {
            name: field.name.clone(),
            doc: field.doc.clone(),
            aliases: field.aliases.clone(),
            default: field.default.clone(),
            schema: if field.schema.is_named() {
                Schema::Ref {
                    name: field.schema.name().expect("Schema is named").clone(),
                }
            } else {
                field.schema.clone()
            },
            custom_attributes: field.custom_attributes.clone(),
        })
        .collect();

    // Remove the name in case it is not used
    named_schemas.remove(&record.name);

    // Find the first reference to this schema so we can replace it with the actual schema
    for field in &mut record.fields {
        if let Some(schema) = find_first_ref(&mut field.schema, &record.name) {
            // Rebuild the record definition from the parts of `record`, using the
            // reference-only fields prepared above. This moves out of `record`, which is
            // fine because the loop is left right after the replacement.
            let new_schema = RecordSchema {
                name: record.name,
                aliases: record.aliases,
                doc: record.doc,
                fields: new_fields,
                lookup: record.lookup,
                attributes: record.attributes,
            };

            // Swap the reference for the full definition and recover the name from the old value.
            let name = match std::mem::replace(schema, Schema::Record(new_schema)) {
                Schema::Ref { name } => name,
                schema => {
                    panic!("Only expected `Schema::Ref` from `find_first_ref`, got: {schema:?}")
                }
            };

            // The schema is used, so reinsert it
            named_schemas.insert(name.clone());

            break;
        }
    }

    // `record.fields` was only borrowed above, so it is still intact.
    Some(record.fields)
}
486
487impl<T> AvroSchema for T
488where
489    T: AvroSchemaComponent + ?Sized,
490{
491    fn get_schema() -> Schema {
492        T::get_schema_in_ctxt(&mut HashSet::default(), None)
493    }
494}
495
// Implements `AvroSchemaComponent` for a type whose schema is one fixed `Schema` expression.
//
// Both context arguments are ignored and the type never reports record fields.
macro_rules! impl_schema {
    ($ty:ty, $schema:expr) => {
        impl AvroSchemaComponent for $ty {
            fn get_schema_in_ctxt(
                _named_schemas: &mut HashSet<Name>,
                _enclosing_namespace: NamespaceRef,
            ) -> Schema {
                $schema
            }

            fn get_record_fields_in_ctxt(
                _named_schemas: &mut HashSet<Name>,
                _enclosing_namespace: NamespaceRef,
            ) -> Option<Vec<RecordField>> {
                None
            }
        }
    };
}
509
// Rust primitive types and the unnamed schema each of them maps to.
impl_schema!(bool, Schema::Boolean);
// Signed integers up to 32 bits map to `Schema::Int`, `i64` maps to `Schema::Long`.
impl_schema!(i8, Schema::Int);
impl_schema!(i16, Schema::Int);
impl_schema!(i32, Schema::Int);
impl_schema!(i64, Schema::Long);
// `u8` and `u16` map to `Schema::Int`, while `u32` maps to `Schema::Long`.
// NOTE(review): presumably because the full `u32` range does not fit in Avro's `int` — confirm
// against the Avro specification.
impl_schema!(u8, Schema::Int);
impl_schema!(u16, Schema::Int);
impl_schema!(u32, Schema::Long);
// Floating point types.
impl_schema!(f32, Schema::Float);
impl_schema!(f64, Schema::Double);
// Text-like types, including a single `char`, all map to `Schema::String`.
impl_schema!(String, Schema::String);
impl_schema!(str, Schema::String);
impl_schema!(char, Schema::String);
// The unit type maps to `Schema::Null`.
impl_schema!((), Schema::Null);
524
// Implements `AvroSchemaComponent` for a wrapper around `T` by forwarding every trait
// function to `T`.
//
// Extra bounds on `T` can be appended after `?Sized` in the invocation (each as `+ Bound`).
macro_rules! impl_passthrough_schema {
    ($wrapper:ty where T: AvroSchemaComponent + ?Sized $(+ $extra:tt)*) => {
        impl<T: AvroSchemaComponent $(+ $extra)* + ?Sized> AvroSchemaComponent for $wrapper {
            fn get_schema_in_ctxt(
                named_schemas: &mut HashSet<Name>,
                enclosing_namespace: NamespaceRef,
            ) -> Schema {
                <T as AvroSchemaComponent>::get_schema_in_ctxt(named_schemas, enclosing_namespace)
            }

            fn get_record_fields_in_ctxt(
                named_schemas: &mut HashSet<Name>,
                enclosing_namespace: NamespaceRef,
            ) -> Option<Vec<RecordField>> {
                <T as AvroSchemaComponent>::get_record_fields_in_ctxt(
                    named_schemas,
                    enclosing_namespace,
                )
            }

            fn field_default() -> Option<serde_json::Value> {
                <T as AvroSchemaComponent>::field_default()
            }
        }
    };
}
542
// Wrapper types that expose the schema, record fields and field default of the wrapped `T`
// unchanged.
impl_passthrough_schema!(&T where T: AvroSchemaComponent + ?Sized);
impl_passthrough_schema!(&mut T where T: AvroSchemaComponent + ?Sized);
impl_passthrough_schema!(Box<T> where T: AvroSchemaComponent + ?Sized);
// `Cow` passes the additional `ToOwned` bound on `T` through the macro's extra-bound slot.
impl_passthrough_schema!(Cow<'_, T> where T: AvroSchemaComponent + ?Sized + ToOwned);
impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Sized);
548
// Implements `AvroSchemaComponent` for a sequence of `T` as an array schema whose items
// use the schema of `T`. An array schema never reports record fields.
macro_rules! impl_array_schema {
    ($sequence:ty where T: AvroSchemaComponent) => {
        impl<T> AvroSchemaComponent for $sequence
        where
            T: AvroSchemaComponent,
        {
            fn get_schema_in_ctxt(
                named_schemas: &mut HashSet<Name>,
                enclosing_namespace: NamespaceRef,
            ) -> Schema {
                let items = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
                Schema::array(items).build()
            }

            fn get_record_fields_in_ctxt(
                _named_schemas: &mut HashSet<Name>,
                _enclosing_namespace: NamespaceRef,
            ) -> Option<Vec<RecordField>> {
                None
            }
        }
    };
}
562
// Slices and vectors are both described by an array schema of their element type.
impl_array_schema!([T] where T: AvroSchemaComponent);
impl_array_schema!(Vec<T> where T: AvroSchemaComponent);
565
566impl<T> AvroSchemaComponent for HashMap<String, T>
567where
568    T: AvroSchemaComponent,
569{
570    fn get_schema_in_ctxt(
571        named_schemas: &mut HashSet<Name>,
572        enclosing_namespace: NamespaceRef,
573    ) -> Schema {
574        Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
575    }
576
577    fn get_record_fields_in_ctxt(
578        _: &mut HashSet<Name>,
579        _: NamespaceRef,
580    ) -> Option<Vec<RecordField>> {
581        None
582    }
583}
584
585impl<T> AvroSchemaComponent for Option<T>
586where
587    T: AvroSchemaComponent,
588{
589    fn get_schema_in_ctxt(
590        named_schemas: &mut HashSet<Name>,
591        enclosing_namespace: NamespaceRef,
592    ) -> Schema {
593        let variants = vec![
594            Schema::Null,
595            T::get_schema_in_ctxt(named_schemas, enclosing_namespace),
596        ];
597
598        Schema::Union(
599            UnionSchema::new(variants).expect("Option<T> must produce a valid (non-nested) union"),
600        )
601    }
602
603    fn get_record_fields_in_ctxt(
604        _: &mut HashSet<Name>,
605        _: NamespaceRef,
606    ) -> Option<Vec<RecordField>> {
607        None
608    }
609
610    fn field_default() -> Option<serde_json::Value> {
611        Some(serde_json::Value::Null)
612    }
613}
614
615impl AvroSchemaComponent for core::time::Duration {
616    /// The schema is [`Schema::Record`] with the name `org.apache.avro.rust.Duration`.
617    ///
618    /// It has two fields:
619    /// - `secs` with the schema `Schema::Fixed(name: "org.apache.avro.rust.u64", size: 8)`
620    /// - `nanos` with the schema `Schema::Long`
621    fn get_schema_in_ctxt(
622        named_schemas: &mut HashSet<Name>,
623        enclosing_namespace: NamespaceRef,
624    ) -> Schema {
625        let name = Name::new("org.apache.avro.rust.Duration").expect("Name is valid");
626        if named_schemas.contains(&name) {
627            Schema::Ref { name }
628        } else {
629            named_schemas.insert(name.clone());
630            Schema::record(name)
631                .fields(
632                    Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
633                        .expect("Unreachable!"),
634                )
635                .build()
636        }
637    }
638
639    fn get_record_fields_in_ctxt(
640        named_schemas: &mut HashSet<Name>,
641        enclosing_namespace: NamespaceRef,
642    ) -> Option<Vec<RecordField>> {
643        Some(vec![
644            // Secs is an u64
645            RecordField::builder()
646                .name("secs")
647                .schema(u64::get_schema_in_ctxt(named_schemas, enclosing_namespace))
648                .build(),
649            // Nanos is an u32
650            RecordField::builder()
651                .name("nanos")
652                .schema(Schema::Long)
653                .build(),
654        ])
655    }
656}
657
658impl AvroSchemaComponent for uuid::Uuid {
659    /// The schema is [`Schema::Uuid`] with the name `org.apache.avro.rust.Uuid`.
660    ///
661    /// The underlying schema is [`Schema::Fixed`] with a size of 16.
662    ///
663    /// If you're using `human_readable: true` you need to override this schema with a `Schema::String`.
664    fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
665        let name = Name::new("org.apache.avro.rust.Uuid").expect("Name is valid");
666        if named_schemas.contains(&name) {
667            Schema::Ref { name }
668        } else {
669            let schema = Schema::Uuid(UuidSchema::Fixed(FixedSchema {
670                name: name.clone(),
671                aliases: None,
672                doc: None,
673                size: 16,
674                attributes: Default::default(),
675            }));
676            named_schemas.insert(name);
677            schema
678        }
679    }
680
681    fn get_record_fields_in_ctxt(
682        _: &mut HashSet<Name>,
683        _: NamespaceRef,
684    ) -> Option<Vec<RecordField>> {
685        None
686    }
687}
688
689impl AvroSchemaComponent for u64 {
690    /// The schema is [`Schema::Fixed`] of size 8 with the name `org.apache.avro.rust.u64`.
691    fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
692        let name = Name::new("org.apache.avro.rust.u64").expect("Name is valid");
693        if named_schemas.contains(&name) {
694            Schema::Ref { name }
695        } else {
696            let schema = Schema::Fixed(FixedSchema {
697                name: name.clone(),
698                aliases: None,
699                doc: None,
700                size: 8,
701                attributes: Default::default(),
702            });
703            named_schemas.insert(name);
704            schema
705        }
706    }
707
708    fn get_record_fields_in_ctxt(
709        _: &mut HashSet<Name>,
710        _: NamespaceRef,
711    ) -> Option<Vec<RecordField>> {
712        None
713    }
714}
715
716impl AvroSchemaComponent for u128 {
717    /// The schema is [`Schema::Fixed`] of size 16 with the name `org.apache.avro.rust.u128`.
718    fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
719        let name = Name::new("org.apache.avro.rust.u128").expect("Name is valid");
720        if named_schemas.contains(&name) {
721            Schema::Ref { name }
722        } else {
723            let schema = Schema::Fixed(FixedSchema {
724                name: name.clone(),
725                aliases: None,
726                doc: None,
727                size: 16,
728                attributes: Default::default(),
729            });
730            named_schemas.insert(name);
731            schema
732        }
733    }
734
735    fn get_record_fields_in_ctxt(
736        _: &mut HashSet<Name>,
737        _: NamespaceRef,
738    ) -> Option<Vec<RecordField>> {
739        None
740    }
741}
742
743impl AvroSchemaComponent for i128 {
744    /// The schema is [`Schema::Fixed`] of size 16 with the name `org.apache.avro.rust.i128`.
745    fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
746        let name = Name::new("org.apache.avro.rust.i128").expect("Name is valid");
747        if named_schemas.contains(&name) {
748            Schema::Ref { name }
749        } else {
750            let schema = Schema::Fixed(FixedSchema {
751                name: name.clone(),
752                aliases: None,
753                doc: None,
754                size: 16,
755                attributes: Default::default(),
756            });
757            named_schemas.insert(name);
758            schema
759        }
760    }
761
762    fn get_record_fields_in_ctxt(
763        _: &mut HashSet<Name>,
764        _: NamespaceRef,
765    ) -> Option<Vec<RecordField>> {
766        None
767    }
768}
769
770/// Schema definition for `[T; N]`
771///
772/// Schema is defined as follows:
773/// - 0-sized arrays: [`Schema::Null`]
774/// - 1-sized arrays: `T::get_schema_in_ctxt`
775/// - N-sized arrays: [`Schema::Record`] with a field for every index
776///
777/// If you need or want a [`Schema::Array`], [`Schema::Bytes`], or [`Schema::Fixed`] instead,
778/// use [`apache_avro::serde::array`], [`apache_avro::serde::bytes`], or [`apache_avro::serde::fixed`] respectively.
779///
780/// [`apache_avro::serde::array`]: crate::serde::array
781/// [`apache_avro::serde::bytes`]: crate::serde::bytes
782/// [`apache_avro::serde::fixed`]: crate::serde::fixed
783impl<const N: usize, T: AvroSchemaComponent> AvroSchemaComponent for [T; N] {
784    fn get_schema_in_ctxt(
785        named_schemas: &mut HashSet<Name>,
786        enclosing_namespace: NamespaceRef,
787    ) -> Schema {
788        if N == 0 {
789            Schema::Null
790        } else if N == 1 {
791            T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
792        } else {
793            let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
794            let name = Name::new_with_enclosing_namespace(
795                format!("A{N}_{}", t_schema.unique_normalized_name()),
796                enclosing_namespace,
797            )
798            .expect("Name is valid");
799            if named_schemas.contains(&name) {
800                Schema::Ref { name }
801            } else {
802                named_schemas.insert(name.clone());
803
804                let t_default = T::field_default();
805                // If T is a named schema or contains named schemas, they'll now be a reference.
806                let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
807                let fields = std::iter::once(
808                    RecordField::builder()
809                        .name("field_0".to_string())
810                        .schema(t_schema)
811                        .maybe_default(t_default.clone())
812                        .build(),
813                )
814                .chain((1..N).map(|n| {
815                    RecordField::builder()
816                        .name(format!("field_{n}"))
817                        .schema(t_ref.clone())
818                        .maybe_default(t_default.clone())
819                        .build()
820                }))
821                .collect();
822
823                Schema::record(name).fields(fields).build()
824            }
825        }
826    }
827
828    fn get_record_fields_in_ctxt(
829        named_schemas: &mut HashSet<Name>,
830        enclosing_namespace: NamespaceRef,
831    ) -> Option<Vec<RecordField>> {
832        if N == 0 {
833            None
834        } else if N == 1 {
835            T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
836        } else {
837            let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
838            let t_default = T::field_default();
839            // If T is a named schema or contains named schemas, they'll now be a reference.
840            let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
841            let fields = std::iter::once(
842                RecordField::builder()
843                    .name("field_0".to_string())
844                    .schema(t_schema)
845                    .maybe_default(t_default.clone())
846                    .build(),
847            )
848            .chain((1..N).map(|n| {
849                RecordField::builder()
850                    .name(format!("field_{n}"))
851                    .schema(t_ref.clone())
852                    .maybe_default(t_default.clone())
853                    .build()
854            }))
855            .collect();
856            Some(fields)
857        }
858    }
859
860    /// `None` for 0-sized and N-sized arrays, `T::field_default` for 1-sized arrays
861    fn field_default() -> Option<serde_json::Value> {
862        if N == 1 { T::field_default() } else { None }
863    }
864}
865
866/// Schema definition for `(T₁, T₂, …, Tₙ)`.
867///
868/// Implemented for tuples of up to 16 elements.
869///
870/// Schema is defined as follows:
871/// - 1-tuple: `T::get_schema_in_ctxt`
872/// - N-tuple: [`Schema::Record`] with a field for every element
873#[cfg_attr(docsrs, doc(fake_variadic))]
874impl<T: AvroSchemaComponent> AvroSchemaComponent for (T,) {
875    fn get_schema_in_ctxt(
876        named_schemas: &mut HashSet<Name>,
877        enclosing_namespace: NamespaceRef,
878    ) -> Schema {
879        T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
880    }
881
882    fn get_record_fields_in_ctxt(
883        named_schemas: &mut HashSet<Name>,
884        enclosing_namespace: NamespaceRef,
885    ) -> Option<Vec<RecordField>> {
886        T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
887    }
888
889    /// `None` for N-tuples, `T::field_default()` for 1-tuple.
890    fn field_default() -> Option<serde_json::Value> {
891        T::field_default()
892    }
893}
894
895macro_rules! tuple_impls {
896    ($($len:expr => ($($name:ident)+))+) => {
897        $(
898            #[cfg_attr(docsrs, doc(hidden))]
899            impl<$($name: AvroSchemaComponent),+> AvroSchemaComponent for ($($name),+) {
900                fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
901                    let schemas: [Schema; $len] = [$($name::get_schema_in_ctxt(named_schemas, enclosing_namespace)),+];
902
903                    let mut name = format!("T{}", $len);
904                    for schema in &schemas {
905                        name.push('_');
906                        name.push_str(&schema.unique_normalized_name());
907                    }
908                    let name = Name::new_with_enclosing_namespace(name, enclosing_namespace).expect("Name is valid");
909
910                    if named_schemas.contains(&name) {
911                        Schema::Ref { name }
912                    } else {
913                        named_schemas.insert(name.clone());
914
915                        let defaults: [Option<serde_json::Value>; $len] = [$($name::field_default()),+];
916
917                        let fields = schemas.into_iter().zip(defaults.into_iter()).enumerate().map(|(n, (schema, default))| {
918                            RecordField::builder()
919                                .name(format!("field_{n}"))
920                                .schema(schema)
921                                .maybe_default(default)
922                                .build()
923                        }).collect();
924
925                        Schema::record(name).fields(fields).build()
926                    }
927                }
928            }
929        )+
930    }
931}
932
933tuple_impls! {
934    2 => (T0 T1)
935    3 => (T0 T1 T2)
936    4 => (T0 T1 T2 T3)
937    5 => (T0 T1 T2 T3 T4)
938    6 => (T0 T1 T2 T3 T4 T5)
939    7 => (T0 T1 T2 T3 T4 T5 T6)
940    8 => (T0 T1 T2 T3 T4 T5 T6 T7)
941    9 => (T0 T1 T2 T3 T4 T5 T6 T7 T8)
942    10 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9)
943    11 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10)
944    12 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11)
945    13 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12)
946    14 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13)
947    15 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14)
948    16 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15)
949}
950
#[cfg(test)]
mod tests {
    use apache_avro_test_helper::TestResult;

    use crate::{
        AvroSchema, Schema,
        reader::datum::GenericDatumReader,
        schema::{FixedSchema, Name},
        writer::datum::GenericDatumWriter,
    };

    /// Builds a [`Schema::Fixed`] without aliases, documentation, or custom attributes.
    fn plain_fixed(name: Name, size: usize) -> Schema {
        Schema::Fixed(FixedSchema {
            name,
            aliases: None,
            doc: None,
            size,
            attributes: Default::default(),
        })
    }

    /// `str` maps to an Avro string.
    #[test]
    fn avro_rs_401_str() -> TestResult {
        assert_eq!(<str>::get_schema(), Schema::String);

        Ok(())
    }

    /// Shared and mutable references use the schema of the referenced type.
    #[test]
    fn avro_rs_401_references() -> TestResult {
        let by_shared_ref = <&str>::get_schema();
        let by_mut_ref = <&mut str>::get_schema();

        assert_eq!(by_shared_ref, Schema::String);
        assert_eq!(by_mut_ref, Schema::String);

        Ok(())
    }

    /// A slice maps to an Avro array of the element schema.
    #[test]
    fn avro_rs_401_slice() -> TestResult {
        let actual = <[u8]>::get_schema();
        let expected = Schema::array(Schema::Int).build();
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `Option` around a slice reference maps to a union of null and the array schema.
    #[test]
    fn avro_rs_401_option_ref_slice_array() -> TestResult {
        let actual = <Option<&[u8]>>::get_schema();
        let expected = Schema::union(vec![Schema::Null, Schema::array(Schema::Int).build()])?;
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `char` maps to an Avro string.
    #[test]
    fn avro_rs_414_char() -> TestResult {
        assert_eq!(<char>::get_schema(), Schema::String);

        Ok(())
    }

    /// `u64` maps to an 8-byte fixed in the `org.apache.avro.rust` namespace.
    #[test]
    fn avro_rs_414_u64() -> TestResult {
        let actual = u64::get_schema();
        assert_eq!(
            actual,
            plain_fixed(Name::new("org.apache.avro.rust.u64")?, 8)
        );

        Ok(())
    }

    /// `i128` maps to a 16-byte fixed in the `org.apache.avro.rust` namespace.
    #[test]
    fn avro_rs_414_i128() -> TestResult {
        let actual = i128::get_schema();
        assert_eq!(
            actual,
            plain_fixed(Name::new("org.apache.avro.rust.i128")?, 16)
        );

        Ok(())
    }

    /// `u128` maps to a 16-byte fixed in the `org.apache.avro.rust` namespace.
    #[test]
    fn avro_rs_414_u128() -> TestResult {
        let actual = u128::get_schema();
        assert_eq!(
            actual,
            plain_fixed(Name::new("org.apache.avro.rust.u128")?, 16)
        );

        Ok(())
    }

    /// The unit type maps to Avro null.
    #[test]
    fn avro_rs_486_unit() -> TestResult {
        assert_eq!(<()>::get_schema(), Schema::Null);

        Ok(())
    }

    /// `Option<()>` would need a union with two nulls, which is rejected.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions cannot contain duplicate types, found at least two Null }"
    )]
    fn avro_rs_489_some_unit() {
        let _ = <Option<()>>::get_schema();
    }

    /// `Option<Option<T>>` would need a union directly inside a union, which is rejected.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions may not directly contain a union }"
    )]
    fn avro_rs_489_option_option() {
        let _ = <Option<Option<i32>>>::get_schema();
    }

    /// `std::time::Duration` has the expected record schema and its extreme values round-trip.
    #[test]
    fn avro_rs_512_std_time_duration() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "Duration",
            "namespace": "org.apache.avro.rust",
            "fields": [
                { "name": "secs", "type": {"type": "fixed", "name": "u64", "namespace": "org.apache.avro.rust", "size": 8} },
                { "name": "nanos", "type": "long" }
            ]
        }"#,
        )?;
        let samples = [std::time::Duration::ZERO, std::time::Duration::MAX];
        assert_eq!(expected, std::time::Duration::get_schema());

        // Serialize every sample first, then read them back in the same order
        let writer = GenericDatumWriter::builder(&expected).build()?;
        let encoded = [
            writer.write_ser_to_vec(&samples[0])?,
            writer.write_ser_to_vec(&samples[1])?,
        ];

        let reader = GenericDatumReader::builder(&expected).build()?;
        for (sample, bytes) in samples.into_iter().zip(&encoded) {
            let decoded: std::time::Duration = reader.read_deser(&mut &bytes[..])?;
            assert_eq!(sample, decoded);
        }
        Ok(())
    }

    /// Empty arrays carry no data and map to null, whatever the element type.
    #[test]
    fn avro_rs_512_0_array() -> TestResult {
        let of_strings = <[String; 0]>::get_schema();
        assert_eq!(Schema::Null, of_strings);
        let of_units = <[(); 0]>::get_schema();
        assert_eq!(Schema::Null, of_units);
        let of_bools = <[bool; 0]>::get_schema();
        assert_eq!(Schema::Null, of_bools);
        Ok(())
    }

    /// One-element arrays use the schema of their element.
    #[test]
    fn avro_rs_512_1_array() -> TestResult {
        let of_string = <[String; 1]>::get_schema();
        assert_eq!(Schema::String, of_string);
        let of_unit = <[(); 1]>::get_schema();
        assert_eq!(Schema::Null, of_unit);
        let of_bool = <[bool; 1]>::get_schema();
        assert_eq!(Schema::Boolean, of_bool);
        Ok(())
    }

    /// Longer arrays become a record with one field per index.
    #[test]
    fn avro_rs_512_n_array() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A5_s",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "string" },
                { "name": "field_2", "type": "string" },
                { "name": "field_3", "type": "string" },
                { "name": "field_4", "type": "string" }
            ]
        }"#,
        )?;

        let actual = <[String; 5]>::get_schema();
        assert_eq!(expected, actual);
        Ok(())
    }

    /// A named element schema is defined in the first field and referenced in the later ones.
    #[test]
    fn avro_rs_512_n_array_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A2_u2_n_r25_org_apache_avro_rust_Uuid",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "Uuid", "namespace": "org.apache.avro.rust"}], "default": null },
                { "name": "field_1", "type": ["null", "org.apache.avro.rust.Uuid"], "default": null }
            ]
        }"#,
        )?;

        let actual = <[Option<uuid::Uuid>; 2]>::get_schema();
        assert_eq!(expected, actual);
        Ok(())
    }

    /// One-element tuples use the schema of their element.
    #[test]
    fn avro_rs_512_1_tuple() -> TestResult {
        let of_string = <(String,)>::get_schema();
        assert_eq!(Schema::String, of_string);
        let of_unit = <((),)>::get_schema();
        assert_eq!(Schema::Null, of_unit);
        let of_bool = <(bool,)>::get_schema();
        assert_eq!(Schema::Boolean, of_bool);
        Ok(())
    }

    /// Longer tuples become a record with one field per element.
    #[test]
    fn avro_rs_512_n_tuple() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T5_s_i_l_B_n",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "int" },
                { "name": "field_2", "type": "long" },
                { "name": "field_3", "type": "boolean" },
                { "name": "field_4", "type": "null" }
            ]
        }"#,
        )?;

        let actual = <(String, i32, i64, bool, ())>::get_schema();
        assert_eq!(expected, actual);
        Ok(())
    }

    /// A named schema shared by several tuple elements is defined once and referenced afterwards.
    #[test]
    fn avro_rs_512_n_tuple_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T3_u2_n_r25_org_apache_avro_rust_Uuid_r25_org_apache_avro_rust_Uuid_s",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "Uuid", "namespace": "org.apache.avro.rust"}], "default": null },
                { "name": "field_1", "type": "org.apache.avro.rust.Uuid" },
                { "name": "field_2", "type": "string" }
            ]
        }"#,
        )?;

        let actual = <(Option<uuid::Uuid>, uuid::Uuid, String)>::get_schema();
        assert_eq!(expected, actual);
        Ok(())
    }
}