apache_avro/serde/derive.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{
19 borrow::Cow,
20 collections::{HashMap, HashSet},
21};
22
23use crate::{
24 Schema,
25 schema::{FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema},
26};
27
28/// Trait for types that serve as an Avro data model.
29///
30/// **Do not implement directly!** Either derive it or implement [`AvroSchemaComponent`] to get this trait
31/// through a blanket implementation.
32///
33/// ## Deriving `AvroSchema`
34///
35/// Using the custom derive requires that you enable the `"derive"` cargo
36/// feature in your `Cargo.toml`:
37///
38/// ```toml
39/// [dependencies]
40/// apache-avro = { version = "..", features = ["derive"] }
41/// ```
42///
43/// Then, you add the `#[derive(AvroSchema)]` annotation to your `struct` and
44/// `enum` type definition:
45///
46/// ```
47/// # use serde::{Serialize, Deserialize};
48/// # use apache_avro::AvroSchema;
49/// #[derive(AvroSchema, Serialize, Deserialize)]
50/// pub struct Foo {
51/// bar: Vec<Bar>,
52/// }
53///
54/// #[derive(AvroSchema, Serialize, Deserialize)]
55/// pub enum Bar {
56/// Spam,
57/// Maps
58/// }
59/// ```
60///
61/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
62/// through the blanket implementation for `T: AvroSchemaComponent`.
63///
64/// When deriving `struct`s, every member must also implement `AvroSchemaComponent`.
65///
66/// ## Changing the generated schema
67///
68/// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema.
69/// It will also check for compatibility between the various attributes.
70///
71/// #### Container attributes
72///
73/// - `#[serde(rename = "name")]`
74///
75// TODO: Should we check if `name` contains any dots? As that would imply a namespace
76/// Set the `name` of the schema to the given string. Defaults to the name of the type.
77///
78/// - `#[avro(namespace = "some.name.space")]`
79///
80/// Set the `namespace` of the schema. This will be the relative namespace if the schema is included
81/// in another schema.
82///
83/// - `#[avro(doc = "Some documentation")]`
84///
85/// Set the `doc` attribute of the schema. Defaults to the documentation of the type.
86///
87/// - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
88///
89/// Provide the default value for this type when it is used in a field.
90///
91/// - `#[avro(alias = "name")]`
92///
93/// Set the `alias` attribute of the schema. Can be specified multiple times.
94///
95/// - `#[serde(rename_all = "camelCase")]`
96///
97/// Rename all the fields or variants in the schema to follow the given case convention. The possible values
98/// are `"lowercase"`, `"UPPERCASE"`, `"PascalCase"`, `"camelCase"`, `"snake_case"`, `"kebab-case"`,
99/// `"SCREAMING_SNAKE_CASE"`, `"SCREAMING-KEBAB-CASE"`.
100///
101/// - `#[serde(transparent)]`
102///
103/// Use the schema of the inner field directly. Is only allowed on structs with only one unskipped field.
104///
105///
106/// #### Variant attributes
107///
108/// - `#[serde(rename = "name")]`
109///
110/// Rename the variant to the given name.
111///
112///
113/// #### Field attributes
114///
115/// - `#[serde(rename = "name")]`
116///
117/// Rename the field name to the given name.
118///
119/// - `#[avro(doc = "Some documentation")]`
120///
121/// Set the `doc` attribute of the field. Defaults to the documentation of the field.
122///
123/// - `#[avro(default = ..)]`
124///
125/// Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
126/// to get the default value for a type. To remove the `default` attribute for a field, set `default` to `false`: `#[avro(default = false)]`.
127///
128/// To override or set a default value, provide a JSON string:
129///
130/// - Null: `#[avro(default = "null")]`
131/// - Boolean: `#[avro(default = "true")]`.
132/// - Number: `#[avro(default = "42")]` or `#[avro(default = "42.5")]`
133/// - String: `#[avro(default = r#""String needs extra quotes""#)]`.
134/// - Array: `#[avro(default = r#"["One", "Two", "Three"]"#)]`.
135/// - Object: `#[avro(default = r#"{"One": 1}"#)]`.
136///
137/// See [the specification](https://avro.apache.org/docs/++version++/specification/#schema-record)
138/// for details on how to map a type to a JSON value.
139///
140/// - `#[serde(alias = "name")]`
141///
142/// Set the `alias` attribute of the field. Can be specified multiple times.
143///
144/// - `#[serde(flatten)]`
145///
146/// Flatten the content of this field into the container it is defined in.
147///
148/// - `#[serde(skip)]`
149///
150/// Do not include this field in the schema.
151///
152/// - `#[serde(skip_serializing)]`
153///
154/// When combined with `#[serde(skip_deserializing)]`, don't include this field in the schema.
155/// Otherwise, it will be included in the schema and the `#[avro(default)]` attribute **must** be
156/// set. That value will be used for serializing.
157///
158/// - `#[serde(skip_serializing_if)]`
159///
160/// Conditionally use the value of the field or the value provided by `#[avro(default)]`. The
161/// `#[avro(default)]` attribute **must** be set.
162///
163/// - `#[avro(with)]` and `#[serde(with = "module")]`
164///
165/// Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types).
166///
167/// #### Incompatible Serde attributes
168///
169/// The derive macro is compatible with most Serde attributes, but it is incompatible with
170/// the following attributes:
171///
172/// - Container attributes
173/// - `tag`
174/// - `content`
175/// - `untagged`
176/// - `variant_identifier`
177/// - `field_identifier`
178/// - `remote`
179/// - `rename_all(serialize = "..", deserialize = "..")` where `serialize` != `deserialize`
180/// - Variant attributes
181/// - `other`
182/// - `untagged`
183/// - Field attributes
184/// - `getter`
185///
186/// ## Working with foreign types
187///
/// Most foreign types won't have an [`AvroSchema`] implementation. This crate implements it only
189/// for built-in types and [`uuid::Uuid`].
190///
/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]`
192/// attribute can be used to get the schema for those fields. It can be used in two ways:
193///
/// 1. In combination with `#[serde(with = "path::to::module")]`
195///
196/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Schema`
197/// and `fn get_record_fields_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Option<Vec<RecordField>>` in the module provided
198/// to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those
199/// functions.
200///
201/// 2. By providing a function directly, `#[avro(with = some_fn)]`.
202///
203/// To get the schema, it will call the function provided. It must have the signature
204/// `fn(&mut HashSet<Name>, NamespaceRef) -> Schema`. When this is used for a `transparent` struct, the
205/// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used.
206/// This is only recommended for primitive types, as the default implementation cannot be efficiently
207/// implemented for complex types.
208///
pub trait AvroSchema {
    /// Construct the full schema that represents this type.
    ///
    /// The returned schema is fully independent and contains only `Schema::Ref` to named types defined
    /// earlier in the schema.
    ///
    /// This is an associated function without a `self` parameter: the schema is determined by
    /// the type alone, so no value of the type is needed to call it.
    fn get_schema() -> Schema;
}
216
217/// Trait for types that serve as fully defined components inside an Avro data model.
218///
219/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when the `derive` feature is enabled.
220///
221/// # Implementation guide
222///
223/// ### Implementation for returning primitive types
224/// When the schema you want to return is a primitive type (a type without a name), the function
225/// arguments can be ignored.
226///
227/// For example, you have a custom integer type:
228/// ```
229/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
230/// # use std::collections::HashSet;
231/// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods
232/// pub struct U24([u8; 3]);
233/// impl AvroSchemaComponent for U24 {
234/// fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
235/// Schema::Int
236/// }
237///
238/// fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
239/// None // A Schema::Int is not a Schema::Record so there are no fields to return
240/// }
241///
242/// fn field_default() -> Option<serde_json::Value> {
243/// // Zero as default value. Can also be None if you don't want to provide a default value
244/// Some(0u8.into())
245/// }
246///}
247/// ```
248///
249/// ### Passthrough implementation
250///
/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
252/// pass through the arguments to the inner type:
253/// ```
254/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
255/// # use serde::{Serialize, Deserialize};
256/// # use std::collections::HashSet;
257/// #[derive(Serialize, Deserialize)]
258/// #[serde(transparent)] // This attribute is important for all passthrough implementations!
259/// pub struct Transparent<T>(T);
260/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
261/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
262/// T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
263/// }
264///
265/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
266/// T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
267/// }
268///
269/// fn field_default() -> Option<serde_json::Value> {
270/// T::field_default()
271/// }
272///}
273/// ```
274///
275/// ### Implementation for complex types
276/// When the schema you want to return is a complex type (a type with a name), special care has to
277/// be taken to avoid duplicate type definitions and getting the correct namespace.
278///
279/// Things to keep in mind:
280/// - If the fully qualified name already exists, return a [`Schema::Ref`]
281/// - Use the `AvroSchemaComponent` implementations to get the schemas for the subtypes
282/// - The ordering of fields in the schema **must** match with the ordering in Serde
283/// - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
284/// with backtracking and a lot of cloning.
285/// - Even if your schema is not a record, still implement the function and just return `None`
286/// - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
287///
288/// ```
289/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField, RecordSchema}};
290/// # use serde::{Serialize, Deserialize};
291/// # use std::{time::Duration, collections::HashSet};
292/// pub struct Foo {
293/// one: String,
294/// two: i32,
295/// three: Option<Duration>
296/// }
297///
298/// impl AvroSchemaComponent for Foo {
299/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
300/// // Create the fully qualified name for your type given the enclosing namespace
301/// let name = Name::new_with_enclosing_namespace("Foo", enclosing_namespace).expect("Name is valid");
302/// if named_schemas.contains(&name) {
303/// Schema::Ref { name }
304/// } else {
305/// let enclosing_namespace = name.namespace();
306/// // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion.
307/// named_schemas.insert(name.clone());
308/// let schema = Schema::Record(RecordSchema::builder()
309/// .name(name.clone())
310/// .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!"))
311/// .build()
312/// );
313/// schema
314/// }
315/// }
316///
317/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
318/// Some(vec![
319/// RecordField::builder()
320/// .name("one")
321/// .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace))
322/// .build(),
323/// RecordField::builder()
324/// .name("two")
325/// .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace))
326/// .build(),
327/// RecordField::builder()
328/// .name("three")
329/// .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace))
330/// .build(),
331/// ])
332/// }
333///
334/// fn field_default() -> Option<serde_json::Value> {
335/// // This type does not provide a default value
336/// None
337/// }
338///}
339/// ```
pub trait AvroSchemaComponent {
    /// Get the schema for this component
    ///
    /// `named_schemas` holds the names of the named schemas that are already known and
    /// `enclosing_namespace` is the namespace this component is placed in.
    ///
    /// The implementations for named types in this module return a `Schema::Ref` when their
    /// name is already present in `named_schemas`; otherwise they insert their name and
    /// return the full definition.
    fn get_schema_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Schema;

    /// Get the fields of this schema if it is a record.
    ///
    /// This returns `None` if the schema is not a record.
    ///
    /// The default implementation has to do a lot of extra work, so it is strongly recommended to
    /// implement this function when manually implementing this trait.
    ///
    /// The default implementation forwards to the module-level function
    /// `get_record_fields_in_ctxt`, passing `Self::get_schema_in_ctxt` as the schema function.
    fn get_record_fields_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Option<Vec<RecordField>> {
        get_record_fields_in_ctxt(named_schemas, enclosing_namespace, Self::get_schema_in_ctxt)
    }

    /// The default value of this type when used for a record field.
    ///
    /// `None` means no default value, which is also the default implementation.
    ///
    /// Implementations of this trait provided by this crate return `None` except for `Option<T>`
    /// which returns `Some(serde_json::Value::Null)`.
    fn field_default() -> Option<serde_json::Value> {
        None
    }
}
370
/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
///
/// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]`
///
/// `schema_fn` is called with `named_schemas` and `enclosing_namespace` to obtain the schema:
/// - If it returns a `Schema::Record`, the fields of that record are returned. The record's name
///   is removed from `named_schemas` again, unless one of the fields refers back to the record.
///   In that case the first such `Schema::Ref` is replaced by a definition of the record and
///   the name stays registered.
/// - If it returns a `Schema::Ref`, the name is temporarily removed from `named_schemas` and
///   `schema_fn` is called a second time to obtain the real definition. Its fields are returned
///   if it is a record, otherwise `None`.
/// - For every other schema `None` is returned.
///
/// # Panics
///
/// Panics if `schema_fn` returns a `Schema::Ref` whose name is not present in `named_schemas`,
/// or if a nested record with the same name as the outer record is encountered while searching
/// for a self-reference.
#[doc(hidden)]
pub fn get_record_fields_in_ctxt(
    named_schemas: &mut HashSet<Name>,
    enclosing_namespace: NamespaceRef,
    schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema,
) -> Option<Vec<RecordField>> {
    let mut record = match schema_fn(named_schemas, enclosing_namespace) {
        Schema::Record(record) => record,
        Schema::Ref { name } => {
            // This schema already exists in `named_schemas` so temporarily remove it so we can
            // get the actual schema.
            // Note: the `remove` call is the side effect here; `assert!` is always evaluated.
            assert!(
                named_schemas.remove(&name),
                "Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}"
            );
            // Get the schema
            let schema = schema_fn(named_schemas, enclosing_namespace);
            // Reinsert the old value
            named_schemas.insert(name);

            // Now check if we actually got a record and return the fields if that is the case
            let Schema::Record(record) = schema else {
                return None;
            };
            return Some(record.fields);
        }
        _ => return None,
    };
    // This schema did not yet exist in `named_schemas`, so we need to remove it if and only if
    // it isn't used somewhere in the schema (recursive type).

    // Find the first Schema::Ref that has the target name
    //
    // The search is depth-first through arrays, maps, unions and record fields; all other
    // schema kinds are treated as leaves.
    fn find_first_ref<'a>(schema: &'a mut Schema, target: &Name) -> Option<&'a mut Schema> {
        match schema {
            Schema::Ref { name } if name == target => Some(schema),
            Schema::Array(array) => find_first_ref(&mut array.items, target),
            Schema::Map(map) => find_first_ref(&mut map.types, target),
            Schema::Union(union) => {
                for schema in &mut union.schemas {
                    if let Some(schema) = find_first_ref(schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            Schema::Record(record) => {
                // A second full definition of the target is not expected inside its own fields
                assert_ne!(
                    &record.name, target,
                    "Only expecting a Ref named {target:?}"
                );
                for field in &mut record.fields {
                    if let Some(schema) = find_first_ref(&mut field.schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            _ => None,
        }
    }

    // Prepare the fields for the new record. All named types will become references.
    // NOTE(review): only a field whose schema is itself named is turned into a `Schema::Ref`;
    // named schemas nested inside arrays, maps or unions are cloned as full definitions.
    // Confirm that this cannot produce duplicate definitions.
    let new_fields = record
        .fields
        .iter()
        .map(|field| RecordField {
            name: field.name.clone(),
            doc: field.doc.clone(),
            aliases: field.aliases.clone(),
            default: field.default.clone(),
            schema: if field.schema.is_named() {
                Schema::Ref {
                    name: field.schema.name().expect("Schema is named").clone(),
                }
            } else {
                field.schema.clone()
            },
            custom_attributes: field.custom_attributes.clone(),
        })
        .collect();

    // Remove the name in case it is not used
    named_schemas.remove(&record.name);

    // Find the first reference to this schema so we can replace it with the actual schema
    for field in &mut record.fields {
        if let Some(schema) = find_first_ref(&mut field.schema, &record.name) {
            // The parts of `record` are moved out here, which is only possible because the
            // loop is left unconditionally via the `break` below.
            let new_schema = RecordSchema {
                name: record.name,
                aliases: record.aliases,
                doc: record.doc,
                fields: new_fields,
                lookup: record.lookup,
                attributes: record.attributes,
            };

            // Swap the reference for the definition and recover the name from the old reference
            let name = match std::mem::replace(schema, Schema::Record(new_schema)) {
                Schema::Ref { name } => name,
                schema => {
                    panic!("Only expected `Schema::Ref` from `find_first_ref`, got: {schema:?}")
                }
            };

            // The schema is used, so reinsert it
            named_schemas.insert(name.clone());

            break;
        }
    }

    Some(record.fields)
}
486
487impl<T> AvroSchema for T
488where
489 T: AvroSchemaComponent + ?Sized,
490{
491 fn get_schema() -> Schema {
492 T::get_schema_in_ctxt(&mut HashSet::default(), None)
493 }
494}
495
496macro_rules! impl_schema (
497 ($type:ty, $variant_constructor:expr) => (
498 impl AvroSchemaComponent for $type {
499 fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
500 $variant_constructor
501 }
502
503 fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
504 None
505 }
506 }
507 );
508);
509
510impl_schema!(bool, Schema::Boolean);
511impl_schema!(i8, Schema::Int);
512impl_schema!(i16, Schema::Int);
513impl_schema!(i32, Schema::Int);
514impl_schema!(i64, Schema::Long);
515impl_schema!(u8, Schema::Int);
516impl_schema!(u16, Schema::Int);
517impl_schema!(u32, Schema::Long);
518impl_schema!(f32, Schema::Float);
519impl_schema!(f64, Schema::Double);
520impl_schema!(String, Schema::String);
521impl_schema!(str, Schema::String);
522impl_schema!(char, Schema::String);
523impl_schema!((), Schema::Null);
524
525macro_rules! impl_passthrough_schema (
526 ($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => (
527 impl<T: AvroSchemaComponent $(+ $bound)* + ?Sized> AvroSchemaComponent for $type {
528 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
529 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
530 }
531
532 fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
533 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
534 }
535
536 fn field_default() -> Option<serde_json::Value> {
537 T::field_default()
538 }
539 }
540 );
541);
542
543impl_passthrough_schema!(&T where T: AvroSchemaComponent + ?Sized);
544impl_passthrough_schema!(&mut T where T: AvroSchemaComponent + ?Sized);
545impl_passthrough_schema!(Box<T> where T: AvroSchemaComponent + ?Sized);
546impl_passthrough_schema!(Cow<'_, T> where T: AvroSchemaComponent + ?Sized + ToOwned);
547impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Sized);
548
549macro_rules! impl_array_schema (
550 ($type:ty where T: AvroSchemaComponent) => (
551 impl<T: AvroSchemaComponent> AvroSchemaComponent for $type {
552 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
553 Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
554 }
555
556 fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
557 None
558 }
559 }
560 );
561);
562
563impl_array_schema!([T] where T: AvroSchemaComponent);
564impl_array_schema!(Vec<T> where T: AvroSchemaComponent);
565
566impl<T> AvroSchemaComponent for HashMap<String, T>
567where
568 T: AvroSchemaComponent,
569{
570 fn get_schema_in_ctxt(
571 named_schemas: &mut HashSet<Name>,
572 enclosing_namespace: NamespaceRef,
573 ) -> Schema {
574 Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
575 }
576
577 fn get_record_fields_in_ctxt(
578 _: &mut HashSet<Name>,
579 _: NamespaceRef,
580 ) -> Option<Vec<RecordField>> {
581 None
582 }
583}
584
585impl<T> AvroSchemaComponent for Option<T>
586where
587 T: AvroSchemaComponent,
588{
589 fn get_schema_in_ctxt(
590 named_schemas: &mut HashSet<Name>,
591 enclosing_namespace: NamespaceRef,
592 ) -> Schema {
593 let variants = vec![
594 Schema::Null,
595 T::get_schema_in_ctxt(named_schemas, enclosing_namespace),
596 ];
597
598 Schema::Union(
599 UnionSchema::new(variants).expect("Option<T> must produce a valid (non-nested) union"),
600 )
601 }
602
603 fn get_record_fields_in_ctxt(
604 _: &mut HashSet<Name>,
605 _: NamespaceRef,
606 ) -> Option<Vec<RecordField>> {
607 None
608 }
609
610 fn field_default() -> Option<serde_json::Value> {
611 Some(serde_json::Value::Null)
612 }
613}
614
615impl AvroSchemaComponent for core::time::Duration {
616 /// The schema is [`Schema::Record`] with the name `Duration`.
617 ///
618 /// It has two fields:
619 /// - `secs` with the schema `Schema::Fixed(name: "u64", size: 8)`
620 /// - `nanos` with the schema `Schema::Long`
621 fn get_schema_in_ctxt(
622 named_schemas: &mut HashSet<Name>,
623 enclosing_namespace: NamespaceRef,
624 ) -> Schema {
625 let name = Name::new_with_enclosing_namespace("Duration", enclosing_namespace)
626 .expect("Name is valid");
627 if named_schemas.contains(&name) {
628 Schema::Ref { name }
629 } else {
630 named_schemas.insert(name.clone());
631 Schema::record(name)
632 .fields(
633 Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
634 .expect("Unreachable!"),
635 )
636 .build()
637 }
638 }
639
640 fn get_record_fields_in_ctxt(
641 named_schemas: &mut HashSet<Name>,
642 enclosing_namespace: NamespaceRef,
643 ) -> Option<Vec<RecordField>> {
644 Some(vec![
645 // Secs is an u64
646 RecordField::builder()
647 .name("secs")
648 .schema(u64::get_schema_in_ctxt(named_schemas, enclosing_namespace))
649 .build(),
650 // Nanos is an u32
651 RecordField::builder()
652 .name("nanos")
653 .schema(Schema::Long)
654 .build(),
655 ])
656 }
657}
658
659impl AvroSchemaComponent for uuid::Uuid {
660 /// The schema is [`Schema::Uuid`] with the name `uuid`.
661 ///
662 /// The underlying schema is [`Schema::Fixed`] with a size of 16.
663 ///
664 /// If you're using `human_readable: true` you need to override this schema with a `Schema::String`.
665 fn get_schema_in_ctxt(
666 named_schemas: &mut HashSet<Name>,
667 enclosing_namespace: NamespaceRef,
668 ) -> Schema {
669 let name =
670 Name::new_with_enclosing_namespace("uuid", enclosing_namespace).expect("Name is valid");
671 if named_schemas.contains(&name) {
672 Schema::Ref { name }
673 } else {
674 let schema = Schema::Uuid(UuidSchema::Fixed(FixedSchema {
675 name: name.clone(),
676 aliases: None,
677 doc: None,
678 size: 16,
679 attributes: Default::default(),
680 }));
681 named_schemas.insert(name);
682 schema
683 }
684 }
685
686 fn get_record_fields_in_ctxt(
687 _: &mut HashSet<Name>,
688 _: NamespaceRef,
689 ) -> Option<Vec<RecordField>> {
690 None
691 }
692}
693
694impl AvroSchemaComponent for u64 {
695 /// The schema is [`Schema::Fixed`] of size 8 with the name `u64`.
696 fn get_schema_in_ctxt(
697 named_schemas: &mut HashSet<Name>,
698 enclosing_namespace: NamespaceRef,
699 ) -> Schema {
700 let name =
701 Name::new_with_enclosing_namespace("u64", enclosing_namespace).expect("Name is valid");
702 if named_schemas.contains(&name) {
703 Schema::Ref { name }
704 } else {
705 let schema = Schema::Fixed(FixedSchema {
706 name: name.clone(),
707 aliases: None,
708 doc: None,
709 size: 8,
710 attributes: Default::default(),
711 });
712 named_schemas.insert(name);
713 schema
714 }
715 }
716
717 fn get_record_fields_in_ctxt(
718 _: &mut HashSet<Name>,
719 _: NamespaceRef,
720 ) -> Option<Vec<RecordField>> {
721 None
722 }
723}
724
725impl AvroSchemaComponent for u128 {
726 /// The schema is [`Schema::Fixed`] of size 16 with the name `u128`.
727 fn get_schema_in_ctxt(
728 named_schemas: &mut HashSet<Name>,
729 enclosing_namespace: NamespaceRef,
730 ) -> Schema {
731 let name =
732 Name::new_with_enclosing_namespace("u128", enclosing_namespace).expect("Name is valid");
733 if named_schemas.contains(&name) {
734 Schema::Ref { name }
735 } else {
736 let schema = Schema::Fixed(FixedSchema {
737 name: name.clone(),
738 aliases: None,
739 doc: None,
740 size: 16,
741 attributes: Default::default(),
742 });
743 named_schemas.insert(name);
744 schema
745 }
746 }
747
748 fn get_record_fields_in_ctxt(
749 _: &mut HashSet<Name>,
750 _: NamespaceRef,
751 ) -> Option<Vec<RecordField>> {
752 None
753 }
754}
755
756impl AvroSchemaComponent for i128 {
757 /// The schema is [`Schema::Fixed`] of size 16 with the name `i128`.
758 fn get_schema_in_ctxt(
759 named_schemas: &mut HashSet<Name>,
760 enclosing_namespace: NamespaceRef,
761 ) -> Schema {
762 let name =
763 Name::new_with_enclosing_namespace("i128", enclosing_namespace).expect("Name is valid");
764 if named_schemas.contains(&name) {
765 Schema::Ref { name }
766 } else {
767 let schema = Schema::Fixed(FixedSchema {
768 name: name.clone(),
769 aliases: None,
770 doc: None,
771 size: 16,
772 attributes: Default::default(),
773 });
774 named_schemas.insert(name);
775 schema
776 }
777 }
778
779 fn get_record_fields_in_ctxt(
780 _: &mut HashSet<Name>,
781 _: NamespaceRef,
782 ) -> Option<Vec<RecordField>> {
783 None
784 }
785}
786
787/// Schema definition for `[T; N]`
788///
789/// Schema is defined as follows:
790/// - 0-sized arrays: [`Schema::Null`]
791/// - 1-sized arrays: `T::get_schema_in_ctxt`
792/// - N-sized arrays: [`Schema::Record`] with a field for every index
793///
794/// If you need or want a [`Schema::Array`], [`Schema::Bytes`], or [`Schema::Fixed`] instead,
795/// use [`apache_avro::serde::array`], [`apache_avro::serde::bytes`], or [`apache_avro::serde::fixed`] respectively.
796///
797/// [`apache_avro::serde::array`]: crate::serde::array
798/// [`apache_avro::serde::bytes`]: crate::serde::bytes
799/// [`apache_avro::serde::fixed`]: crate::serde::fixed
800impl<const N: usize, T: AvroSchemaComponent> AvroSchemaComponent for [T; N] {
801 fn get_schema_in_ctxt(
802 named_schemas: &mut HashSet<Name>,
803 enclosing_namespace: NamespaceRef,
804 ) -> Schema {
805 if N == 0 {
806 Schema::Null
807 } else if N == 1 {
808 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
809 } else {
810 let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
811 let name = Name::new_with_enclosing_namespace(
812 format!("A{N}_{}", t_schema.unique_normalized_name()),
813 enclosing_namespace,
814 )
815 .expect("Name is valid");
816 if named_schemas.contains(&name) {
817 Schema::Ref { name }
818 } else {
819 named_schemas.insert(name.clone());
820
821 let t_default = T::field_default();
822 // If T is a named schema or contains named schemas, they'll now be a reference.
823 let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
824 let fields = std::iter::once(
825 RecordField::builder()
826 .name("field_0".to_string())
827 .schema(t_schema)
828 .maybe_default(t_default.clone())
829 .build(),
830 )
831 .chain((1..N).map(|n| {
832 RecordField::builder()
833 .name(format!("field_{n}"))
834 .schema(t_ref.clone())
835 .maybe_default(t_default.clone())
836 .build()
837 }))
838 .collect();
839
840 Schema::record(name).fields(fields).build()
841 }
842 }
843 }
844
845 fn get_record_fields_in_ctxt(
846 named_schemas: &mut HashSet<Name>,
847 enclosing_namespace: NamespaceRef,
848 ) -> Option<Vec<RecordField>> {
849 if N == 0 {
850 None
851 } else if N == 1 {
852 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
853 } else {
854 let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
855 let t_default = T::field_default();
856 // If T is a named schema or contains named schemas, they'll now be a reference.
857 let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
858 let fields = std::iter::once(
859 RecordField::builder()
860 .name("field_0".to_string())
861 .schema(t_schema)
862 .maybe_default(t_default.clone())
863 .build(),
864 )
865 .chain((1..N).map(|n| {
866 RecordField::builder()
867 .name(format!("field_{n}"))
868 .schema(t_ref.clone())
869 .maybe_default(t_default.clone())
870 .build()
871 }))
872 .collect();
873 Some(fields)
874 }
875 }
876
877 /// `None` for 0-sized and N-sized arrays, `T::field_default` for 1-sized arrays
878 fn field_default() -> Option<serde_json::Value> {
879 if N == 1 { T::field_default() } else { None }
880 }
881}
882
883/// Schema definition for `(T₁, T₂, …, Tₙ)`.
884///
885/// Implemented for tuples of up to 16 elements.
886///
887/// Schema is defined as follows:
888/// - 1-tuple: `T::get_schema_in_ctxt`
889/// - N-tuple: [`Schema::Record`] with a field for every element
890#[cfg_attr(docsrs, doc(fake_variadic))]
891impl<T: AvroSchemaComponent> AvroSchemaComponent for (T,) {
892 fn get_schema_in_ctxt(
893 named_schemas: &mut HashSet<Name>,
894 enclosing_namespace: NamespaceRef,
895 ) -> Schema {
896 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
897 }
898
899 fn get_record_fields_in_ctxt(
900 named_schemas: &mut HashSet<Name>,
901 enclosing_namespace: NamespaceRef,
902 ) -> Option<Vec<RecordField>> {
903 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
904 }
905
906 /// `None` for N-tuples, `T::field_default()` for 1-tuple.
907 fn field_default() -> Option<serde_json::Value> {
908 T::field_default()
909 }
910}
911
912macro_rules! tuple_impls {
913 ($($len:expr => ($($name:ident)+))+) => {
914 $(
915 #[cfg_attr(docsrs, doc(hidden))]
916 impl<$($name: AvroSchemaComponent),+> AvroSchemaComponent for ($($name),+) {
917 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
918 let schemas: [Schema; $len] = [$($name::get_schema_in_ctxt(named_schemas, enclosing_namespace)),+];
919
920 let mut name = format!("T{}", $len);
921 for schema in &schemas {
922 name.push('_');
923 name.push_str(&schema.unique_normalized_name());
924 }
925 let name = Name::new_with_enclosing_namespace(name, enclosing_namespace).expect("Name is valid");
926
927 if named_schemas.contains(&name) {
928 Schema::Ref { name }
929 } else {
930 named_schemas.insert(name.clone());
931
932 let defaults: [Option<serde_json::Value>; $len] = [$($name::field_default()),+];
933
934 let fields = schemas.into_iter().zip(defaults.into_iter()).enumerate().map(|(n, (schema, default))| {
935 RecordField::builder()
936 .name(format!("field_{n}"))
937 .schema(schema)
938 .maybe_default(default)
939 .build()
940 }).collect();
941
942 Schema::record(name).fields(fields).build()
943 }
944 }
945 }
946 )+
947 }
948}
949
950tuple_impls! {
951 2 => (T0 T1)
952 3 => (T0 T1 T2)
953 4 => (T0 T1 T2 T3)
954 5 => (T0 T1 T2 T3 T4)
955 6 => (T0 T1 T2 T3 T4 T5)
956 7 => (T0 T1 T2 T3 T4 T5 T6)
957 8 => (T0 T1 T2 T3 T4 T5 T6 T7)
958 9 => (T0 T1 T2 T3 T4 T5 T6 T7 T8)
959 10 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9)
960 11 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10)
961 12 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11)
962 13 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12)
963 14 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13)
964 15 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14)
965 16 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15)
966}
967
#[cfg(test)]
mod tests {
    use apache_avro_test_helper::TestResult;

    use crate::{
        AvroSchema, Schema,
        reader::datum::GenericDatumReader,
        schema::{FixedSchema, Name},
        writer::datum::GenericDatumWriter,
    };

    /// `str` maps to the Avro `string` schema.
    #[test]
    fn avro_rs_401_str() -> TestResult {
        assert_eq!(str::get_schema(), Schema::String);
        Ok(())
    }

    /// Shared and mutable references use the schema of the referenced type.
    #[test]
    fn avro_rs_401_references() -> TestResult {
        let through_references = [<&str>::get_schema(), <&mut str>::get_schema()];

        for actual in through_references {
            assert_eq!(actual, Schema::String);
        }
        Ok(())
    }

    /// An unsized slice maps to an Avro `array` of its element schema.
    #[test]
    fn avro_rs_401_slice() -> TestResult {
        assert_eq!(<[u8]>::get_schema(), Schema::array(Schema::Int).build());
        Ok(())
    }

    /// `Option<&[T]>` maps to a union of `null` and the array schema.
    #[test]
    fn avro_rs_401_option_ref_slice_array() -> TestResult {
        let actual = <Option<&[u8]>>::get_schema();
        let expected = Schema::union(vec![Schema::Null, Schema::array(Schema::Int).build()])?;

        assert_eq!(actual, expected);
        Ok(())
    }

    /// `char` maps to the Avro `string` schema.
    #[test]
    fn avro_rs_414_char() -> TestResult {
        assert_eq!(char::get_schema(), Schema::String);
        Ok(())
    }

    /// `u64` maps to an 8 byte `fixed` named `u64`.
    #[test]
    fn avro_rs_414_u64() -> TestResult {
        let actual = u64::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("u64")?,
            aliases: None,
            doc: None,
            size: 8,
            attributes: Default::default(),
        });

        assert_eq!(actual, expected);
        Ok(())
    }

    /// `i128` maps to a 16 byte `fixed` named `i128`.
    #[test]
    fn avro_rs_414_i128() -> TestResult {
        let actual = i128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("i128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });

        assert_eq!(actual, expected);
        Ok(())
    }

    /// `u128` maps to a 16 byte `fixed` named `u128`.
    #[test]
    fn avro_rs_414_u128() -> TestResult {
        let actual = u128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("u128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });

        assert_eq!(actual, expected);
        Ok(())
    }

    /// The unit type maps to the Avro `null` schema.
    #[test]
    fn avro_rs_486_unit() -> TestResult {
        assert_eq!(<()>::get_schema(), Schema::Null);
        Ok(())
    }

    /// `Option<()>` would be a union with two `null` branches, so it must panic.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions cannot contain duplicate types, found at least two Null }"
    )]
    fn avro_rs_489_some_unit() {
        <Option<()>>::get_schema();
    }

    /// `Option<Option<T>>` would nest a union in a union, so it must panic.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions may not directly contain a union }"
    )]
    fn avro_rs_489_option_option() {
        <Option<Option<i32>>>::get_schema();
    }

    /// `std::time::Duration` has the expected record schema and both the
    /// smallest and the largest value survive a write/read round trip.
    #[test]
    fn avro_rs_512_std_time_duration() -> TestResult {
        use std::time::Duration;

        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "Duration",
            "fields": [
                { "name": "secs", "type": {"type": "fixed", "name": "u64", "size": 8} },
                { "name": "nanos", "type": "long" }
            ]
        }"#,
        )?;
        let (zero, max) = (Duration::ZERO, Duration::MAX);
        assert_eq!(expected, Duration::get_schema());

        let writer = GenericDatumWriter::builder(&expected).build()?;
        let zero_bytes = writer.write_ser_to_vec(&zero)?;
        let max_bytes = writer.write_ser_to_vec(&max)?;

        let reader = GenericDatumReader::builder(&expected).build()?;
        let zero_back: Duration = reader.read_deser(&mut &zero_bytes[..])?;
        assert_eq!(zero, zero_back);
        let max_back: Duration = reader.read_deser(&mut &max_bytes[..])?;
        assert_eq!(max, max_back);
        Ok(())
    }

    /// Empty arrays map to `null`, whatever the element type is.
    #[test]
    fn avro_rs_512_0_array() -> TestResult {
        let empty_arrays = [
            <[String; 0]>::get_schema(),
            <[(); 0]>::get_schema(),
            <[bool; 0]>::get_schema(),
        ];

        for actual in empty_arrays {
            assert_eq!(Schema::Null, actual);
        }
        Ok(())
    }

    /// One-element arrays use the schema of their element.
    #[test]
    fn avro_rs_512_1_array() -> TestResult {
        let cases = [
            (Schema::String, <[String; 1]>::get_schema()),
            (Schema::Null, <[(); 1]>::get_schema()),
            (Schema::Boolean, <[bool; 1]>::get_schema()),
        ];

        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// Longer arrays become a record with one `field_{n}` per element.
    #[test]
    fn avro_rs_512_n_array() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A5_s",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "string" },
                { "name": "field_2", "type": "string" },
                { "name": "field_3", "type": "string" },
                { "name": "field_4", "type": "string" }
            ]
        }"#,
        )?;

        assert_eq!(expected, <[String; 5]>::get_schema());
        Ok(())
    }

    /// With a named element type, only the first field carries the full
    /// definition; the second one refers to it by name.
    #[test]
    fn avro_rs_512_n_array_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A2_u2_n_r4_uuid",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "uuid"}], "default": null },
                { "name": "field_1", "type": ["null", "uuid"], "default": null }
            ]
        }"#,
        )?;

        assert_eq!(expected, <[Option<uuid::Uuid>; 2]>::get_schema());
        Ok(())
    }

    /// A 1-tuple uses the schema of its only element.
    #[test]
    fn avro_rs_512_1_tuple() -> TestResult {
        let cases = [
            (Schema::String, <(String,)>::get_schema()),
            (Schema::Null, <((),)>::get_schema()),
            (Schema::Boolean, <(bool,)>::get_schema()),
        ];

        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// An N-tuple becomes a record with one `field_{n}` per element.
    #[test]
    fn avro_rs_512_n_tuple() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T5_s_i_l_B_n",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "int" },
                { "name": "field_2", "type": "long" },
                { "name": "field_3", "type": "boolean" },
                { "name": "field_4", "type": "null" }
            ]
        }"#,
        )?;

        assert_eq!(expected, <(String, i32, i64, bool, ())>::get_schema());
        Ok(())
    }

    /// With a named element type occurring twice, the second occurrence is a
    /// reference to the first definition.
    #[test]
    fn avro_rs_512_n_tuple_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T3_u2_n_r4_uuid_r4_uuid_s",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "uuid"}], "default": null },
                { "name": "field_1", "type": "uuid" },
                { "name": "field_2", "type": "string" }
            ]
        }"#,
        )?;

        assert_eq!(
            expected,
            <(Option<uuid::Uuid>, uuid::Uuid, String)>::get_schema()
        );
        Ok(())
    }
}