apache_avro/serde/derive.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{
19 borrow::Cow,
20 collections::{HashMap, HashSet},
21};
22
23use crate::{
24 Schema,
25 schema::{FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema},
26};
27
28/// Trait for types that serve as an Avro data model.
29///
30/// **Do not implement directly!** Either derive it or implement [`AvroSchemaComponent`] to get this trait
31/// through a blanket implementation.
32///
33/// ## Deriving `AvroSchema`
34///
35/// Using the custom derive requires that you enable the `"derive"` cargo
36/// feature in your `Cargo.toml`:
37///
38/// ```toml
39/// [dependencies]
40/// apache-avro = { version = "..", features = ["derive"] }
41/// ```
42///
43/// Then, you add the `#[derive(AvroSchema)]` annotation to your `struct` and
44/// `enum` type definition:
45///
46/// ```
47/// # use serde::{Serialize, Deserialize};
48/// # use apache_avro::AvroSchema;
49/// #[derive(AvroSchema, Serialize, Deserialize)]
50/// pub struct Foo {
51/// bar: Vec<Bar>,
52/// }
53///
54/// #[derive(AvroSchema, Serialize, Deserialize)]
55/// pub enum Bar {
56/// Spam,
57/// Maps
58/// }
59/// ```
60///
61/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
62/// through the blanket implementation for `T: AvroSchemaComponent`.
63///
64/// When deriving `struct`s, every member must also implement `AvroSchemaComponent`.
65///
66/// ## Changing the generated schema
67///
68/// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema.
69/// It will also check for compatibility between the various attributes.
70///
71/// #### Container attributes
72///
73/// - `#[serde(rename = "name")]`
74///
75// TODO: Should we check if `name` contains any dots? As that would imply a namespace
76/// Set the `name` of the schema to the given string. Defaults to the name of the type.
77///
78/// - `#[avro(namespace = "some.name.space")]`
79///
80/// Set the `namespace` of the schema. This will be the relative namespace if the schema is included
81/// in another schema.
82///
83/// - `#[avro(doc = "Some documentation")]`
84///
85/// Set the `doc` attribute of the schema. Defaults to the documentation of the type.
86///
87/// - `#[avro(default = r#"{"field": 42, "other": "Spam"}"#)]`
88///
89/// Provide the default value for this type when it is used in a field.
90///
91/// - `#[avro(alias = "name")]`
92///
93/// Set the `alias` attribute of the schema. Can be specified multiple times.
94///
95/// - `#[serde(rename_all = "camelCase")]`
96///
97/// Rename all the fields or variants in the schema to follow the given case convention. The possible values
98/// are `"lowercase"`, `"UPPERCASE"`, `"PascalCase"`, `"camelCase"`, `"snake_case"`, `"kebab-case"`,
99/// `"SCREAMING_SNAKE_CASE"`, `"SCREAMING-KEBAB-CASE"`.
100///
101/// - `#[serde(transparent)]`
102///
103/// Use the schema of the inner field directly. Is only allowed on structs with only one unskipped field.
104///
105///
106/// #### Variant attributes
107///
108/// - `#[serde(rename = "name")]`
109///
110/// Rename the variant to the given name.
111///
112///
113/// #### Field attributes
114///
115/// - `#[serde(rename = "name")]`
116///
117/// Rename the field name to the given name.
118///
119/// - `#[avro(doc = "Some documentation")]`
120///
121/// Set the `doc` attribute of the field. Defaults to the documentation of the field.
122///
123/// - `#[avro(default = ..)]`
124///
125/// Control the `default` attribute of the field. When not used, it will use [`AvroSchemaComponent::field_default`]
126/// to get the default value for a type. To remove the `default` attribute for a field, set `default` to `false`: `#[avro(default = false)]`.
127///
128/// To override or set a default value, provide a JSON string:
129///
130/// - Null: `#[avro(default = "null")]`
131/// - Boolean: `#[avro(default = "true")]`.
132/// - Number: `#[avro(default = "42")]` or `#[avro(default = "42.5")]`
133/// - String: `#[avro(default = r#""String needs extra quotes""#)]`.
134/// - Array: `#[avro(default = r#"["One", "Two", "Three"]"#)]`.
135/// - Object: `#[avro(default = r#"{"One": 1}"#)]`.
136///
137/// See [the specification](https://avro.apache.org/docs/++version++/specification/#schema-record)
138/// for details on how to map a type to a JSON value.
139///
140/// - `#[serde(alias = "name")]`
141///
142/// Set the `alias` attribute of the field. Can be specified multiple times.
143///
144/// - `#[serde(flatten)]`
145///
146/// Flatten the content of this field into the container it is defined in.
147///
148/// - `#[serde(skip)]`
149///
150/// Do not include this field in the schema.
151///
152/// - `#[serde(skip_serializing)]`
153///
154/// When combined with `#[serde(skip_deserializing)]`, don't include this field in the schema.
155/// Otherwise, it will be included in the schema and the `#[avro(default)]` attribute **must** be
156/// set. That value will be used for serializing.
157///
158/// - `#[serde(skip_serializing_if)]`
159///
160/// Conditionally use the value of the field or the value provided by `#[avro(default)]`. The
161/// `#[avro(default)]` attribute **must** be set.
162///
163/// - `#[avro(with)]` and `#[serde(with = "module")]`
164///
165/// Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types).
166///
167/// #### Incompatible Serde attributes
168///
169/// The derive macro is compatible with most Serde attributes, but it is incompatible with
170/// the following attributes:
171///
172/// - Container attributes
173/// - `tag`
174/// - `content`
175/// - `untagged`
176/// - `variant_identifier`
177/// - `field_identifier`
178/// - `remote`
179/// - `rename_all(serialize = "..", deserialize = "..")` where `serialize` != `deserialize`
180/// - Variant attributes
181/// - `other`
182/// - `untagged`
183/// - Field attributes
184/// - `getter`
185///
186/// ## Working with foreign types
187///
188/// Most foreign types won't have a [`AvroSchema`] implementation. This crate implements it only
189/// for built-in types and [`uuid::Uuid`].
190///
/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]`
192/// attribute can be used to get the schema for those fields. It can be used in two ways:
193///
/// 1. In combination with `#[serde(with = "path::to::module")]`
195///
196/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Schema`
197/// and `fn get_record_fields_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Option<Vec<RecordField>>` in the module provided
198/// to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those
199/// functions.
200///
201/// 2. By providing a function directly, `#[avro(with = some_fn)]`.
202///
203/// To get the schema, it will call the function provided. It must have the signature
204/// `fn(&mut HashSet<Name>, NamespaceRef) -> Schema`. When this is used for a `transparent` struct, the
205/// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used.
206/// This is only recommended for primitive types, as the default implementation cannot be efficiently
207/// implemented for complex types.
208///
pub trait AvroSchema {
    /// Construct the full schema that represents this type.
    ///
    /// The returned schema is fully independent and contains only `Schema::Ref` to named types defined
    /// earlier in the schema.
    ///
    /// This is an associated function without a receiver, so it is called on the type itself
    /// (for example `Foo::get_schema()`).
    ///
    /// The blanket implementation for `T: AvroSchemaComponent` builds the schema by calling
    /// [`AvroSchemaComponent::get_schema_in_ctxt`] with an empty set of named schemas and no
    /// enclosing namespace.
    fn get_schema() -> Schema;
}
216
217/// Trait for types that serve as fully defined components inside an Avro data model.
218///
219/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when the `derive` feature is enabled.
220///
221/// # Implementation guide
222///
223/// ### Implementation for returning primitive types
224/// When the schema you want to return is a primitive type (a type without a name), the function
225/// arguments can be ignored.
226///
227/// For example, you have a custom integer type:
228/// ```
229/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
230/// # use std::collections::HashSet;
231/// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods
232/// pub struct U24([u8; 3]);
233/// impl AvroSchemaComponent for U24 {
234/// fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
235/// Schema::Int
236/// }
237///
238/// fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
239/// None // A Schema::Int is not a Schema::Record so there are no fields to return
240/// }
241///
242/// fn field_default() -> Option<serde_json::Value> {
243/// // Zero as default value. Can also be None if you don't want to provide a default value
244/// Some(0u8.into())
245/// }
246///}
247/// ```
248///
249/// ### Passthrough implementation
250///
/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
252/// pass through the arguments to the inner type:
253/// ```
254/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}};
255/// # use serde::{Serialize, Deserialize};
256/// # use std::collections::HashSet;
257/// #[derive(Serialize, Deserialize)]
258/// #[serde(transparent)] // This attribute is important for all passthrough implementations!
259/// pub struct Transparent<T>(T);
260/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
261/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
262/// T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
263/// }
264///
265/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
266/// T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
267/// }
268///
269/// fn field_default() -> Option<serde_json::Value> {
270/// T::field_default()
271/// }
272///}
273/// ```
274///
275/// ### Implementation for complex types
276/// When the schema you want to return is a complex type (a type with a name), special care has to
277/// be taken to avoid duplicate type definitions and getting the correct namespace.
278///
279/// Things to keep in mind:
280/// - If the fully qualified name already exists, return a [`Schema::Ref`]
281/// - Use the `AvroSchemaComponent` implementations to get the schemas for the subtypes
282/// - The ordering of fields in the schema **must** match with the ordering in Serde
283/// - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented
284/// with backtracking and a lot of cloning.
285/// - Even if your schema is not a record, still implement the function and just return `None`
286/// - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`.
287///
288/// ```
289/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField, RecordSchema}};
290/// # use serde::{Serialize, Deserialize};
291/// # use std::{time::Duration, collections::HashSet};
292/// pub struct Foo {
293/// one: String,
294/// two: i32,
295/// three: Option<Duration>
296/// }
297///
298/// impl AvroSchemaComponent for Foo {
299/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
300/// // Create the fully qualified name for your type given the enclosing namespace
301/// let name = Name::new_with_enclosing_namespace("Foo", enclosing_namespace).expect("Name is valid");
302/// if named_schemas.contains(&name) {
303/// Schema::Ref { name }
304/// } else {
305/// let enclosing_namespace = name.namespace();
306/// // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion.
307/// named_schemas.insert(name.clone());
308/// let schema = Schema::Record(RecordSchema::builder()
309/// .name(name.clone())
310/// .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!"))
311/// .build()
312/// );
313/// schema
314/// }
315/// }
316///
317/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
318/// Some(vec![
319/// RecordField::builder()
320/// .name("one")
321/// .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace))
322/// .build(),
323/// RecordField::builder()
324/// .name("two")
325/// .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace))
326/// .build(),
327/// RecordField::builder()
328/// .name("three")
329/// .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace))
330/// .build(),
331/// ])
332/// }
333///
334/// fn field_default() -> Option<serde_json::Value> {
335/// // This type does not provide a default value
336/// None
337/// }
338///}
339/// ```
pub trait AvroSchemaComponent {
    /// Get the schema for this component.
    ///
    /// `named_schemas` is the set of names of the named schemas that have already been defined.
    /// The implementations in this module return a `Schema::Ref` when their own name is already
    /// in the set, and insert their name when they emit a new named schema.
    ///
    /// `enclosing_namespace` is the namespace of the schema this component is embedded in, if any.
    fn get_schema_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Schema;

    /// Get the fields of this schema if it is a record.
    ///
    /// This returns `None` if the schema is not a record.
    ///
    /// The default implementation has to do a lot of extra work, so it is strongly recommended to
    /// implement this function when manually implementing this trait.
    ///
    /// The default implementation delegates to the free function [`get_record_fields_in_ctxt`],
    /// passing `Self::get_schema_in_ctxt` as the schema constructor.
    fn get_record_fields_in_ctxt(
        named_schemas: &mut HashSet<Name>,
        enclosing_namespace: NamespaceRef,
    ) -> Option<Vec<RecordField>> {
        // Inside a trait body a bare call resolves to the module-level function of the same name,
        // not to this associated function, so this does not recurse.
        get_record_fields_in_ctxt(named_schemas, enclosing_namespace, Self::get_schema_in_ctxt)
    }

    /// The default value of this type when used for a record field.
    ///
    /// `None` means no default value, which is also the default implementation.
    ///
    /// Implementations of this trait provided by this crate return `None` except for `Option<T>`
    /// which returns `Some(serde_json::Value::Null)`. Transparent wrappers around a `T`
    /// (references, `Box`, 1-tuples, 1-sized arrays, ...) forward to `T::field_default()`.
    fn field_default() -> Option<serde_json::Value> {
        None
    }
}
370
/// Get the record fields from `schema_fn` without polluting `named_schemas` or causing duplicate names
///
/// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]`
///
/// `schema_fn` is called with `named_schemas` and `enclosing_namespace` to obtain the schema:
/// - If it returns a `Schema::Record`, the fields of that record are returned. The record's own
///   name is removed from `named_schemas` again, unless the record references itself, in which
///   case the first self-reference is replaced by the record definition and the name is kept.
/// - If it returns a `Schema::Ref`, the referenced name is temporarily removed from
///   `named_schemas` and `schema_fn` is called a second time to get the actual definition.
/// - Otherwise `None` is returned.
///
/// # Panics
///
/// Panics if `schema_fn` returns a `Schema::Ref` whose name is not in `named_schemas`, or if a
/// record with the same name as the outer record is found nested inside its own fields.
#[doc(hidden)]
pub fn get_record_fields_in_ctxt(
    named_schemas: &mut HashSet<Name>,
    enclosing_namespace: NamespaceRef,
    schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema,
) -> Option<Vec<RecordField>> {
    let mut record = match schema_fn(named_schemas, enclosing_namespace) {
        Schema::Record(record) => record,
        Schema::Ref { name } => {
            // This schema already exists in `named_schemas` so temporarily remove it so we can
            // get the actual schema.
            assert!(
                named_schemas.remove(&name),
                "Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}"
            );
            // Get the schema
            let schema = schema_fn(named_schemas, enclosing_namespace);
            // Reinsert the old value
            named_schemas.insert(name);

            // Now check if we actually got a record and return the fields if that is the case
            let Schema::Record(record) = schema else {
                return None;
            };
            return Some(record.fields);
        }
        // Any other schema is not a record, so there are no fields to return.
        _ => return None,
    };
    // This schema did not yet exist in `named_schemas`, so we need to remove it if and only if
    // it isn't used somewhere in the schema (recursive type).

    // Find the first Schema::Ref that has the target name.
    //
    // The search is depth-first through arrays, maps, unions and records, in declaration order.
    // Every other schema kind is treated as a leaf.
    fn find_first_ref<'a>(schema: &'a mut Schema, target: &Name) -> Option<&'a mut Schema> {
        match schema {
            Schema::Ref { name } if name == target => Some(schema),
            Schema::Array(array) => find_first_ref(&mut array.items, target),
            Schema::Map(map) => find_first_ref(&mut map.types, target),
            Schema::Union(union) => {
                for schema in &mut union.schemas {
                    if let Some(schema) = find_first_ref(schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            Schema::Record(record) => {
                // A nested definition with the target name would mean the name is defined twice.
                assert_ne!(
                    &record.name, target,
                    "Only expecting a Ref named {target:?}"
                );
                for field in &mut record.fields {
                    if let Some(schema) = find_first_ref(&mut field.schema, target) {
                        return Some(schema);
                    }
                }
                None
            }
            _ => None,
        }
    }

    // Prepare the fields for the new record. All named types will become references.
    // These fields are only used for the inlined copy of the record that replaces the first
    // self-reference (see below). Only the field's own schema is checked with `is_named()`;
    // schemas nested deeper (e.g. inside an array) are cloned unchanged.
    let new_fields = record
        .fields
        .iter()
        .map(|field| RecordField {
            name: field.name.clone(),
            doc: field.doc.clone(),
            aliases: field.aliases.clone(),
            default: field.default.clone(),
            schema: if field.schema.is_named() {
                Schema::Ref {
                    name: field.schema.name().expect("Schema is named").clone(),
                }
            } else {
                field.schema.clone()
            },
            custom_attributes: field.custom_attributes.clone(),
        })
        .collect();

    // Remove the name in case it is not used
    named_schemas.remove(&record.name);

    // Find the first reference to this schema so we can replace it with the actual schema
    for field in &mut record.fields {
        if let Some(schema) = find_first_ref(&mut field.schema, &record.name) {
            // The parts of `record` are moved into the new schema here. This is only valid
            // because the loop is left via `break` right after the replacement.
            let new_schema = RecordSchema {
                name: record.name,
                aliases: record.aliases,
                doc: record.doc,
                fields: new_fields,
                lookup: record.lookup,
                attributes: record.attributes,
            };

            // Swap the reference for the definition and recover the name from the old reference.
            let name = match std::mem::replace(schema, Schema::Record(new_schema)) {
                Schema::Ref { name } => name,
                schema => {
                    panic!("Only expected `Schema::Ref` from `find_first_ref`, got: {schema:?}")
                }
            };

            // The schema is used, so reinsert it
            named_schemas.insert(name.clone());

            break;
        }
    }

    Some(record.fields)
}
486
487impl<T> AvroSchema for T
488where
489 T: AvroSchemaComponent + ?Sized,
490{
491 fn get_schema() -> Schema {
492 T::get_schema_in_ctxt(&mut HashSet::default(), None)
493 }
494}
495
496macro_rules! impl_schema (
497 ($type:ty, $variant_constructor:expr) => (
498 impl AvroSchemaComponent for $type {
499 fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
500 $variant_constructor
501 }
502
503 fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
504 None
505 }
506 }
507 );
508);
509
510impl_schema!(bool, Schema::Boolean);
511impl_schema!(i8, Schema::Int);
512impl_schema!(i16, Schema::Int);
513impl_schema!(i32, Schema::Int);
514impl_schema!(i64, Schema::Long);
515impl_schema!(u8, Schema::Int);
516impl_schema!(u16, Schema::Int);
517impl_schema!(u32, Schema::Long);
518impl_schema!(f32, Schema::Float);
519impl_schema!(f64, Schema::Double);
520impl_schema!(String, Schema::String);
521impl_schema!(str, Schema::String);
522impl_schema!(char, Schema::String);
523impl_schema!((), Schema::Null);
524
525macro_rules! impl_passthrough_schema (
526 ($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => (
527 impl<T: AvroSchemaComponent $(+ $bound)* + ?Sized> AvroSchemaComponent for $type {
528 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
529 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
530 }
531
532 fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> {
533 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
534 }
535
536 fn field_default() -> Option<serde_json::Value> {
537 T::field_default()
538 }
539 }
540 );
541);
542
543impl_passthrough_schema!(&T where T: AvroSchemaComponent + ?Sized);
544impl_passthrough_schema!(&mut T where T: AvroSchemaComponent + ?Sized);
545impl_passthrough_schema!(Box<T> where T: AvroSchemaComponent + ?Sized);
546impl_passthrough_schema!(Cow<'_, T> where T: AvroSchemaComponent + ?Sized + ToOwned);
547impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Sized);
548
549macro_rules! impl_array_schema (
550 ($type:ty where T: AvroSchemaComponent) => (
551 impl<T: AvroSchemaComponent> AvroSchemaComponent for $type {
552 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
553 Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
554 }
555
556 fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> {
557 None
558 }
559 }
560 );
561);
562
563impl_array_schema!([T] where T: AvroSchemaComponent);
564impl_array_schema!(Vec<T> where T: AvroSchemaComponent);
565
566impl<T> AvroSchemaComponent for HashMap<String, T>
567where
568 T: AvroSchemaComponent,
569{
570 fn get_schema_in_ctxt(
571 named_schemas: &mut HashSet<Name>,
572 enclosing_namespace: NamespaceRef,
573 ) -> Schema {
574 Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build()
575 }
576
577 fn get_record_fields_in_ctxt(
578 _: &mut HashSet<Name>,
579 _: NamespaceRef,
580 ) -> Option<Vec<RecordField>> {
581 None
582 }
583}
584
585impl<T> AvroSchemaComponent for Option<T>
586where
587 T: AvroSchemaComponent,
588{
589 fn get_schema_in_ctxt(
590 named_schemas: &mut HashSet<Name>,
591 enclosing_namespace: NamespaceRef,
592 ) -> Schema {
593 let variants = vec![
594 Schema::Null,
595 T::get_schema_in_ctxt(named_schemas, enclosing_namespace),
596 ];
597
598 Schema::Union(
599 UnionSchema::new(variants).expect("Option<T> must produce a valid (non-nested) union"),
600 )
601 }
602
603 fn get_record_fields_in_ctxt(
604 _: &mut HashSet<Name>,
605 _: NamespaceRef,
606 ) -> Option<Vec<RecordField>> {
607 None
608 }
609
610 fn field_default() -> Option<serde_json::Value> {
611 Some(serde_json::Value::Null)
612 }
613}
614
615impl AvroSchemaComponent for core::time::Duration {
616 /// The schema is [`Schema::Record`] with the name `org.apache.avro.rust.Duration`.
617 ///
618 /// It has two fields:
619 /// - `secs` with the schema `Schema::Fixed(name: "org.apache.avro.rust.u64", size: 8)`
620 /// - `nanos` with the schema `Schema::Long`
621 fn get_schema_in_ctxt(
622 named_schemas: &mut HashSet<Name>,
623 enclosing_namespace: NamespaceRef,
624 ) -> Schema {
625 let name = Name::new("org.apache.avro.rust.Duration").expect("Name is valid");
626 if named_schemas.contains(&name) {
627 Schema::Ref { name }
628 } else {
629 named_schemas.insert(name.clone());
630 Schema::record(name)
631 .fields(
632 Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
633 .expect("Unreachable!"),
634 )
635 .build()
636 }
637 }
638
639 fn get_record_fields_in_ctxt(
640 named_schemas: &mut HashSet<Name>,
641 enclosing_namespace: NamespaceRef,
642 ) -> Option<Vec<RecordField>> {
643 Some(vec![
644 // Secs is an u64
645 RecordField::builder()
646 .name("secs")
647 .schema(u64::get_schema_in_ctxt(named_schemas, enclosing_namespace))
648 .build(),
649 // Nanos is an u32
650 RecordField::builder()
651 .name("nanos")
652 .schema(Schema::Long)
653 .build(),
654 ])
655 }
656}
657
658impl AvroSchemaComponent for uuid::Uuid {
659 /// The schema is [`Schema::Uuid`] with the name `org.apache.avro.rust.Uuid`.
660 ///
661 /// The underlying schema is [`Schema::Fixed`] with a size of 16.
662 ///
663 /// If you're using `human_readable: true` you need to override this schema with a `Schema::String`.
664 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
665 let name = Name::new("org.apache.avro.rust.Uuid").expect("Name is valid");
666 if named_schemas.contains(&name) {
667 Schema::Ref { name }
668 } else {
669 let schema = Schema::Uuid(UuidSchema::Fixed(FixedSchema {
670 name: name.clone(),
671 aliases: None,
672 doc: None,
673 size: 16,
674 attributes: Default::default(),
675 }));
676 named_schemas.insert(name);
677 schema
678 }
679 }
680
681 fn get_record_fields_in_ctxt(
682 _: &mut HashSet<Name>,
683 _: NamespaceRef,
684 ) -> Option<Vec<RecordField>> {
685 None
686 }
687}
688
689impl AvroSchemaComponent for u64 {
690 /// The schema is [`Schema::Fixed`] of size 8 with the name `org.apache.avro.rust.u64`.
691 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
692 let name = Name::new("org.apache.avro.rust.u64").expect("Name is valid");
693 if named_schemas.contains(&name) {
694 Schema::Ref { name }
695 } else {
696 let schema = Schema::Fixed(FixedSchema {
697 name: name.clone(),
698 aliases: None,
699 doc: None,
700 size: 8,
701 attributes: Default::default(),
702 });
703 named_schemas.insert(name);
704 schema
705 }
706 }
707
708 fn get_record_fields_in_ctxt(
709 _: &mut HashSet<Name>,
710 _: NamespaceRef,
711 ) -> Option<Vec<RecordField>> {
712 None
713 }
714}
715
716impl AvroSchemaComponent for u128 {
717 /// The schema is [`Schema::Fixed`] of size 16 with the name `org.apache.avro.rust.u128`.
718 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
719 let name = Name::new("org.apache.avro.rust.u128").expect("Name is valid");
720 if named_schemas.contains(&name) {
721 Schema::Ref { name }
722 } else {
723 let schema = Schema::Fixed(FixedSchema {
724 name: name.clone(),
725 aliases: None,
726 doc: None,
727 size: 16,
728 attributes: Default::default(),
729 });
730 named_schemas.insert(name);
731 schema
732 }
733 }
734
735 fn get_record_fields_in_ctxt(
736 _: &mut HashSet<Name>,
737 _: NamespaceRef,
738 ) -> Option<Vec<RecordField>> {
739 None
740 }
741}
742
743impl AvroSchemaComponent for i128 {
744 /// The schema is [`Schema::Fixed`] of size 16 with the name `org.apache.avro.rust.i128`.
745 fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, _: NamespaceRef) -> Schema {
746 let name = Name::new("org.apache.avro.rust.i128").expect("Name is valid");
747 if named_schemas.contains(&name) {
748 Schema::Ref { name }
749 } else {
750 let schema = Schema::Fixed(FixedSchema {
751 name: name.clone(),
752 aliases: None,
753 doc: None,
754 size: 16,
755 attributes: Default::default(),
756 });
757 named_schemas.insert(name);
758 schema
759 }
760 }
761
762 fn get_record_fields_in_ctxt(
763 _: &mut HashSet<Name>,
764 _: NamespaceRef,
765 ) -> Option<Vec<RecordField>> {
766 None
767 }
768}
769
770/// Schema definition for `[T; N]`
771///
772/// Schema is defined as follows:
773/// - 0-sized arrays: [`Schema::Null`]
774/// - 1-sized arrays: `T::get_schema_in_ctxt`
775/// - N-sized arrays: [`Schema::Record`] with a field for every index
776///
777/// If you need or want a [`Schema::Array`], [`Schema::Bytes`], or [`Schema::Fixed`] instead,
778/// use [`apache_avro::serde::array`], [`apache_avro::serde::bytes`], or [`apache_avro::serde::fixed`] respectively.
779///
780/// [`apache_avro::serde::array`]: crate::serde::array
781/// [`apache_avro::serde::bytes`]: crate::serde::bytes
782/// [`apache_avro::serde::fixed`]: crate::serde::fixed
783impl<const N: usize, T: AvroSchemaComponent> AvroSchemaComponent for [T; N] {
784 fn get_schema_in_ctxt(
785 named_schemas: &mut HashSet<Name>,
786 enclosing_namespace: NamespaceRef,
787 ) -> Schema {
788 if N == 0 {
789 Schema::Null
790 } else if N == 1 {
791 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
792 } else {
793 let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
794 let name = Name::new_with_enclosing_namespace(
795 format!("A{N}_{}", t_schema.unique_normalized_name()),
796 enclosing_namespace,
797 )
798 .expect("Name is valid");
799 if named_schemas.contains(&name) {
800 Schema::Ref { name }
801 } else {
802 named_schemas.insert(name.clone());
803
804 let t_default = T::field_default();
805 // If T is a named schema or contains named schemas, they'll now be a reference.
806 let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
807 let fields = std::iter::once(
808 RecordField::builder()
809 .name("field_0".to_string())
810 .schema(t_schema)
811 .maybe_default(t_default.clone())
812 .build(),
813 )
814 .chain((1..N).map(|n| {
815 RecordField::builder()
816 .name(format!("field_{n}"))
817 .schema(t_ref.clone())
818 .maybe_default(t_default.clone())
819 .build()
820 }))
821 .collect();
822
823 Schema::record(name).fields(fields).build()
824 }
825 }
826 }
827
828 fn get_record_fields_in_ctxt(
829 named_schemas: &mut HashSet<Name>,
830 enclosing_namespace: NamespaceRef,
831 ) -> Option<Vec<RecordField>> {
832 if N == 0 {
833 None
834 } else if N == 1 {
835 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
836 } else {
837 let t_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
838 let t_default = T::field_default();
839 // If T is a named schema or contains named schemas, they'll now be a reference.
840 let t_ref = T::get_schema_in_ctxt(named_schemas, enclosing_namespace);
841 let fields = std::iter::once(
842 RecordField::builder()
843 .name("field_0".to_string())
844 .schema(t_schema)
845 .maybe_default(t_default.clone())
846 .build(),
847 )
848 .chain((1..N).map(|n| {
849 RecordField::builder()
850 .name(format!("field_{n}"))
851 .schema(t_ref.clone())
852 .maybe_default(t_default.clone())
853 .build()
854 }))
855 .collect();
856 Some(fields)
857 }
858 }
859
860 /// `None` for 0-sized and N-sized arrays, `T::field_default` for 1-sized arrays
861 fn field_default() -> Option<serde_json::Value> {
862 if N == 1 { T::field_default() } else { None }
863 }
864}
865
866/// Schema definition for `(T₁, T₂, …, Tₙ)`.
867///
868/// Implemented for tuples of up to 16 elements.
869///
870/// Schema is defined as follows:
871/// - 1-tuple: `T::get_schema_in_ctxt`
872/// - N-tuple: [`Schema::Record`] with a field for every element
873#[cfg_attr(docsrs, doc(fake_variadic))]
874impl<T: AvroSchemaComponent> AvroSchemaComponent for (T,) {
875 fn get_schema_in_ctxt(
876 named_schemas: &mut HashSet<Name>,
877 enclosing_namespace: NamespaceRef,
878 ) -> Schema {
879 T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
880 }
881
882 fn get_record_fields_in_ctxt(
883 named_schemas: &mut HashSet<Name>,
884 enclosing_namespace: NamespaceRef,
885 ) -> Option<Vec<RecordField>> {
886 T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
887 }
888
889 /// `None` for N-tuples, `T::field_default()` for 1-tuple.
890 fn field_default() -> Option<serde_json::Value> {
891 T::field_default()
892 }
893}
894
// Implements `AvroSchemaComponent` for tuples with two or more elements.
//
// Input: one or more `len => (T0 T1 …)` rows, where `len` is the number of
// identifiers inside the parentheses. `len` is used as the length of the
// fixed-size arrays built in the impl, so a row whose `len` does not match its
// identifier count fails to compile.
//
// Generated schema: a record named `T<len>_<e0>_<e1>…`, where each `<eN>` is
// the element schema's `unique_normalized_name()`, resolved against the
// enclosing namespace. The record has one field `field_<index>` per element,
// carrying that element type's `field_default()` when there is one. If the
// record name is already present in `named_schemas`, a `Schema::Ref` to it is
// returned instead of a second definition.
//
// Panics if `Name::new_with_enclosing_namespace` rejects the generated name.
macro_rules! tuple_impls {
    ($($len:expr => ($($name:ident)+))+) => {
        $(
            #[cfg_attr(docsrs, doc(hidden))]
            impl<$($name: AvroSchemaComponent),+> AvroSchemaComponent for ($($name),+) {
                fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema {
                    // The element schemas must be resolved first because the
                    // record name is derived from them.
                    let schemas: [Schema; $len] = [$($name::get_schema_in_ctxt(named_schemas, enclosing_namespace)),+];

                    let mut name = format!("T{}", $len);
                    for schema in &schemas {
                        name.push('_');
                        name.push_str(&schema.unique_normalized_name());
                    }
                    let name = Name::new_with_enclosing_namespace(name, enclosing_namespace).expect("Name is valid");

                    if named_schemas.contains(&name) {
                        // Already defined in this context: refer to it by name.
                        Schema::Ref { name }
                    } else {
                        named_schemas.insert(name.clone());

                        let defaults: [Option<serde_json::Value>; $len] = [$($name::field_default()),+];

                        // `zip` accepts any `IntoIterator`, so the array is passed directly.
                        let fields = schemas.into_iter().zip(defaults).enumerate().map(|(n, (schema, default))| {
                            RecordField::builder()
                                .name(format!("field_{n}"))
                                .schema(schema)
                                .maybe_default(default)
                                .build()
                        }).collect();

                        Schema::record(name).fields(fields).build()
                    }
                }
            }
        )+
    }
}
932
// Generate the `AvroSchemaComponent` impls for tuples of 2 through 16 elements.
// Each row pairs the tuple length with one type-parameter name per element;
// the 1-tuple is not listed here because it has its own hand-written impl.
tuple_impls! {
    2 => (T0 T1)
    3 => (T0 T1 T2)
    4 => (T0 T1 T2 T3)
    5 => (T0 T1 T2 T3 T4)
    6 => (T0 T1 T2 T3 T4 T5)
    7 => (T0 T1 T2 T3 T4 T5 T6)
    8 => (T0 T1 T2 T3 T4 T5 T6 T7)
    9 => (T0 T1 T2 T3 T4 T5 T6 T7 T8)
    10 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9)
    11 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10)
    12 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11)
    13 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12)
    14 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13)
    15 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14)
    16 => (T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15)
}
950
#[cfg(test)]
mod tests {
    use apache_avro_test_helper::TestResult;

    use crate::{
        AvroSchema, Schema,
        reader::datum::GenericDatumReader,
        schema::{FixedSchema, Name},
        writer::datum::GenericDatumWriter,
    };

    /// `str` is expected to produce the `string` schema.
    #[test]
    fn avro_rs_401_str() -> TestResult {
        assert_eq!(str::get_schema(), Schema::String);

        Ok(())
    }

    /// Shared and mutable references to `str` produce the `string` schema too.
    #[test]
    fn avro_rs_401_references() -> TestResult {
        let by_shared_ref = <&str>::get_schema();
        let by_mut_ref = <&mut str>::get_schema();

        for actual in [by_shared_ref, by_mut_ref] {
            assert_eq!(actual, Schema::String);
        }

        Ok(())
    }

    /// An unsized `[u8]` slice is expected to be an array of `int`.
    #[test]
    fn avro_rs_401_slice() -> TestResult {
        assert_eq!(<[u8]>::get_schema(), Schema::array(Schema::Int).build());

        Ok(())
    }

    /// `Option<&[u8]>` is expected to be the union `[null, array<int>]`.
    #[test]
    fn avro_rs_401_option_ref_slice_array() -> TestResult {
        let actual = <Option<&[u8]>>::get_schema();
        let expected = Schema::union(vec![Schema::Null, Schema::array(Schema::Int).build()])?;
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `char` is expected to produce the `string` schema.
    #[test]
    fn avro_rs_414_char() -> TestResult {
        assert_eq!(char::get_schema(), Schema::String);

        Ok(())
    }

    /// `u64` is expected to be an 8-byte fixed named `org.apache.avro.rust.u64`.
    #[test]
    fn avro_rs_414_u64() -> TestResult {
        let actual = u64::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("org.apache.avro.rust.u64")?,
            aliases: None,
            doc: None,
            size: 8,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `i128` is expected to be a 16-byte fixed named `org.apache.avro.rust.i128`.
    #[test]
    fn avro_rs_414_i128() -> TestResult {
        let actual = i128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("org.apache.avro.rust.i128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// `u128` is expected to be a 16-byte fixed named `org.apache.avro.rust.u128`.
    #[test]
    fn avro_rs_414_u128() -> TestResult {
        let actual = u128::get_schema();
        let expected = Schema::Fixed(FixedSchema {
            name: Name::new("org.apache.avro.rust.u128")?,
            aliases: None,
            doc: None,
            size: 16,
            attributes: Default::default(),
        });
        assert_eq!(actual, expected);

        Ok(())
    }

    /// The unit type is expected to produce the `null` schema.
    #[test]
    fn avro_rs_486_unit() -> TestResult {
        assert_eq!(<()>::get_schema(), Schema::Null);

        Ok(())
    }

    /// `Option<()>` would be a union of two nulls, which must panic.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions cannot contain duplicate types, found at least two Null }"
    )]
    fn avro_rs_489_some_unit() {
        let _ = <Option<()>>::get_schema();
    }

    /// `Option<Option<_>>` would nest a union in a union, which must panic.
    #[test]
    #[should_panic(
        expected = "Option<T> must produce a valid (non-nested) union: Error { details: Unions may not directly contain a union }"
    )]
    fn avro_rs_489_option_option() {
        let _ = <Option<Option<i32>>>::get_schema();
    }

    /// `std::time::Duration` has the expected record schema and its extreme
    /// values survive a write/read round trip through that schema.
    #[test]
    fn avro_rs_512_std_time_duration() -> TestResult {
        use std::time::Duration;

        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "Duration",
            "namespace": "org.apache.avro.rust",
            "fields": [
                { "name": "secs", "type": {"type": "fixed", "name": "u64", "namespace": "org.apache.avro.rust", "size": 8} },
                { "name": "nanos", "type": "long" }
            ]
        }"#,
        )?;
        assert_eq!(expected, Duration::get_schema());

        let writer = GenericDatumWriter::builder(&expected).build()?;
        let reader = GenericDatumReader::builder(&expected).build()?;

        for original in [Duration::ZERO, Duration::MAX] {
            let encoded = writer.write_ser_to_vec(&original)?;
            let decoded: Duration = reader.read_deser(&mut &encoded[..])?;
            assert_eq!(original, decoded);
        }
        Ok(())
    }

    /// Zero-length arrays are expected to be `null` whatever the element type.
    #[test]
    fn avro_rs_512_0_array() -> TestResult {
        let actual_schemas = [
            <[String; 0]>::get_schema(),
            <[(); 0]>::get_schema(),
            <[bool; 0]>::get_schema(),
        ];
        for actual in actual_schemas {
            assert_eq!(Schema::Null, actual);
        }
        Ok(())
    }

    /// One-element arrays are expected to take the schema of their element.
    #[test]
    fn avro_rs_512_1_array() -> TestResult {
        let cases = [
            (Schema::String, <[String; 1]>::get_schema()),
            (Schema::Null, <[(); 1]>::get_schema()),
            (Schema::Boolean, <[bool; 1]>::get_schema()),
        ];
        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// `[String; 5]` is expected to be a record `A5_s` with five string fields.
    #[test]
    fn avro_rs_512_n_array() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A5_s",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "string" },
                { "name": "field_2", "type": "string" },
                { "name": "field_3", "type": "string" },
                { "name": "field_4", "type": "string" }
            ]
        }"#,
        )?;

        assert_eq!(expected, <[String; 5]>::get_schema());
        Ok(())
    }

    /// An array of a named, optional element: the second field is expected to
    /// refer to `Uuid` by name, and both fields default to `null`.
    #[test]
    fn avro_rs_512_n_array_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "A2_u2_n_r25_org_apache_avro_rust_Uuid",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "Uuid", "namespace": "org.apache.avro.rust"}], "default": null },
                { "name": "field_1", "type": ["null", "org.apache.avro.rust.Uuid"], "default": null }
            ]
        }"#,
        )?;

        assert_eq!(expected, <[Option<uuid::Uuid>; 2]>::get_schema());
        Ok(())
    }

    /// One-element tuples are expected to take the schema of their element.
    #[test]
    fn avro_rs_512_1_tuple() -> TestResult {
        let cases = [
            (Schema::String, <(String,)>::get_schema()),
            (Schema::Null, <((),)>::get_schema()),
            (Schema::Boolean, <(bool,)>::get_schema()),
        ];
        for (expected, actual) in cases {
            assert_eq!(expected, actual);
        }
        Ok(())
    }

    /// A 5-tuple of primitives is expected to be a record `T5_s_i_l_B_n` with
    /// one field per element.
    #[test]
    fn avro_rs_512_n_tuple() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T5_s_i_l_B_n",
            "fields": [
                { "name": "field_0", "type": "string" },
                { "name": "field_1", "type": "int" },
                { "name": "field_2", "type": "long" },
                { "name": "field_3", "type": "boolean" },
                { "name": "field_4", "type": "null" }
            ]
        }"#,
        )?;

        assert_eq!(expected, <(String, i32, i64, bool, ())>::get_schema());
        Ok(())
    }

    /// A tuple that repeats a named type: the `Uuid` fixed is expected to be
    /// defined once and referenced by name the second time.
    #[test]
    fn avro_rs_512_n_tuple_complex_type() -> TestResult {
        let expected = Schema::parse_str(
            r#"{
            "type": "record",
            "name": "T3_u2_n_r25_org_apache_avro_rust_Uuid_r25_org_apache_avro_rust_Uuid_s",
            "fields": [
                { "name": "field_0", "type": ["null", {"type": "fixed", "logicalType": "uuid", "size": 16, "name": "Uuid", "namespace": "org.apache.avro.rust"}], "default": null },
                { "name": "field_1", "type": "org.apache.avro.rust.Uuid" },
                { "name": "field_2", "type": "string" }
            ]
        }"#,
        )?;

        assert_eq!(
            expected,
            <(Option<uuid::Uuid>, uuid::Uuid, String)>::get_schema()
        );
        Ok(())
    }
}