apache_avro/serde/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! # Using Avro in Rust, the Serde way.
19//!
20//! Avro is a schema-based format, which means it requires a few extra steps to use compared to
21//! a data format like JSON.
22//!
23//! ## Schemas
24//! It's strongly recommended to derive the schemas for your types using the [`AvroSchema`] derive macro.
25//! The macro uses the Serde attributes to generate a matching schema and checks that no attributes are
26//! used that are incompatible with the Serde implementation in this crate. See [the trait documentation] for
27//! details on how to change the generated schema.
28//!
29//! Alternatively, you can write your own schema. If you go down this path, it is recommended you start with
30//! the schema derived by [`AvroSchema`] and then modify it to fit your needs.
31//!
32//! #### Performance pitfall
33//! One performance pitfall with Serde is (de)serializing bytes. The implementation of [`Serialize`][`serde::Serialize`]
34//! and [`Deserialize`][`serde::Deserialize`] for types such as `Vec<u8>`, `&[u8]` and `Cow<[u8]>` will
35//! all use the array of integers representation. This can normally be fixed using the [`serde_bytes`]
36//! crate; however, `apache_avro` also needs some extra information. Therefore, you need to use the
37//! [`bytes`], [`bytes_opt`], [`fixed`], [`fixed_opt`], [`mod@slice`], and [`slice_opt`] modules of
38//! this crate instead.
39//!
40//! #### Using existing schemas
41//! If you have schemas that are already being used in other parts of your software stack, generating types
42//! from the schema can be very useful. There is a **third-party** crate [`rsgen-avro`] that implements this.
43//!
44//! ## Serializing data
45//! Writing data is very simple. Use [`T::get_schema()`](AvroSchema::get_schema()) to get the schema
46//! for the type you want to serialize. It is recommended to keep this schema around as long as possible
47//! as generating the schema is quite expensive. Then create a [`Writer`](crate::Writer) with your schema
48//! and use the [`append_ser()`](crate::Writer::append_ser()) function to serialize your data.
49//!
50//! ## Deserializing data
51//! Reading data is both simpler and more complex than writing. On the one hand, you don't need to
52//! generate a schema, as the Avro file has it embedded. But you can't directly deserialize from a
53//! [`Reader`](crate::Reader). Instead, you have to iterate over the [`Value`](crate::types::Value)s
54//! in the reader and deserialize from those via [`from_value`].
55//!
56//! ## Putting it all together
57//!
58//! The following is an example of how to combine everything shown so far. It is meant to be a
59//! quick reference of the Serde interface:
60//!
61//! ```
62//! # use std::io::Cursor;
63//! # use serde::{Serialize, Deserialize};
64//! # use apache_avro::{AvroSchema, Error, Reader, Writer, serde::{from_value, to_value}};
65//! #[derive(AvroSchema, Serialize, Deserialize, PartialEq, Debug)]
66//! struct Foo {
67//! a: i64,
68//! b: String,
69//! // Otherwise it will be serialized as an array of integers
70//! #[avro(with)]
71//! #[serde(with = "apache_avro::serde::bytes")]
72//! c: Vec<u8>,
73//! }
74//!
75//! // Creating this schema is expensive, reuse it as much as possible
76//! let schema = Foo::get_schema();
77//! // A writer needs the schema of the type that is going to be written
78//! let mut writer = Writer::new(&schema, Vec::new())?;
79//!
80//! let foo = Foo {
81//! a: 42,
82//! b: "Hello".to_string(),
83//! c: b"Data".to_vec()
84//! };
85//!
86//! // Serialize as many items as you want.
87//! writer.append_ser(&foo)?;
88//! writer.append_ser(&foo)?;
89//! writer.append_ser(&foo)?;
90//!
91//! // Always flush
92//! writer.flush()?;
93//! // Or consume the writer
94//! let data = writer.into_inner()?;
95//!
96//! // The reader does not need a schema as it's included in the data
97//! let reader = Reader::new(Cursor::new(data))?;
98//! // The reader is an iterator
99//! for result in reader {
100//! let value = result?;
101//! let new_foo: Foo = from_value(&value)?;
102//! assert_eq!(new_foo, foo);
103//! }
104//! # Ok::<(), Error>(())
105//! ```
106//!
107//! [`rsgen-avro`]: https://docs.rs/rsgen-avro/latest/rsgen_avro/
108//! [the trait documentation]: AvroSchema
109
// Submodules of this module. All are private except `ser_schema`, which is
// visible to the rest of the crate; selected items are re-exported below.
110mod de;
111mod derive;
112mod ser;
// Crate-visible, but nothing from it is re-exported in this file.
113pub(crate) mod ser_schema;
114mod util;
115mod with;
116
// Public re-exports: these are the items referenced by the module docs above
// (`from_value`, `AvroSchema`, and the `bytes`/`fixed`/`slice` helper modules).
117pub use de::from_value;
118pub use derive::{AvroSchema, AvroSchemaComponent};
119pub use ser::to_value;
120pub use with::{bytes, bytes_opt, fixed, fixed_opt, slice, slice_opt};
121
// Publicly re-exported, but excluded from the rendered documentation.
122#[doc(hidden)]
123pub use derive::get_record_fields_in_ctxt;