apache_avro/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
19//! data structures and a compact, fast, binary data format. If you are not familiar with the data
20//! format, please read [`documentation::primer`] first.
21//!
22//! There are two ways of working with Avro data in this crate:
23//!
24//! 1. Via the generic [`Value`](types::Value) type.
25//! 2. Via types implementing [`AvroSchema`] and Serde's [`Serialize`] and [`Deserialize`].
26//!
//! The first option is great for dealing with Avro data in a dynamic way, for example when working
//! with unknown or rapidly changing schemas, or when you don't want or need to map to Rust types.
//! The module documentation of [`documentation::dynamic`] explains how to work in this dynamic way.
30//!
31//! The second option is great when dealing with static schemas that should be decoded to and encoded
32//! from Rust types. The module documentation of [`serde`] explains how to work in this static way.
33//!
34//! # Features
35//!
36//! - `derive`: enable support for deriving [`AvroSchema`]
37//! - `snappy`: enable support for the Snappy codec
38//! - `zstandard`: enable support for the Zstandard codec
39//! - `bzip`: enable support for the Bzip2 codec
40//! - `xz`: enable support for the Xz codec
41//!
42//! # MSRV
43//!
44//! The current MSRV is 1.88.0.
45//!
46//! The MSRV may be bumped in minor releases.
47//!
48// These are links because otherwise `cargo rdme` gets angry
49//! [`Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
50//! [`Deserialize`]: https://docs.rs/serde/latest/serde/trait.Deserialize.html
51
52mod bigdecimal;
53mod bytes;
54mod codec;
55mod decimal;
56mod decode;
57mod duration;
58mod encode;
59
60#[cfg(doc)]
61pub mod documentation;
62pub mod error;
63pub mod headers;
64pub mod rabin;
65pub mod reader;
66pub mod schema;
67pub mod schema_compatibility;
68pub mod schema_equality;
69pub mod serde;
70pub mod types;
71pub mod util;
72pub mod validator;
73pub mod writer;
74
75#[expect(deprecated)]
76pub use crate::{
77    bigdecimal::BigDecimal,
78    bytes::{
79        serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt,
80        serde_avro_slice, serde_avro_slice_opt,
81    },
82};
83#[cfg(feature = "bzip")]
84pub use codec::bzip::Bzip2Settings;
85#[cfg(feature = "xz")]
86pub use codec::xz::XzSettings;
87#[cfg(feature = "zstandard")]
88pub use codec::zstandard::ZstandardSettings;
89pub use codec::{Codec, DeflateSettings};
90pub use decimal::Decimal;
91pub use duration::{Days, Duration, Millis, Months};
92pub use error::Error;
93#[expect(
94    deprecated,
95    reason = "Still need to export it until we remove it completely"
96)]
97pub use reader::{
98    Reader,
99    datum::{from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata},
100    read_marker,
101    single_object::{GenericSingleObjectReader, SpecificSingleObjectReader},
102};
103pub use schema::Schema;
104pub use serde::{AvroSchema, AvroSchemaComponent, from_value, to_value};
105pub use uuid::Uuid;
106#[expect(
107    deprecated,
108    reason = "Still need to export it until we remove it completely"
109)]
110pub use writer::{
111    Clearable, Writer, WriterBuilder,
112    datum::{to_avro_datum, to_avro_datum_schemata, write_avro_datum_ref},
113    single_object::{GenericSingleObjectWriter, SpecificSingleObjectWriter},
114};
115
116#[cfg(feature = "derive")]
117pub use apache_avro_derive::AvroSchema;
118
119/// A convenience type alias for `Result`s with `Error`s.
120pub type AvroResult<T> = Result<T, Error>;
121
122/// Set the maximum number of bytes that can be allocated when decoding data.
123///
124/// This function only changes the setting once. On subsequent calls the value will stay the same
125/// as the first time it is called. It is automatically called on first allocation and defaults to
126/// [`util::DEFAULT_MAX_ALLOCATION_BYTES`].
127///
128/// # Returns
129/// The configured maximum, which might be different from what the function was called with if the
130/// value was already set before.
131#[deprecated(
132    since = "0.21.0",
133    note = "Please use apache_avro::util::max_allocation_bytes"
134)]
135pub fn max_allocation_bytes(num_bytes: usize) -> usize {
136    util::max_allocation_bytes(num_bytes)
137}
138
139/// Set whether the serializer and deserializer should indicate to types that the format is human-readable.
140///
141/// This function only changes the setting once. On subsequent calls the value will stay the same
142/// as the first time it is called. It is automatically called on first allocation and defaults to
143/// [`util::DEFAULT_SERDE_HUMAN_READABLE`].
144///
145/// *NOTE*: Changing this setting can change the output of [`from_value`] and the
146/// accepted input of [`to_value`].
147///
148/// # Returns
149/// The configured human-readable value, which might be different from what the function was called
150/// with if the value was already set before.
151#[deprecated(
152    since = "0.21.0",
153    note = "Please use apache_avro::util::set_serde_human_readable"
154)]
155pub fn set_serde_human_readable(human_readable: bool) -> bool {
156    util::set_serde_human_readable(human_readable)
157}
158
#[cfg(test)]
mod tests {
    use crate::{
        Codec, Reader, Schema, Writer,
        reader::datum::GenericDatumReader,
        types::{Record, Value},
    };
    use apache_avro_test_helper::TestResult;
    use pretty_assertions::assert_eq;

    /// Record schema with the fields `a` (long, default 42) and `b` (string).
    ///
    /// The literal is kept byte-for-byte identical to the one the tests used inline.
    const PLAIN_RECORD_SCHEMA: &str = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"}
                ]
            }
        "#;

    /// Same record as [`PLAIN_RECORD_SCHEMA`] plus an enum field `c` (`suit`) that defaults to
    /// `"spades"`.
    ///
    /// The literal is kept byte-for-byte identical to the one the tests used inline.
    const SUIT_RECORD_SCHEMA: &str = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"},
                    {
                        "name": "c",
                        "type": {
                            "type": "enum",
                            "name": "suit",
                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
                        },
                        "default": "spades"
                    }
                ]
            }
        "#;

    /// Writes a single record (`a = 27`, `b = "foo"`) with `schema` and the `Null` codec and
    /// returns the bytes produced by the writer.
    ///
    /// When `suit` is `Some`, field `c` is additionally set to that symbol name.
    fn write_single_record(schema: &Schema, suit: Option<&str>) -> Vec<u8> {
        let mut writer = Writer::with_codec(schema, Vec::new(), Codec::Null).unwrap();
        let mut record = Record::new(writer.schema()).unwrap();
        record.put("a", 27i64);
        record.put("b", "foo");
        if let Some(symbol) = suit {
            record.put("c", symbol);
        }
        writer.append_value(record).unwrap();
        writer.into_inner().unwrap()
    }

    /// Data written without field `c` must be read back with the enum default from the reader
    /// schema filled in.
    //TODO: move where it fits better
    #[test]
    fn test_enum_default() {
        let writer_schema = Schema::parse_str(PLAIN_RECORD_SCHEMA).unwrap();
        let reader_schema = Schema::parse_str(SUIT_RECORD_SCHEMA).unwrap();
        let input = write_single_record(&writer_schema, None);

        let mut reader = Reader::builder(&input[..])
            .reader_schema(&reader_schema)
            .build()
            .unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(1, "spades".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    /// An enum field populated from a plain string must be read back as the matching
    /// `Value::Enum` when the same schema is used for writing and reading.
    //TODO: move where it fits better
    #[test]
    fn test_enum_string_value() {
        let schema = Schema::parse_str(SUIT_RECORD_SCHEMA).unwrap();
        let input = write_single_record(&schema, Some("clubs"));

        let mut reader = Reader::builder(&input[..])
            .reader_schema(&schema)
            .build()
            .unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    /// Same round trip as `test_enum_string_value`, but the reader is created without an explicit
    /// reader schema.
    //TODO: move where it fits better
    #[test]
    fn test_enum_no_reader_schema() {
        let writer_schema = Schema::parse_str(SUIT_RECORD_SCHEMA).unwrap();
        let input = write_single_record(&writer_schema, Some("clubs"));

        let mut reader = Reader::new(&input[..]).unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
        // Exactly one record was written, so the reader must be exhausted now (the sibling
        // tests check the same thing).
        assert!(reader.next().is_none());
    }

    /// Reading a datum whose encoded length is bogus must yield an error.
    #[test]
    fn test_illformed_length() -> TestResult {
        let schema = Schema::parse_str(PLAIN_RECORD_SCHEMA).unwrap();

        // Would allocate 18446744073709551605 bytes
        let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff];

        let value = GenericDatumReader::builder(&schema)
            .build()?
            .read_value(&mut &*illformed);
        assert!(value.is_err());

        Ok(())
    }
}