apache_avro/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
19//! data structures and a compact, fast, binary data format. If you are not familiar with the data
20//! format, please read [`documentation::primer`] first.
21//!
22//! There are two ways of working with Avro data in this crate:
23//!
24//! 1. Via the generic [`Value`](types::Value) type.
25//! 2. Via types implementing [`AvroSchema`] and Serde's [`Serialize`] and [`Deserialize`].
26//!
27//! The first option is great for dealing with Avro data in a dynamic way. For example, when working
28//! with unknown or rapidly changing schemas or when you don't want or need to map to Rust types. The
29//! module documentation of [`documentation::dynamic`] explains how to work in this dynamic way.
30//!
31//! The second option is great when dealing with static schemas that should be decoded to and encoded
32//! from Rust types. The module documentation of [`serde`] explains how to work in this static way.
33//!
34//! # Features
35//!
36//! - `derive`: enable support for deriving [`AvroSchema`]
37//! - `snappy`: enable support for the Snappy codec
38//! - `zstandard`: enable support for the Zstandard codec
39//! - `bzip`: enable support for the Bzip2 codec
40//! - `xz`: enable support for the Xz codec
41//!
42//! # MSRV
43//!
44//! The current MSRV is 1.88.0.
45//!
46//! The MSRV may be bumped in minor releases.
47//!
48// These are links because otherwise `cargo rdme` gets angry
49//! [`Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
50//! [`Deserialize`]: https://docs.rs/serde/latest/serde/trait.Deserialize.html
51
52// Enable using `fake_variadic` on docs.rs
53#![cfg_attr(docsrs, feature(rustdoc_internals))]
54#![cfg_attr(docsrs, allow(internal_features))]
55
56mod bigdecimal;
57mod bytes;
58mod codec;
59mod decimal;
60mod decode;
61mod duration;
62mod encode;
63
64#[cfg(doc)]
65pub mod documentation;
66pub mod error;
67pub mod headers;
68pub mod rabin;
69pub mod reader;
70pub mod schema;
71pub mod schema_compatibility;
72pub mod schema_equality;
73pub mod serde;
74pub mod types;
75pub mod util;
76pub mod validator;
77pub mod writer;
78
79#[expect(deprecated)]
80pub use crate::{
81    bigdecimal::BigDecimal,
82    bytes::{
83        serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt,
84        serde_avro_slice, serde_avro_slice_opt,
85    },
86};
87#[cfg(feature = "bzip")]
88pub use codec::bzip::Bzip2Settings;
89#[cfg(feature = "xz")]
90pub use codec::xz::XzSettings;
91#[cfg(feature = "zstandard")]
92pub use codec::zstandard::ZstandardSettings;
93pub use codec::{Codec, DeflateSettings};
94pub use decimal::Decimal;
95pub use duration::{Days, Duration, Millis, Months};
96pub use error::Error;
97#[expect(
98    deprecated,
99    reason = "Still need to export it until we remove it completely"
100)]
101pub use reader::{
102    Reader,
103    datum::{from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata},
104    read_marker,
105    single_object::{GenericSingleObjectReader, SpecificSingleObjectReader},
106};
107pub use schema::Schema;
108pub use serde::{AvroSchema, AvroSchemaComponent, from_value, to_value};
109pub use uuid::Uuid;
110#[expect(
111    deprecated,
112    reason = "Still need to export it until we remove it completely"
113)]
114pub use writer::{
115    Clearable, Writer, WriterBuilder,
116    datum::{to_avro_datum, to_avro_datum_schemata, write_avro_datum_ref},
117    single_object::{GenericSingleObjectWriter, SpecificSingleObjectWriter},
118};
119
120#[cfg(feature = "derive")]
121pub use apache_avro_derive::AvroSchema;
122
/// A convenience alias for [`Result`] whose error type is this crate's [`Error`].
pub type AvroResult<T> = Result<T, Error>;
125
126/// Set the maximum number of bytes that can be allocated when decoding data.
127///
128/// This function only changes the setting once. On subsequent calls the value will stay the same
129/// as the first time it is called. It is automatically called on first allocation and defaults to
130/// [`util::DEFAULT_MAX_ALLOCATION_BYTES`].
131///
132/// # Returns
133/// The configured maximum, which might be different from what the function was called with if the
134/// value was already set before.
135#[deprecated(
136    since = "0.21.0",
137    note = "Please use apache_avro::util::max_allocation_bytes"
138)]
139pub fn max_allocation_bytes(num_bytes: usize) -> usize {
140    util::max_allocation_bytes(num_bytes)
141}
142
143/// Set whether the serializer and deserializer should indicate to types that the format is human-readable.
144///
145/// This function only changes the setting once. On subsequent calls the value will stay the same
146/// as the first time it is called. It is automatically called on first allocation and defaults to
147/// [`util::DEFAULT_SERDE_HUMAN_READABLE`].
148///
149/// *NOTE*: Changing this setting can change the output of [`from_value`] and the
150/// accepted input of [`to_value`].
151///
152/// # Returns
153/// The configured human-readable value, which might be different from what the function was called
154/// with if the value was already set before.
155#[deprecated(
156    since = "0.21.0",
157    note = "Please use apache_avro::util::set_serde_human_readable"
158)]
159pub fn set_serde_human_readable(human_readable: bool) -> bool {
160    util::set_serde_human_readable(human_readable)
161}
162
163#[cfg(test)]
164mod tests {
165    use crate::{
166        Codec, Reader, Schema, Writer,
167        reader::datum::GenericDatumReader,
168        types::{Record, Value},
169    };
170    use apache_avro_test_helper::TestResult;
171    use pretty_assertions::assert_eq;
172
173    //TODO: move where it fits better
174    #[test]
175    fn test_enum_default() {
176        let writer_raw_schema = r#"
177            {
178                "type": "record",
179                "name": "test",
180                "fields": [
181                    {"name": "a", "type": "long", "default": 42},
182                    {"name": "b", "type": "string"}
183                ]
184            }
185        "#;
186        let reader_raw_schema = r#"
187            {
188                "type": "record",
189                "name": "test",
190                "fields": [
191                    {"name": "a", "type": "long", "default": 42},
192                    {"name": "b", "type": "string"},
193                    {
194                        "name": "c",
195                        "type": {
196                            "type": "enum",
197                            "name": "suit",
198                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
199                        },
200                        "default": "spades"
201                    }
202                ]
203            }
204        "#;
205        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
206        let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
207        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null).unwrap();
208        let mut record = Record::new(writer.schema()).unwrap();
209        record.put("a", 27i64);
210        record.put("b", "foo");
211        writer.append_value(record).unwrap();
212        let input = writer.into_inner().unwrap();
213        let mut reader = Reader::builder(&input[..])
214            .reader_schema(&reader_schema)
215            .build()
216            .unwrap();
217        assert_eq!(
218            reader.next().unwrap().unwrap(),
219            Value::Record(vec![
220                ("a".to_string(), Value::Long(27)),
221                ("b".to_string(), Value::String("foo".to_string())),
222                ("c".to_string(), Value::Enum(1, "spades".to_string())),
223            ])
224        );
225        assert!(reader.next().is_none());
226    }
227
228    //TODO: move where it fits better
229    #[test]
230    fn test_enum_string_value() {
231        let raw_schema = r#"
232            {
233                "type": "record",
234                "name": "test",
235                "fields": [
236                    {"name": "a", "type": "long", "default": 42},
237                    {"name": "b", "type": "string"},
238                    {
239                        "name": "c",
240                        "type": {
241                            "type": "enum",
242                            "name": "suit",
243                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
244                        },
245                        "default": "spades"
246                    }
247                ]
248            }
249        "#;
250        let schema = Schema::parse_str(raw_schema).unwrap();
251        let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null).unwrap();
252        let mut record = Record::new(writer.schema()).unwrap();
253        record.put("a", 27i64);
254        record.put("b", "foo");
255        record.put("c", "clubs");
256        writer.append_value(record).unwrap();
257        let input = writer.into_inner().unwrap();
258        let mut reader = Reader::builder(&input[..])
259            .reader_schema(&schema)
260            .build()
261            .unwrap();
262        assert_eq!(
263            reader.next().unwrap().unwrap(),
264            Value::Record(vec![
265                ("a".to_string(), Value::Long(27)),
266                ("b".to_string(), Value::String("foo".to_string())),
267                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
268            ])
269        );
270        assert!(reader.next().is_none());
271    }
272
273    //TODO: move where it fits better
274    #[test]
275    fn test_enum_no_reader_schema() {
276        let writer_raw_schema = r#"
277            {
278                "type": "record",
279                "name": "test",
280                "fields": [
281                    {"name": "a", "type": "long", "default": 42},
282                    {"name": "b", "type": "string"},
283                    {
284                        "name": "c",
285                        "type": {
286                            "type": "enum",
287                            "name": "suit",
288                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
289                        },
290                        "default": "spades"
291                    }
292                ]
293            }
294        "#;
295        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
296        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null).unwrap();
297        let mut record = Record::new(writer.schema()).unwrap();
298        record.put("a", 27i64);
299        record.put("b", "foo");
300        record.put("c", "clubs");
301        writer.append_value(record).unwrap();
302        let input = writer.into_inner().unwrap();
303        let mut reader = Reader::new(&input[..]).unwrap();
304        assert_eq!(
305            reader.next().unwrap().unwrap(),
306            Value::Record(vec![
307                ("a".to_string(), Value::Long(27)),
308                ("b".to_string(), Value::String("foo".to_string())),
309                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
310            ])
311        );
312    }
313
314    #[test]
315    fn test_illformed_length() -> TestResult {
316        let raw_schema = r#"
317            {
318                "type": "record",
319                "name": "test",
320                "fields": [
321                    {"name": "a", "type": "long", "default": 42},
322                    {"name": "b", "type": "string"}
323                ]
324            }
325        "#;
326
327        let schema = Schema::parse_str(raw_schema).unwrap();
328
329        // Would allocate 18446744073709551605 bytes
330        let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff];
331
332        let value = GenericDatumReader::builder(&schema)
333            .build()?
334            .read_value(&mut &*illformed);
335        assert!(value.is_err());
336
337        Ok(())
338    }
339}