apache_avro/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
19//! data structures and a compact, fast, binary data format. If you are not familiar with the data
20//! format, please read [`documentation::primer`] first.
21//!
22//! There are two ways of working with Avro data in this crate:
23//!
24//! 1. Via the generic [`Value`](types::Value) type.
25//! 2. Via types implementing [`AvroSchema`] and Serde's [`Serialize`] and [`Deserialize`].
26//!
//! The first option is great for dealing with Avro data in a dynamic way, for example when working
//! with unknown or rapidly changing schemas, or when you don't want or need to map to Rust types.
//! The module documentation of [`documentation::dynamic`] explains how to work in this dynamic way.
30//!
31//! The second option is great when dealing with static schemas that should be decoded to and encoded
32//! from Rust types. The module documentation of [`serde`] explains how to work in this static way.
33//!
34//! # Features
35//!
36//! - `derive`: enable support for deriving [`AvroSchema`]
37//! - `snappy`: enable support for the Snappy codec
38//! - `zstandard`: enable support for the Zstandard codec
39//! - `bzip`: enable support for the Bzip2 codec
40//! - `xz`: enable support for the Xz codec
41//!
42//! # MSRV
43//!
44//! The current MSRV is 1.88.0.
45//!
46//! The MSRV may be bumped in minor releases.
47//!
48// These are links because otherwise `cargo rdme` gets angry
49//! [`Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
50//! [`Deserialize`]: https://docs.rs/serde/latest/serde/trait.Deserialize.html
51
52mod bigdecimal;
53mod bytes;
54mod codec;
55mod decimal;
56mod decode;
57mod duration;
58mod encode;
59mod reader;
60mod writer;
61
62#[cfg(doc)]
63pub mod documentation;
64pub mod error;
65pub mod headers;
66pub mod rabin;
67pub mod schema;
68pub mod schema_compatibility;
69pub mod schema_equality;
70pub mod serde;
71pub mod types;
72pub mod util;
73pub mod validator;
74
75#[expect(deprecated)]
76pub use crate::{
77    bigdecimal::BigDecimal,
78    bytes::{
79        serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt,
80        serde_avro_slice, serde_avro_slice_opt,
81    },
82};
83#[cfg(feature = "bzip")]
84pub use codec::bzip::Bzip2Settings;
85#[cfg(feature = "xz")]
86pub use codec::xz::XzSettings;
87#[cfg(feature = "zstandard")]
88pub use codec::zstandard::ZstandardSettings;
89pub use codec::{Codec, DeflateSettings};
90pub use decimal::Decimal;
91pub use duration::{Days, Duration, Millis, Months};
92pub use error::Error;
93pub use reader::{
94    Reader, from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata,
95    read_marker,
96    single_object::{GenericSingleObjectReader, SpecificSingleObjectReader},
97};
98pub use schema::Schema;
99pub use serde::{AvroSchema, AvroSchemaComponent, from_value, to_value};
100pub use uuid::Uuid;
101pub use writer::{
102    Clearable, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer, WriterBuilder,
103    to_avro_datum, to_avro_datum_schemata, write_avro_datum_ref,
104};
105
106#[cfg(feature = "derive")]
107pub use apache_avro_derive::AvroSchema;
108
109/// A convenience type alias for `Result`s with `Error`s.
110pub type AvroResult<T> = Result<T, Error>;
111
112/// Set the maximum number of bytes that can be allocated when decoding data.
113///
114/// This function only changes the setting once. On subsequent calls the value will stay the same
115/// as the first time it is called. It is automatically called on first allocation and defaults to
116/// [`util::DEFAULT_MAX_ALLOCATION_BYTES`].
117///
118/// # Returns
119/// The configured maximum, which might be different from what the function was called with if the
120/// value was already set before.
121#[deprecated(
122    since = "0.21.0",
123    note = "Please use apache_avro::util::max_allocation_bytes"
124)]
125pub fn max_allocation_bytes(num_bytes: usize) -> usize {
126    util::max_allocation_bytes(num_bytes)
127}
128
129/// Set whether the serializer and deserializer should indicate to types that the format is human-readable.
130///
131/// This function only changes the setting once. On subsequent calls the value will stay the same
132/// as the first time it is called. It is automatically called on first allocation and defaults to
133/// [`util::DEFAULT_SERDE_HUMAN_READABLE`].
134///
135/// *NOTE*: Changing this setting can change the output of [`from_value`] and the
136/// accepted input of [`to_value`].
137///
138/// # Returns
139/// The configured human-readable value, which might be different from what the function was called
140/// with if the value was already set before.
141#[deprecated(
142    since = "0.21.0",
143    note = "Please use apache_avro::util::set_serde_human_readable"
144)]
145pub fn set_serde_human_readable(human_readable: bool) -> bool {
146    util::set_serde_human_readable(human_readable)
147}
148
#[cfg(test)]
mod tests {
    use crate::{
        Codec, Reader, Schema, Writer, from_avro_datum,
        types::{Record, Value},
    };
    use pretty_assertions::assert_eq;

    /// Record schema with a defaulted `long` field `a` and a `string` field `b`.
    const BASE_RECORD_SCHEMA: &str = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"}
                ]
            }
        "#;

    /// [`BASE_RECORD_SCHEMA`] extended with an enum field `c` that defaults to `"spades"`.
    const RECORD_WITH_SUIT_SCHEMA: &str = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"},
                    {
                        "name": "c",
                        "type": {
                            "type": "enum",
                            "name": "suit",
                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
                        },
                        "default": "spades"
                    }
                ]
            }
        "#;

    /// Writes a single record (`a = 27`, `b = "foo"`) with `schema` using the null codec and
    /// returns the bytes produced by the writer.
    ///
    /// When `suit` is `Some`, field `c` is additionally set to that string, so the enum field is
    /// populated from a plain string value.
    fn write_single_record(schema: &Schema, suit: Option<&str>) -> Vec<u8> {
        let mut writer = Writer::with_codec(schema, Vec::new(), Codec::Null).unwrap();
        let mut record = Record::new(writer.schema()).unwrap();
        record.put("a", 27i64);
        record.put("b", "foo");
        if let Some(symbol) = suit {
            record.put("c", symbol);
        }
        writer.append_value(record).unwrap();
        writer.into_inner().unwrap()
    }

    /// A reader schema that adds an enum field with a default must fill in that default when the
    /// writer schema does not contain the field.
    //TODO: move where it fits better
    #[test]
    fn test_enum_default() {
        let writer_schema = Schema::parse_str(BASE_RECORD_SCHEMA).unwrap();
        let reader_schema = Schema::parse_str(RECORD_WITH_SUIT_SCHEMA).unwrap();
        let input = write_single_record(&writer_schema, None);
        let mut reader = Reader::builder(&input[..])
            .reader_schema(&reader_schema)
            .build()
            .unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(1, "spades".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    /// An enum field written from a plain string is read back as the matching enum symbol when
    /// the same schema is used for reading.
    //TODO: move where it fits better
    #[test]
    fn test_enum_string_value() {
        let schema = Schema::parse_str(RECORD_WITH_SUIT_SCHEMA).unwrap();
        let input = write_single_record(&schema, Some("clubs"));
        let mut reader = Reader::builder(&input[..])
            .reader_schema(&schema)
            .build()
            .unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    /// Same round trip as `test_enum_string_value`, but without supplying a reader schema.
    //TODO: move where it fits better
    #[test]
    fn test_enum_no_reader_schema() {
        let writer_schema = Schema::parse_str(RECORD_WITH_SUIT_SCHEMA).unwrap();
        let input = write_single_record(&writer_schema, Some("clubs"));
        let mut reader = Reader::new(&input[..]).unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
        // Only one record was written, so the reader must be exhausted now (mirrors the
        // sibling tests above).
        assert!(reader.next().is_none());
    }

    /// Decoding a datum whose encoded length is absurdly large must fail with an error.
    #[test]
    fn test_illformed_length() {
        let schema = Schema::parse_str(BASE_RECORD_SCHEMA).unwrap();

        // Would allocate 18446744073709551605 bytes
        let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff];

        let value = from_avro_datum(&schema, &mut &*illformed, None);
        assert!(
            value.is_err(),
            "decoding an ill-formed length must return an error"
        );
    }
}