// apache_avro/lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [![rust continuous integration][rust continuous integration img]][rust continuous integration]
19//! [![rust clippy check][rust clippy check img]][rust clippy check]
20//! [![rust security audit][rust security audit img]][rust security audit]
21//! [![rust continuous integration ARM64][rust continuous integration ARM64 img]][rust continuous integration ARM64]
22//!
23//! [rust continuous integration]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci.yml
24//! [rust continuous integration ARM64]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci-ARM.yml
25//! [rust clippy check]:           https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-clippy.yml
26//! [rust security audit]:         https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-audit.yml
27//!
28//! [rust continuous integration img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci.yml/badge.svg
29//! [rust clippy check img]:           https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-clippy.yml/badge.svg
30//! [rust security audit img]:         https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-audit.yml/badge.svg
31//! [rust continuous integration ARM64 img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci-ARM.yml/badge.svg
32//!
33//! A library for working with [Apache Avro](https://avro.apache.org/) in Rust.
34//!
35//! Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference.
36//!
37//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
38//! data structures and a compact, fast, binary data format.
39//!
40//! All data in Avro is schematized, as in the following example:
41//!
42//! ```json
43//! {
44//!     "type": "record",
45//!     "name": "test",
46//!     "fields": [
47//!         {"name": "a", "type": "long", "default": 42},
48//!         {"name": "b", "type": "string"}
49//!     ]
50//! }
51//! ```
52//!
53//! There are basically two ways of handling Avro data in Rust:
54//!
55//! * **as Avro-specialized data types** based on an Avro schema;
56//! * **as generic Rust serde-compatible types** implementing/deriving `Serialize` and `Deserialize`;
57//!
58//! **apache-avro** provides a way to read and write both these data representations easily and
59//! efficiently.
60//!
61//! # Installing the library
62//!
63//!
64//! Add to your `Cargo.toml`:
65//!
66//! ```toml
67//! [dependencies]
68//! apache-avro = "x.y"
69//! ```
70//!
71//! Or in case you want to leverage the **Snappy** codec:
72//!
73//! ```toml
74//! [dependencies.apache-avro]
75//! version = "x.y"
76//! features = ["snappy"]
77//! ```
78//!
79//! Or in case you want to leverage the **Zstandard** codec:
80//!
81//! ```toml
82//! [dependencies.apache-avro]
83//! version = "x.y"
84//! features = ["zstandard"]
85//! ```
86//!
87//! Or in case you want to leverage the **Bzip2** codec:
88//!
89//! ```toml
90//! [dependencies.apache-avro]
91//! version = "x.y"
92//! features = ["bzip"]
93//! ```
94//!
95//! Or in case you want to leverage the **Xz** codec:
96//!
97//! ```toml
98//! [dependencies.apache-avro]
99//! version = "x.y"
100//! features = ["xz"]
101//! ```
102//!
103//! # Upgrading to a newer minor version
104//!
105//! The library is still in beta, so there might be backward-incompatible changes between minor
106//! versions. If you have troubles upgrading, check the release notes.
107//!
108//! # Minimum supported Rust version
109//!
110//! 1.74.0
111//!
112//! # Defining a schema
113//!
114//! An Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
115//! **can** be used while reading and they carry the information regarding the type of data we are
116//! handling. Avro schemas are used for both schema validation and resolution of Avro data.
117//!
118//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string:
119//!
120//! ```
121//! use apache_avro::Schema;
122//!
123//! let raw_schema = r#"
124//!     {
125//!         "type": "record",
126//!         "name": "test",
127//!         "fields": [
128//!             {"name": "a", "type": "long", "default": 42},
129//!             {"name": "b", "type": "string"}
130//!         ]
131//!     }
132//! "#;
133//!
134//! // if the schema is not valid, this function will return an error
135//! let schema = Schema::parse_str(raw_schema).unwrap();
136//!
137//! // schemas can be printed for debugging
138//! println!("{:?}", schema);
139//! ```
140//!
//! Additionally, a list of definitions (which may depend on each other) can be given and all of
142//! them will be parsed into the corresponding schemas.
143//!
144//! ```
145//! use apache_avro::Schema;
146//!
147//! let raw_schema_1 = r#"{
148//!         "name": "A",
149//!         "type": "record",
150//!         "fields": [
151//!             {"name": "field_one", "type": "float"}
152//!         ]
153//!     }"#;
154//!
155//! // This definition depends on the definition of A above
156//! let raw_schema_2 = r#"{
157//!         "name": "B",
158//!         "type": "record",
159//!         "fields": [
160//!             {"name": "field_one", "type": "A"}
161//!         ]
162//!     }"#;
163//!
164//! // if the schemas are not valid, this function will return an error
165//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
166//!
167//! // schemas can be printed for debugging
168//! println!("{:?}", schemas);
169//! ```
170//! *N.B.* It is important to note that the composition of schema definitions requires schemas with names.
171//! For this reason, only schemas of type Record, Enum, and Fixed should be input into this function.
172//!
173//! The library provides also a programmatic interface to define schemas without encoding them in
174//! JSON (for advanced use), but we highly recommend the JSON interface. Please read the API
175//! reference in case you are interested.
176//!
177//! For more information about schemas and what kind of information you can encapsulate in them,
178//! please refer to the appropriate section of the
179//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration).
180//!
181//! # Writing data
182//!
183//! Once we have defined a schema, we are ready to serialize data in Avro, validating them against
184//! the provided schema in the process. As mentioned before, there are two ways of handling Avro
185//! data in Rust.
186//!
187//! **NOTE:** The library also provides a low-level interface for encoding a single datum in Avro
188//! bytecode without generating markers and headers (for advanced use), but we highly recommend the
189//! `Writer` interface to be totally Avro-compatible. Please read the API reference in case you are
190//! interested.
191//!
192//! ## The avro way
193//!
194//! Given that the schema we defined above is that of an Avro *Record*, we are going to use the
195//! associated type provided by the library to specify the data we want to serialize:
196//!
197//! ```
198//! # use apache_avro::Schema;
199//! use apache_avro::types::Record;
200//! use apache_avro::Writer;
201//! #
202//! # let raw_schema = r#"
203//! #     {
204//! #         "type": "record",
205//! #         "name": "test",
206//! #         "fields": [
207//! #             {"name": "a", "type": "long", "default": 42},
208//! #             {"name": "b", "type": "string"}
209//! #         ]
210//! #     }
211//! # "#;
212//! # let schema = Schema::parse_str(raw_schema).unwrap();
213//! // a writer needs a schema and something to write to
214//! let mut writer = Writer::new(&schema, Vec::new());
215//!
216//! // the Record type models our Record schema
217//! let mut record = Record::new(writer.schema()).unwrap();
218//! record.put("a", 27i64);
219//! record.put("b", "foo");
220//!
221//! // schema validation happens here
222//! writer.append(record).unwrap();
223//!
224//! // this is how to get back the resulting avro bytecode
225//! // this performs a flush operation to make sure data has been written, so it can fail
226//! // you can also call `writer.flush()` yourself without consuming the writer
227//! let encoded = writer.into_inner().unwrap();
228//! ```
229//!
230//! The vast majority of the times, schemas tend to define a record as a top-level container
231//! encapsulating all the values to convert as fields and providing documentation for them, but in
232//! case we want to directly define an Avro value, the library offers that capability via the
233//! `Value` interface.
234//!
235//! ```
236//! use apache_avro::types::Value;
237//!
238//! let mut value = Value::String("foo".to_string());
239//! ```
240//!
241//! ## The serde way
242//!
243//! Given that the schema we defined above is an Avro *Record*, we can directly use a Rust struct
244//! deriving `Serialize` to model our data:
245//!
246//! ```
247//! # use apache_avro::Schema;
248//! # use serde::Serialize;
249//! use apache_avro::Writer;
250//!
251//! #[derive(Debug, Serialize)]
252//! struct Test {
253//!     a: i64,
254//!     b: String,
255//! }
256//!
257//! # let raw_schema = r#"
258//! #     {
259//! #         "type": "record",
260//! #         "name": "test",
261//! #         "fields": [
262//! #             {"name": "a", "type": "long", "default": 42},
263//! #             {"name": "b", "type": "string"}
264//! #         ]
265//! #     }
266//! # "#;
267//! # let schema = Schema::parse_str(raw_schema).unwrap();
268//! // a writer needs a schema and something to write to
269//! let mut writer = Writer::new(&schema, Vec::new());
270//!
271//! // the structure models our Record schema
272//! let test = Test {
273//!     a: 27,
274//!     b: "foo".to_owned(),
275//! };
276//!
277//! // schema validation happens here
278//! writer.append_ser(test).unwrap();
279//!
280//! // this is how to get back the resulting avro bytecode
281//! // this performs a flush operation to make sure data is written, so it can fail
282//! // you can also call `writer.flush()` yourself without consuming the writer
283//! let encoded = writer.into_inner();
284//! ```
285//!
286//! The vast majority of the times, schemas tend to define a record as a top-level container
287//! encapsulating all the values to convert as fields and providing documentation for them, but in
288//! case we want to directly define an Avro value, any type implementing `Serialize` should work.
289//!
290//! ```
291//! let mut value = "foo".to_string();
292//! ```
293//!
294//! ## Using codecs to compress data
295//!
//! Avro supports the following compression codecs when encoding data:
297//!
298//! * **Null**: leaves data uncompressed;
299//! * **Deflate**: writes the data block using the deflate algorithm as specified in RFC 1951, and
300//!   typically implemented using the zlib library. Note that this format (unlike the "zlib format" in
301//!   RFC 1950) does not have a checksum.
302//! * **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each
//!   compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in
304//!   the block. You must enable the `snappy` feature to use this codec.
305//! * **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library.
306//!   You must enable the `zstandard` feature to use this codec.
307//! * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library.
308//!   You must enable the `bzip` feature to use this codec.
309//! * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library.
310//!   You must enable the `xz` feature to use this codec.
311//!
312//! To specify a codec to use to compress data, just specify it while creating a `Writer`:
313//! ```
314//! use apache_avro::{Codec, DeflateSettings, Schema, Writer};
315//! #
316//! # let raw_schema = r#"
317//! #     {
318//! #         "type": "record",
319//! #         "name": "test",
320//! #         "fields": [
321//! #             {"name": "a", "type": "long", "default": 42},
322//! #             {"name": "b", "type": "string"}
323//! #         ]
324//! #     }
325//! # "#;
326//! # let schema = Schema::parse_str(raw_schema).unwrap();
327//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
328//! ```
329//!
330//! # Reading data
331//!
332//! As far as reading Avro encoded data goes, we can just use the schema encoded with the data to
333//! read them. The library will do it automatically for us, as it already does for the compression
334//! codec:
335//!
336//! ```
337//! use apache_avro::Reader;
338//! # use apache_avro::Schema;
339//! # use apache_avro::types::Record;
340//! # use apache_avro::Writer;
341//! #
342//! # let raw_schema = r#"
343//! #     {
344//! #         "type": "record",
345//! #         "name": "test",
346//! #         "fields": [
347//! #             {"name": "a", "type": "long", "default": 42},
348//! #             {"name": "b", "type": "string"}
349//! #         ]
350//! #     }
351//! # "#;
352//! # let schema = Schema::parse_str(raw_schema).unwrap();
353//! # let mut writer = Writer::new(&schema, Vec::new());
354//! # let mut record = Record::new(writer.schema()).unwrap();
355//! # record.put("a", 27i64);
356//! # record.put("b", "foo");
357//! # writer.append(record).unwrap();
358//! # let input = writer.into_inner().unwrap();
359//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
360//! let reader = Reader::new(&input[..]).unwrap();
361//! ```
362//!
363//! In case, instead, we want to specify a different (but compatible) reader schema from the schema
364//! the data has been written with, we can just do as the following:
365//! ```
366//! use apache_avro::Schema;
367//! use apache_avro::Reader;
368//! # use apache_avro::types::Record;
369//! # use apache_avro::Writer;
370//! #
371//! # let writer_raw_schema = r#"
372//! #     {
373//! #         "type": "record",
374//! #         "name": "test",
375//! #         "fields": [
376//! #             {"name": "a", "type": "long", "default": 42},
377//! #             {"name": "b", "type": "string"}
378//! #         ]
379//! #     }
380//! # "#;
381//! # let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
382//! # let mut writer = Writer::new(&writer_schema, Vec::new());
383//! # let mut record = Record::new(writer.schema()).unwrap();
384//! # record.put("a", 27i64);
385//! # record.put("b", "foo");
386//! # writer.append(record).unwrap();
387//! # let input = writer.into_inner().unwrap();
388//!
389//! let reader_raw_schema = r#"
390//!     {
391//!         "type": "record",
392//!         "name": "test",
393//!         "fields": [
394//!             {"name": "a", "type": "long", "default": 42},
395//!             {"name": "b", "type": "string"},
396//!             {"name": "c", "type": "long", "default": 43}
397//!         ]
398//!     }
399//! "#;
400//!
401//! let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
402//!
403//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
404//! let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap();
405//! ```
406//!
407//! The library will also automatically perform schema resolution while reading the data.
408//!
409//! For more information about schema compatibility and resolution, please refer to the
410//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration).
411//!
412//! As usual, there are two ways to handle Avro data in Rust, as you can see below.
413//!
414//! **NOTE:** The library also provides a low-level interface for decoding a single datum in Avro
415//! bytecode without markers and header (for advanced use), but we highly recommend the `Reader`
416//! interface to leverage all Avro features. Please read the API reference in case you are
417//! interested.
418//!
419//!
420//! ## The avro way
421//!
422//! We can just read directly instances of `Value` out of the `Reader` iterator:
423//!
424//! ```
425//! # use apache_avro::Schema;
426//! # use apache_avro::types::Record;
427//! # use apache_avro::Writer;
428//! use apache_avro::Reader;
429//! #
430//! # let raw_schema = r#"
431//! #     {
432//! #         "type": "record",
433//! #         "name": "test",
434//! #         "fields": [
435//! #             {"name": "a", "type": "long", "default": 42},
436//! #             {"name": "b", "type": "string"}
437//! #         ]
438//! #     }
439//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
442//! # let mut writer = Writer::new(&schema, Vec::new());
443//! # let mut record = Record::new(writer.schema()).unwrap();
444//! # record.put("a", 27i64);
445//! # record.put("b", "foo");
446//! # writer.append(record).unwrap();
447//! # let input = writer.into_inner().unwrap();
448//! let reader = Reader::new(&input[..]).unwrap();
449//!
//! // value is a Result of an Avro Value, in case the read operation fails
451//! for value in reader {
452//!     println!("{:?}", value.unwrap());
453//! }
454//!
455//! ```
456//!
457//! ## The serde way
458//!
459//! Alternatively, we can use a Rust type implementing `Deserialize` and representing our schema to
460//! read the data into:
461//!
462//! ```
463//! # use apache_avro::Schema;
464//! # use apache_avro::Writer;
465//! # use serde::{Deserialize, Serialize};
466//! use apache_avro::Reader;
467//! use apache_avro::from_value;
468//!
469//! # #[derive(Serialize)]
470//! #[derive(Debug, Deserialize)]
471//! struct Test {
472//!     a: i64,
473//!     b: String,
474//! }
475//!
476//! # let raw_schema = r#"
477//! #     {
478//! #         "type": "record",
479//! #         "name": "test",
480//! #         "fields": [
481//! #             {"name": "a", "type": "long", "default": 42},
482//! #             {"name": "b", "type": "string"}
483//! #         ]
484//! #     }
485//! # "#;
486//! # let schema = Schema::parse_str(raw_schema).unwrap();
487//! # let mut writer = Writer::new(&schema, Vec::new());
488//! # let test = Test {
489//! #     a: 27,
490//! #     b: "foo".to_owned(),
491//! # };
492//! # writer.append_ser(test).unwrap();
493//! # let input = writer.into_inner().unwrap();
494//! let reader = Reader::new(&input[..]).unwrap();
495//!
496//! // value is a Result in case the read operation fails
497//! for value in reader {
498//!     println!("{:?}", from_value::<Test>(&value.unwrap()));
499//! }
500//! ```
501//!
502//! # Putting everything together
503//!
504//! The following is an example of how to combine everything showed so far and it is meant to be a
505//! quick reference of the library interface:
506//!
507//! ```
508//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value, types::Record, Error};
509//! use serde::{Deserialize, Serialize};
510//!
511//! #[derive(Debug, Deserialize, Serialize)]
512//! struct Test {
513//!     a: i64,
514//!     b: String,
515//! }
516//!
517//! fn main() -> Result<(), Error> {
518//!     let raw_schema = r#"
519//!         {
520//!             "type": "record",
521//!             "name": "test",
522//!             "fields": [
523//!                 {"name": "a", "type": "long", "default": 42},
524//!                 {"name": "b", "type": "string"}
525//!             ]
526//!         }
527//!     "#;
528//!
529//!     let schema = Schema::parse_str(raw_schema)?;
530//!
531//!     println!("{:?}", schema);
532//!
533//!     let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
534//!
535//!     let mut record = Record::new(writer.schema()).unwrap();
536//!     record.put("a", 27i64);
537//!     record.put("b", "foo");
538//!
539//!     writer.append(record)?;
540//!
541//!     let test = Test {
542//!         a: 27,
543//!         b: "foo".to_owned(),
544//!     };
545//!
546//!     writer.append_ser(test)?;
547//!
548//!     let input = writer.into_inner()?;
549//!     let reader = Reader::with_schema(&schema, &input[..])?;
550//!
551//!     for record in reader {
552//!         println!("{:?}", from_value::<Test>(&record?));
553//!     }
554//!     Ok(())
555//! }
556//! ```
557//!
558//! `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/specification/#logical-types):
559//!
560//! 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/latest/num_bigint) crate
561//! 1. UUID using the [`uuid`](https://docs.rs/uuid/latest/uuid) crate
562//! 1. Date, Time (milli) as `i32` and Time (micro) as `i64`
563//! 1. Timestamp (milli and micro) as `i64`
564//! 1. Local timestamp (milli and micro) as `i64`
565//! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32`
566//!
567//! Note that the on-disk representation is identical to the underlying primitive/complex type.
568//!
569//! ### Read and write logical types
570//!
571//! ```rust
572//! use apache_avro::{
573//!     types::Record, types::Value, Codec, Days, Decimal, DeflateSettings, Duration, Millis, Months, Reader, Schema,
574//!     Writer, Error,
575//! };
576//! use num_bigint::ToBigInt;
577//!
578//! fn main() -> Result<(), Error> {
579//!     let raw_schema = r#"
580//!     {
581//!       "type": "record",
582//!       "name": "test",
583//!       "fields": [
584//!         {
585//!           "name": "decimal_fixed",
586//!           "type": {
587//!             "type": "fixed",
588//!             "size": 2,
589//!             "name": "decimal"
590//!           },
591//!           "logicalType": "decimal",
592//!           "precision": 4,
593//!           "scale": 2
594//!         },
595//!         {
596//!           "name": "decimal_var",
597//!           "type": "bytes",
598//!           "logicalType": "decimal",
599//!           "precision": 10,
600//!           "scale": 3
601//!         },
602//!         {
603//!           "name": "uuid",
604//!           "type": "string",
605//!           "logicalType": "uuid"
606//!         },
607//!         {
608//!           "name": "date",
609//!           "type": "int",
610//!           "logicalType": "date"
611//!         },
612//!         {
613//!           "name": "time_millis",
614//!           "type": "int",
615//!           "logicalType": "time-millis"
616//!         },
617//!         {
618//!           "name": "time_micros",
619//!           "type": "long",
620//!           "logicalType": "time-micros"
621//!         },
622//!         {
623//!           "name": "timestamp_millis",
624//!           "type": "long",
625//!           "logicalType": "timestamp-millis"
626//!         },
627//!         {
628//!           "name": "timestamp_micros",
629//!           "type": "long",
630//!           "logicalType": "timestamp-micros"
631//!         },
632//!         {
633//!           "name": "local_timestamp_millis",
634//!           "type": "long",
635//!           "logicalType": "local-timestamp-millis"
636//!         },
637//!         {
638//!           "name": "local_timestamp_micros",
639//!           "type": "long",
640//!           "logicalType": "local-timestamp-micros"
641//!         },
642//!         {
643//!           "name": "duration",
644//!           "type": {
645//!             "type": "fixed",
646//!             "size": 12,
647//!             "name": "duration"
648//!           },
649//!           "logicalType": "duration"
650//!         }
651//!       ]
652//!     }
653//!     "#;
654//!
655//!     let schema = Schema::parse_str(raw_schema)?;
656//!
657//!     println!("{:?}", schema);
658//!
659//!     let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
660//!
661//!     let mut record = Record::new(writer.schema()).unwrap();
662//!     record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
663//!     record.put("decimal_var", Decimal::from(((-32442).to_bigint().unwrap()).to_signed_bytes_be()));
664//!     record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap());
665//!     record.put("date", Value::Date(1));
666//!     record.put("time_millis", Value::TimeMillis(2));
667//!     record.put("time_micros", Value::TimeMicros(3));
668//!     record.put("timestamp_millis", Value::TimestampMillis(4));
669//!     record.put("timestamp_micros", Value::TimestampMicros(5));
//!     record.put("local_timestamp_millis", Value::LocalTimestampMillis(4));
//!     record.put("local_timestamp_micros", Value::LocalTimestampMicros(5));
674//!     record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8)));
675//!
676//!     writer.append(record)?;
677//!
678//!     let input = writer.into_inner()?;
679//!     let reader = Reader::with_schema(&schema, &input[..])?;
680//!
681//!     for record in reader {
682//!         println!("{:?}", record?);
683//!     }
684//!     Ok(())
685//! }
686//! ```
687//!
688//! ## Calculate Avro schema fingerprint
689//!
690//! This library supports calculating the following fingerprints:
691//!
692//!  - SHA-256
693//!  - MD5
694//!  - Rabin
695//!
696//! An example of fingerprinting for the supported fingerprints:
697//!
698//! ```rust
699//! use apache_avro::rabin::Rabin;
700//! use apache_avro::{Schema, Error};
701//! use md5::Md5;
702//! use sha2::Sha256;
703//!
704//! fn main() -> Result<(), Error> {
705//!     let raw_schema = r#"
706//!         {
707//!             "type": "record",
708//!             "name": "test",
709//!             "fields": [
710//!                 {"name": "a", "type": "long", "default": 42},
711//!                 {"name": "b", "type": "string"}
712//!             ]
713//!         }
714//!     "#;
715//!     let schema = Schema::parse_str(raw_schema)?;
716//!     println!("{}", schema.fingerprint::<Sha256>());
717//!     println!("{}", schema.fingerprint::<Md5>());
718//!     println!("{}", schema.fingerprint::<Rabin>());
719//!     Ok(())
720//! }
721//! ```
722//!
723//! ## Ill-formed data
724//!
725//! In order to ease decoding, the Binary Encoding specification of Avro data
726//! requires some fields to have their length encoded alongside the data.
727//!
728//! If encoded data passed to a `Reader` has been ill-formed, it can happen that
729//! the bytes meant to contain the length of data are bogus and could result
730//! in extravagant memory allocation.
731//!
732//! To shield users from ill-formed data, `apache-avro` sets a limit (default: 512MB)
733//! to any allocation it will perform when decoding data.
734//!
735//! If you expect some of your data fields to be larger than this limit, be sure
736//! to make use of the `max_allocation_bytes` function before reading **any** data
737//! (we leverage Rust's [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html)
738//! mechanism to initialize this value, if
739//! any call to decode is made before a call to `max_allocation_bytes`, the limit
740//! will be 512MB throughout the lifetime of the program).
741//!
742//!
743//! ```rust
744//! use apache_avro::max_allocation_bytes;
745//!
746//! max_allocation_bytes(2 * 1024 * 1024 * 1024);  // 2GB
747//!
748//! // ... happily decode large data
749//!
750//! ```
751//!
752//! ## Check schemas compatibility
753//!
754//! This library supports checking for schemas compatibility.
755//!
756//! Examples of checking for compatibility:
757//!
758//! 1. Compatible schemas
759//!
//! Explanation: an int array schema can be read by a long array schema: an int
761//! (32bit signed integer) fits into a long (64bit signed integer)
762//!
763//! ```rust
764//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
765//!
766//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
767//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
768//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok());
769//! ```
770//!
771//! 2. Incompatible schemas (a long array schema cannot be read by an int array schema)
772//!
//! Explanation: a long array schema cannot be read by an int array schema: a
774//! long (64bit signed integer) does not fit into an int (32bit signed integer)
775//!
776//! ```rust
777//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
778//!
779//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
780//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
781//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
782//! ```
783//! ## Custom names validators
784//!
785//! By default the library follows the rules by the
786//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)!
787//!
788//! Some of the other Apache Avro language SDKs are not that strict and allow more
789//! characters in names. For interoperability with those SDKs, the library provides
790//! a way to customize the names validation.
791//!
792//! ```rust
793//! use apache_avro::AvroResult;
794//! use apache_avro::schema::Namespace;
795//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator};
796//!
797//! struct MyCustomValidator;
798//!
799//! impl SchemaNameValidator for MyCustomValidator {
800//!     fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> {
801//!         todo!()
802//!     }
803//! }
804//!
805//! // don't parse any schema before registering the custom validator(s) !
806//!
807//! set_schema_name_validator(Box::new(MyCustomValidator));
808//!
809//! // ... use the library
810//! ```
811//!
812//! Similar logic could be applied to the schema namespace, enum symbols and field names validation.
813//!
//! **Note**: the library allows setting a validator only once per application lifetime!
815//! If the application parses schemas before setting a validator, the default validator will be
816//! registered and used!
817//!
818//! ## Custom schema equality comparators
819//!
820//! The library provides two implementations of schema equality comparators:
821//! 1. `SpecificationEq` - a comparator that serializes the schemas to their
822//!    canonical forms (i.e. JSON) and compares them as strings. It is the only implementation
823//!    until apache_avro 0.16.0.
824//!    See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas)
825//!    for more information!
826//! 2. `StructFieldEq` - a comparator that compares the schemas structurally.
827//!    It is faster than the `SpecificationEq` because it returns `false` as soon as a difference
828//!    is found and is recommended for use!
829//!    It is the default comparator since apache_avro 0.17.0.
830//!
831//! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the
832//! `set_schemata_equality_comparator` function:
833//!
834//! ```rust
835//! use apache_avro::{AvroResult, Schema};
836//! use apache_avro::schema::Namespace;
837//! use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator};
838//!
839//! #[derive(Debug)]
840//! struct MyCustomSchemataEq;
841//!
842//! impl SchemataEq for MyCustomSchemataEq {
843//!     fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool {
844//!         todo!()
845//!     }
846//! }
847//!
848//! // don't parse any schema before registering the custom comparator !
849//!
850//! set_schemata_equality_comparator(Box::new(MyCustomSchemataEq));
851//!
852//! // ... use the library
853//! ```
//! **Note**: the library allows setting a comparator only once per application lifetime!
855//! If the application parses schemas before setting a comparator, the default comparator will be
856//! registered and used!
857//!
858
859mod bigdecimal;
860mod bytes;
861mod codec;
862mod de;
863mod decimal;
864mod decode;
865mod duration;
866mod encode;
867mod error;
868mod reader;
869mod ser;
870mod ser_schema;
871mod util;
872mod writer;
873
874pub mod rabin;
875pub mod schema;
876pub mod schema_compatibility;
877pub mod schema_equality;
878pub mod types;
879pub mod validator;
880
881pub use crate::{
882    bigdecimal::BigDecimal,
883    bytes::{
884        serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt,
885        serde_avro_slice, serde_avro_slice_opt,
886    },
887};
888#[cfg(feature = "bzip")]
889pub use codec::bzip::Bzip2Settings;
890#[cfg(feature = "xz")]
891pub use codec::xz::XzSettings;
892#[cfg(feature = "zstandard")]
893pub use codec::zstandard::ZstandardSettings;
894pub use codec::{Codec, DeflateSettings};
895pub use de::from_value;
896pub use decimal::Decimal;
897pub use duration::{Days, Duration, Millis, Months};
898pub use error::Error;
899pub use reader::{
900    from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata, read_marker,
901    GenericSingleObjectReader, Reader, SpecificSingleObjectReader,
902};
903pub use schema::{AvroSchema, Schema};
904pub use ser::to_value;
905pub use util::{max_allocation_bytes, set_serde_human_readable};
906pub use uuid::Uuid;
907pub use writer::{
908    to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter,
909    Writer,
910};
911
912#[cfg(feature = "derive")]
913pub use apache_avro_derive::*;
914
/// A convenience type alias for `Result`s whose error type is this crate's [`Error`].
pub type AvroResult<T> = Result<T, Error>;
917
918#[cfg(test)]
919mod tests {
920    use crate::{
921        from_avro_datum,
922        types::{Record, Value},
923        Codec, Reader, Schema, Writer,
924    };
925    use pretty_assertions::assert_eq;
926
927    //TODO: move where it fits better
928    #[test]
929    fn test_enum_default() {
930        let writer_raw_schema = r#"
931            {
932                "type": "record",
933                "name": "test",
934                "fields": [
935                    {"name": "a", "type": "long", "default": 42},
936                    {"name": "b", "type": "string"}
937                ]
938            }
939        "#;
940        let reader_raw_schema = r#"
941            {
942                "type": "record",
943                "name": "test",
944                "fields": [
945                    {"name": "a", "type": "long", "default": 42},
946                    {"name": "b", "type": "string"},
947                    {
948                        "name": "c",
949                        "type": {
950                            "type": "enum",
951                            "name": "suit",
952                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
953                        },
954                        "default": "spades"
955                    }
956                ]
957            }
958        "#;
959        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
960        let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
961        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null);
962        let mut record = Record::new(writer.schema()).unwrap();
963        record.put("a", 27i64);
964        record.put("b", "foo");
965        writer.append(record).unwrap();
966        let input = writer.into_inner().unwrap();
967        let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap();
968        assert_eq!(
969            reader.next().unwrap().unwrap(),
970            Value::Record(vec![
971                ("a".to_string(), Value::Long(27)),
972                ("b".to_string(), Value::String("foo".to_string())),
973                ("c".to_string(), Value::Enum(1, "spades".to_string())),
974            ])
975        );
976        assert!(reader.next().is_none());
977    }
978
979    //TODO: move where it fits better
980    #[test]
981    fn test_enum_string_value() {
982        let raw_schema = r#"
983            {
984                "type": "record",
985                "name": "test",
986                "fields": [
987                    {"name": "a", "type": "long", "default": 42},
988                    {"name": "b", "type": "string"},
989                    {
990                        "name": "c",
991                        "type": {
992                            "type": "enum",
993                            "name": "suit",
994                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
995                        },
996                        "default": "spades"
997                    }
998                ]
999            }
1000        "#;
1001        let schema = Schema::parse_str(raw_schema).unwrap();
1002        let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null);
1003        let mut record = Record::new(writer.schema()).unwrap();
1004        record.put("a", 27i64);
1005        record.put("b", "foo");
1006        record.put("c", "clubs");
1007        writer.append(record).unwrap();
1008        let input = writer.into_inner().unwrap();
1009        let mut reader = Reader::with_schema(&schema, &input[..]).unwrap();
1010        assert_eq!(
1011            reader.next().unwrap().unwrap(),
1012            Value::Record(vec![
1013                ("a".to_string(), Value::Long(27)),
1014                ("b".to_string(), Value::String("foo".to_string())),
1015                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
1016            ])
1017        );
1018        assert!(reader.next().is_none());
1019    }
1020
1021    //TODO: move where it fits better
1022    #[test]
1023    fn test_enum_no_reader_schema() {
1024        let writer_raw_schema = r#"
1025            {
1026                "type": "record",
1027                "name": "test",
1028                "fields": [
1029                    {"name": "a", "type": "long", "default": 42},
1030                    {"name": "b", "type": "string"},
1031                    {
1032                        "name": "c",
1033                        "type": {
1034                            "type": "enum",
1035                            "name": "suit",
1036                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
1037                        },
1038                        "default": "spades"
1039                    }
1040                ]
1041            }
1042        "#;
1043        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
1044        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null);
1045        let mut record = Record::new(writer.schema()).unwrap();
1046        record.put("a", 27i64);
1047        record.put("b", "foo");
1048        record.put("c", "clubs");
1049        writer.append(record).unwrap();
1050        let input = writer.into_inner().unwrap();
1051        let mut reader = Reader::new(&input[..]).unwrap();
1052        assert_eq!(
1053            reader.next().unwrap().unwrap(),
1054            Value::Record(vec![
1055                ("a".to_string(), Value::Long(27)),
1056                ("b".to_string(), Value::String("foo".to_string())),
1057                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
1058            ])
1059        );
1060    }
1061
    #[test]
    fn test_illformed_length() {
        // Decoding a datum whose encoded length field is absurdly large must
        // return an error instead of attempting a huge allocation.
        let raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"}
                ]
            }
        "#;

        let schema = Schema::parse_str(raw_schema).unwrap();

        // Would allocate 18446744073709551605 bytes
        let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff];

        let value = from_avro_datum(&schema, &mut &*illformed, None);
        assert!(value.is_err());
    }
1083}