apache_avro/lib.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [![rust continuous integration][rust continuous integration img]][rust continuous integration]
//! [![rust clippy check][rust clippy check img]][rust clippy check]
//! [![rust security audit][rust security audit img]][rust security audit]
//! [![rust continuous integration ARM64][rust continuous integration ARM64 img]][rust continuous integration ARM64]
//!
//! [rust continuous integration]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci.yml
//! [rust continuous integration ARM64]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci-ARM.yml
//! [rust clippy check]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-clippy.yml
//! [rust security audit]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-audit.yml
//!
//! [rust continuous integration img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci.yml/badge.svg
//! [rust clippy check img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-clippy.yml/badge.svg
//! [rust security audit img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-audit.yml/badge.svg
//! [rust continuous integration ARM64 img]: https://github.com/apache/avro-rs/actions/workflows/test-lang-rust-ci-ARM.yml/badge.svg
//!
//! A library for working with [Apache Avro](https://avro.apache.org/) in Rust.
//!
//! Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference.
//!
//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
//! data structures and a compact, fast, binary data format.
//!
//! All data in Avro is schematized, as in the following example:
//!
//! ```json
//! {
//!     "type": "record",
//!     "name": "test",
//!     "fields": [
//!         {"name": "a", "type": "long", "default": 42},
//!         {"name": "b", "type": "string"}
//!     ]
//! }
//! ```
//! There are basically two ways of handling Avro data in Rust:
//!
//! * **as Avro-specialized data types** based on an Avro schema;
//! * **as generic Rust serde-compatible types** implementing/deriving `Serialize` and `Deserialize`;
//!
//! **apache-avro** provides a way to read and write both these data representations easily and
//! efficiently.
//!
//! # Installing the library
//!
//! Add to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! apache-avro = "x.y"
//! ```
//!
//! Or in case you want to leverage the **Snappy** codec:
//!
//! ```toml
//! [dependencies.apache-avro]
//! version = "x.y"
//! features = ["snappy"]
//! ```
//!
//! Or in case you want to leverage the **Zstandard** codec:
//!
//! ```toml
//! [dependencies.apache-avro]
//! version = "x.y"
//! features = ["zstandard"]
//! ```
//!
//! Or in case you want to leverage the **Bzip2** codec:
//!
//! ```toml
//! [dependencies.apache-avro]
//! version = "x.y"
//! features = ["bzip"]
//! ```
//!
//! Or in case you want to leverage the **Xz** codec:
//!
//! ```toml
//! [dependencies.apache-avro]
//! version = "x.y"
//! features = ["xz"]
//! ```
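//!
//! Cargo features are additive, so the codec features can also be combined in a single
//! dependency entry (a sketch; any subset works):
//!
//! ```toml
//! [dependencies.apache-avro]
//! version = "x.y"
//! features = ["snappy", "zstandard", "bzip", "xz"]
//! ```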
//!
//! # Upgrading to a newer minor version
//!
//! The library is still in beta, so there might be backward-incompatible changes between minor
//! versions. If you have trouble upgrading, check the release notes.
//!
//! # Minimum supported Rust version
//!
//! 1.74.0
//!
//! # Defining a schema
//!
//! Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
//! **can** be used while reading; they carry the information regarding the type of data we are
//! handling. Avro schemas are used for both schema validation and resolution of Avro data.
//!
//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string:
//!
//! ```
//! use apache_avro::Schema;
//!
//! let raw_schema = r#"
//!     {
//!         "type": "record",
//!         "name": "test",
//!         "fields": [
//!             {"name": "a", "type": "long", "default": 42},
//!             {"name": "b", "type": "string"}
//!         ]
//!     }
//! "#;
//!
//! // if the schema is not valid, this function will return an error
//! let schema = Schema::parse_str(raw_schema).unwrap();
//!
//! // schemas can be printed for debugging
//! println!("{:?}", schema);
//! ```
//!
//! Additionally, a list of definitions (which may depend on each other) can be given and all of
//! them will be parsed into the corresponding schemas.
//!
//! ```
//! use apache_avro::Schema;
//!
//! let raw_schema_1 = r#"{
//!     "name": "A",
//!     "type": "record",
//!     "fields": [
//!         {"name": "field_one", "type": "float"}
//!     ]
//! }"#;
//!
//! // This definition depends on the definition of A above
//! let raw_schema_2 = r#"{
//!     "name": "B",
//!     "type": "record",
//!     "fields": [
//!         {"name": "field_one", "type": "A"}
//!     ]
//! }"#;
//!
//! // if the schemas are not valid, this function will return an error
//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
//!
//! // schemas can be printed for debugging
//! println!("{:?}", schemas);
//! ```
//!
//! *N.B.* The composition of schema definitions requires schemas with names. For this reason,
//! only schemas of type Record, Enum, and Fixed should be input into this function.
//!
//! The library also provides a programmatic interface to define schemas without encoding them in
//! JSON (for advanced use), but we highly recommend the JSON interface. Please read the API
//! reference in case you are interested.
//!
//! For more information about schemas and what kind of information you can encapsulate in them,
//! please refer to the appropriate section of the
//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration).
//!
//! # Writing data
//!
//! Once we have defined a schema, we are ready to serialize data in Avro, validating it against
//! the provided schema in the process. As mentioned before, there are two ways of handling Avro
//! data in Rust.
//!
//! **NOTE:** The library also provides a low-level interface for encoding a single datum in Avro
//! binary format without generating markers and headers (for advanced use), but we highly
//! recommend the `Writer` interface in order to stay fully Avro-compatible. Please read the API
//! reference in case you are interested.
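//!
//! For illustration, here is a minimal sketch of that low-level interface, round-tripping a
//! single value through the exported `to_avro_datum` and `from_avro_datum` functions (no
//! container-file header or markers are produced, so the schema must travel out of band):
//!
//! ```
//! use apache_avro::{from_avro_datum, to_avro_datum, types::Value, Schema};
//!
//! let schema = Schema::parse_str(r#""long""#).unwrap();
//!
//! // encode a single datum: just the value, with no header, marker or codec
//! let encoded = to_avro_datum(&schema, Value::Long(42)).unwrap();
//!
//! // decode it back, supplying the writer schema ourselves
//! let decoded = from_avro_datum(&schema, &mut &encoded[..], None).unwrap();
//! assert_eq!(decoded, Value::Long(42));
//! ```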
//!
//! ## The avro way
//!
//! Given that the schema we defined above is that of an Avro *Record*, we are going to use the
//! associated type provided by the library to specify the data we want to serialize:
//!
//! ```
//! # use apache_avro::Schema;
//! use apache_avro::types::Record;
//! use apache_avro::Writer;
//! #
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! // a writer needs a schema and something to write to
//! let mut writer = Writer::new(&schema, Vec::new());
//!
//! // the Record type models our Record schema
//! let mut record = Record::new(writer.schema()).unwrap();
//! record.put("a", 27i64);
//! record.put("b", "foo");
//!
//! // schema validation happens here
//! writer.append(record).unwrap();
//!
//! // this is how to get back the resulting avro binary data
//! // this performs a flush operation to make sure data has been written, so it can fail
//! // you can also call `writer.flush()` yourself without consuming the writer
//! let encoded = writer.into_inner().unwrap();
//! ```
//!
//! Most of the time, schemas tend to define a record as a top-level container
//! encapsulating all the values to convert as fields and providing documentation for them, but in
//! case we want to directly define an Avro value, the library offers that capability via the
//! `Value` interface.
//!
//! ```
//! use apache_avro::types::Value;
//!
//! let value = Value::String("foo".to_string());
//! ```
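//!
//! A standalone `Value` built this way can also be checked against a schema before writing it;
//! a minimal sketch using `Value::validate`, which returns a plain `bool`:
//!
//! ```
//! use apache_avro::{types::Value, Schema};
//!
//! let schema = Schema::parse_str(r#""string""#).unwrap();
//! let value = Value::String("foo".to_string());
//!
//! // check the value against the schema without serializing it
//! assert!(value.validate(&schema));
//! ```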
//!
//! ## The serde way
//!
//! Given that the schema we defined above is an Avro *Record*, we can directly use a Rust struct
//! deriving `Serialize` to model our data:
//!
//! ```
//! # use apache_avro::Schema;
//! # use serde::Serialize;
//! use apache_avro::Writer;
//!
//! #[derive(Debug, Serialize)]
//! struct Test {
//!     a: i64,
//!     b: String,
//! }
//!
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! // a writer needs a schema and something to write to
//! let mut writer = Writer::new(&schema, Vec::new());
//!
//! // the structure models our Record schema
//! let test = Test {
//!     a: 27,
//!     b: "foo".to_owned(),
//! };
//!
//! // schema validation happens here
//! writer.append_ser(test).unwrap();
//!
//! // this is how to get back the resulting avro binary data
//! // this performs a flush operation to make sure data is written, so it can fail
//! // you can also call `writer.flush()` yourself without consuming the writer
//! let encoded = writer.into_inner().unwrap();
//! ```
//!
//! Most of the time, schemas tend to define a record as a top-level container
//! encapsulating all the values to convert as fields and providing documentation for them, but in
//! case we want to directly define an Avro value, any type implementing `Serialize` should work.
//!
//! ```
//! let value = "foo".to_string();
//! ```
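//!
//! If the intermediate representation is needed, any `Serialize` type can be converted to a
//! `Value` explicitly; a minimal sketch using the exported `to_value` function:
//!
//! ```
//! use apache_avro::{to_value, types::Value};
//!
//! // convert any type implementing Serialize into the crate's Value representation
//! let value = to_value("foo").unwrap();
//! assert_eq!(value, Value::String("foo".to_owned()));
//! ```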
//!
//! ## Using codecs to compress data
//!
//! Avro supports several compression codecs when encoding data:
//!
//! * **Null**: leaves data uncompressed;
//! * **Deflate**: writes the data block using the deflate algorithm as specified in RFC 1951, and
//!   typically implemented using the zlib library. Note that this format (unlike the "zlib format" in
//!   RFC 1950) does not have a checksum.
//! * **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each
//!   compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in
//!   the block. You must enable the `snappy` feature to use this codec.
//! * **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library.
//!   You must enable the `zstandard` feature to use this codec.
//! * **Bzip2**: uses the [BZip2](https://sourceware.org/bzip2/) compression library.
//!   You must enable the `bzip` feature to use this codec.
//! * **Xz**: uses the [xz2](https://github.com/alexcrichton/xz2-rs) compression library.
//!   You must enable the `xz` feature to use this codec.
//!
//! To specify the codec used to compress data, just set it while creating a `Writer`:
//!
//! ```
//! use apache_avro::{Codec, DeflateSettings, Schema, Writer};
//! #
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
//! ```
//!
//! # Reading data
//!
//! As far as reading Avro encoded data goes, we can just use the schema encoded with the data to
//! read it. The library will do it automatically for us, as it already does for the compression
//! codec:
//!
//! ```
//! use apache_avro::Reader;
//! # use apache_avro::Schema;
//! # use apache_avro::types::Record;
//! # use apache_avro::Writer;
//! #
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! # let mut writer = Writer::new(&schema, Vec::new());
//! # let mut record = Record::new(writer.schema()).unwrap();
//! # record.put("a", 27i64);
//! # record.put("b", "foo");
//! # writer.append(record).unwrap();
//! # let input = writer.into_inner().unwrap();
//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
//! let reader = Reader::new(&input[..]).unwrap();
//! ```
//!
//! In case, instead, we want to specify a different (but compatible) reader schema from the one
//! the data has been written with, we can just do as follows:
//! ```
//! use apache_avro::Schema;
//! use apache_avro::Reader;
//! # use apache_avro::types::Record;
//! # use apache_avro::Writer;
//! #
//! # let writer_raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
//! # let mut writer = Writer::new(&writer_schema, Vec::new());
//! # let mut record = Record::new(writer.schema()).unwrap();
//! # record.put("a", 27i64);
//! # record.put("b", "foo");
//! # writer.append(record).unwrap();
//! # let input = writer.into_inner().unwrap();
//!
//! let reader_raw_schema = r#"
//!     {
//!         "type": "record",
//!         "name": "test",
//!         "fields": [
//!             {"name": "a", "type": "long", "default": 42},
//!             {"name": "b", "type": "string"},
//!             {"name": "c", "type": "long", "default": 43}
//!         ]
//!     }
//! "#;
//!
//! let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
//!
//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
//! let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap();
//! ```
//!
//! The library will also automatically perform schema resolution while reading the data.
//!
//! For more information about schema compatibility and resolution, please refer to the
//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-resolution).
//!
//! As usual, there are two ways to handle Avro data in Rust, as you can see below.
//!
//! **NOTE:** The library also provides a low-level interface for decoding a single datum in Avro
//! binary format without markers and header (for advanced use), but we highly recommend the
//! `Reader` interface to leverage all Avro features. Please read the API reference in case you
//! are interested.
//!
//! ## The avro way
//!
//! We can just read instances of `Value` directly out of the `Reader` iterator:
//!
//! ```
//! # use apache_avro::Schema;
//! # use apache_avro::types::Record;
//! # use apache_avro::Writer;
//! use apache_avro::Reader;
//! #
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! # let mut writer = Writer::new(&schema, Vec::new());
//! # let mut record = Record::new(writer.schema()).unwrap();
//! # record.put("a", 27i64);
//! # record.put("b", "foo");
//! # writer.append(record).unwrap();
//! # let input = writer.into_inner().unwrap();
//! let reader = Reader::new(&input[..]).unwrap();
//!
//! // each item is a Result, since the read operation can fail
//! for value in reader {
//!     println!("{:?}", value.unwrap());
//! }
//! ```
//!
//! ## The serde way
//!
//! Alternatively, we can use a Rust type implementing `Deserialize` and representing our schema to
//! read the data into:
//!
//! ```
//! # use apache_avro::Schema;
//! # use apache_avro::Writer;
//! # use serde::{Deserialize, Serialize};
//! use apache_avro::Reader;
//! use apache_avro::from_value;
//!
//! # #[derive(Serialize)]
//! #[derive(Debug, Deserialize)]
//! struct Test {
//!     a: i64,
//!     b: String,
//! }
//!
//! # let raw_schema = r#"
//! #     {
//! #         "type": "record",
//! #         "name": "test",
//! #         "fields": [
//! #             {"name": "a", "type": "long", "default": 42},
//! #             {"name": "b", "type": "string"}
//! #         ]
//! #     }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
//! # let mut writer = Writer::new(&schema, Vec::new());
//! # let test = Test {
//! #     a: 27,
//! #     b: "foo".to_owned(),
//! # };
//! # writer.append_ser(test).unwrap();
//! # let input = writer.into_inner().unwrap();
//! let reader = Reader::new(&input[..]).unwrap();
//!
//! // each item is a Result, since the read operation can fail
//! for value in reader {
//!     println!("{:?}", from_value::<Test>(&value.unwrap()));
//! }
//! ```
//!
//! # Putting everything together
//!
//! The following example combines everything shown so far and is meant to be a
//! quick reference of the library interface:
//!
//! ```
//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value, types::Record, Error};
//! use serde::{Deserialize, Serialize};
//!
//! #[derive(Debug, Deserialize, Serialize)]
//! struct Test {
//!     a: i64,
//!     b: String,
//! }
//!
//! fn main() -> Result<(), Error> {
//!     let raw_schema = r#"
//!         {
//!             "type": "record",
//!             "name": "test",
//!             "fields": [
//!                 {"name": "a", "type": "long", "default": 42},
//!                 {"name": "b", "type": "string"}
//!             ]
//!         }
//!     "#;
//!
//!     let schema = Schema::parse_str(raw_schema)?;
//!
//!     println!("{:?}", schema);
//!
//!     let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
//!
//!     let mut record = Record::new(writer.schema()).unwrap();
//!     record.put("a", 27i64);
//!     record.put("b", "foo");
//!
//!     writer.append(record)?;
//!
//!     let test = Test {
//!         a: 27,
//!         b: "foo".to_owned(),
//!     };
//!
//!     writer.append_ser(test)?;
//!
//!     let input = writer.into_inner()?;
//!     let reader = Reader::with_schema(&schema, &input[..])?;
//!
//!     for record in reader {
//!         println!("{:?}", from_value::<Test>(&record?));
//!     }
//!     Ok(())
//! }
//! ```
//!
//! `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/specification/#logical-types):
//!
//! 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/latest/num_bigint) crate
//! 1. UUID using the [`uuid`](https://docs.rs/uuid/latest/uuid) crate
//! 1. Date, Time (milli) as `i32` and Time (micro) as `i64`
//! 1. Timestamp (milli and micro) as `i64`
//! 1. Local timestamp (milli and micro) as `i64`
//! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods, each of which returns an `i32`
//!
//! Note that the on-disk representation is identical to the underlying primitive/complex type.
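//!
//! As a quick illustration of that last point, here is a minimal sketch showing that a
//! `timestamp-millis` value and a plain `long` produce the same bytes (using the low-level
//! `to_avro_datum` function introduced earlier):
//!
//! ```
//! use apache_avro::{to_avro_datum, types::Value, Schema};
//!
//! let long_schema = Schema::parse_str(r#""long""#).unwrap();
//! let ts_schema = Schema::parse_str(
//!     r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
//! ).unwrap();
//!
//! // the logical type only annotates the schema; the encoding is that of the underlying long
//! let as_long = to_avro_datum(&long_schema, Value::Long(4)).unwrap();
//! let as_timestamp = to_avro_datum(&ts_schema, Value::TimestampMillis(4)).unwrap();
//! assert_eq!(as_long, as_timestamp);
//! ```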
//!
//! ### Read and write logical types
//!
//! ```rust
//! use apache_avro::{
//!     types::Record, types::Value, Codec, Days, Decimal, DeflateSettings, Duration, Millis, Months, Reader, Schema,
//!     Writer, Error,
//! };
//! use num_bigint::ToBigInt;
//!
//! fn main() -> Result<(), Error> {
//!     let raw_schema = r#"
//!     {
//!       "type": "record",
//!       "name": "test",
//!       "fields": [
//!         {
//!           "name": "decimal_fixed",
//!           "type": {
//!             "type": "fixed",
//!             "size": 2,
//!             "name": "decimal"
//!           },
//!           "logicalType": "decimal",
//!           "precision": 4,
//!           "scale": 2
//!         },
//!         {
//!           "name": "decimal_var",
//!           "type": "bytes",
//!           "logicalType": "decimal",
//!           "precision": 10,
//!           "scale": 3
//!         },
//!         {
//!           "name": "uuid",
//!           "type": "string",
//!           "logicalType": "uuid"
//!         },
//!         {
//!           "name": "date",
//!           "type": "int",
//!           "logicalType": "date"
//!         },
//!         {
//!           "name": "time_millis",
//!           "type": "int",
//!           "logicalType": "time-millis"
//!         },
//!         {
//!           "name": "time_micros",
//!           "type": "long",
//!           "logicalType": "time-micros"
//!         },
//!         {
//!           "name": "timestamp_millis",
//!           "type": "long",
//!           "logicalType": "timestamp-millis"
//!         },
//!         {
//!           "name": "timestamp_micros",
//!           "type": "long",
//!           "logicalType": "timestamp-micros"
//!         },
//!         {
//!           "name": "local_timestamp_millis",
//!           "type": "long",
//!           "logicalType": "local-timestamp-millis"
//!         },
//!         {
//!           "name": "local_timestamp_micros",
//!           "type": "long",
//!           "logicalType": "local-timestamp-micros"
//!         },
//!         {
//!           "name": "duration",
//!           "type": {
//!             "type": "fixed",
//!             "size": 12,
//!             "name": "duration"
//!           },
//!           "logicalType": "duration"
//!         }
//!       ]
//!     }
//!     "#;
//!
//!     let schema = Schema::parse_str(raw_schema)?;
//!
//!     println!("{:?}", schema);
//!
//!     let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default()));
//!
//!     let mut record = Record::new(writer.schema()).unwrap();
//!     record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
//!     record.put("decimal_var", Decimal::from(((-32442).to_bigint().unwrap()).to_signed_bytes_be()));
//!     record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap());
//!     record.put("date", Value::Date(1));
//!     record.put("time_millis", Value::TimeMillis(2));
//!     record.put("time_micros", Value::TimeMicros(3));
//!     record.put("timestamp_millis", Value::TimestampMillis(4));
//!     record.put("timestamp_micros", Value::TimestampMicros(5));
//!     record.put("local_timestamp_millis", Value::LocalTimestampMillis(4));
//!     record.put("local_timestamp_micros", Value::LocalTimestampMicros(5));
//!     record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8)));
//!
//!     writer.append(record)?;
//!
//!     let input = writer.into_inner()?;
//!     let reader = Reader::with_schema(&schema, &input[..])?;
//!
//!     for record in reader {
//!         println!("{:?}", record?);
//!     }
//!     Ok(())
//! }
//! ```
//!
//! ## Calculate Avro schema fingerprint
//!
//! This library supports calculating the following fingerprints:
//!
//! - SHA-256
//! - MD5
//! - Rabin
//!
//! An example of computing the supported fingerprints:
//!
//! ```rust
//! use apache_avro::rabin::Rabin;
//! use apache_avro::{Schema, Error};
//! use md5::Md5;
//! use sha2::Sha256;
//!
//! fn main() -> Result<(), Error> {
//!     let raw_schema = r#"
//!         {
//!             "type": "record",
//!             "name": "test",
//!             "fields": [
//!                 {"name": "a", "type": "long", "default": 42},
//!                 {"name": "b", "type": "string"}
//!             ]
//!         }
//!     "#;
//!     let schema = Schema::parse_str(raw_schema)?;
//!     println!("{}", schema.fingerprint::<Sha256>());
//!     println!("{}", schema.fingerprint::<Md5>());
//!     println!("{}", schema.fingerprint::<Rabin>());
//!     Ok(())
//! }
//! ```
//!
//! ## Ill-formed data
//!
//! In order to ease decoding, the Binary Encoding specification of Avro data
//! requires some fields to have their length encoded alongside the data.
//!
//! If encoded data passed to a `Reader` is ill-formed, it can happen that
//! the bytes meant to contain the length of data are bogus and could result
//! in extravagant memory allocation.
//!
//! To shield users from ill-formed data, `apache-avro` sets a limit (default: 512MB)
//! to any allocation it will perform when decoding data.
//!
//! If you expect some of your data fields to be larger than this limit, be sure
//! to make use of the `max_allocation_bytes` function before reading **any** data
//! (we leverage Rust's [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html)
//! mechanism to initialize this value; if any call to decode is made before a call to
//! `max_allocation_bytes`, the limit will be 512MB throughout the lifetime of the program).
//!
//! ```rust
//! use apache_avro::max_allocation_bytes;
//!
//! max_allocation_bytes(2 * 1024 * 1024 * 1024); // 2GB
//!
//! // ... happily decode large data
//! ```
//!
//! ## Check schemas compatibility
//!
//! This library supports checking for schema compatibility.
//!
//! Examples of checking for compatibility:
//!
//! 1. Compatible schemas
//!
//! Explanation: an int array schema can be read by a long array schema, because an int
//! (32-bit signed integer) fits into a long (64-bit signed integer).
//!
//! ```rust
//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
//!
//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok());
//! ```
//!
//! 2. Incompatible schemas
//!
//! Explanation: a long array schema cannot be read by an int array schema, because a
//! long (64-bit signed integer) does not fit into an int (32-bit signed integer).
//!
//! ```rust
//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
//!
//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
//! ```
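//!
//! A related check is `SchemaCompatibility::mutual_read`, which verifies that each schema can
//! read data written with the other. A minimal sketch (the int-to-long promotion above only
//! works one way, so these two schemas are not mutually compatible):
//!
//! ```rust
//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
//!
//! let schema_one = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
//! let schema_two = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
//!
//! // reading long data with the int schema fails, so the pair is rejected
//! assert!(SchemaCompatibility::mutual_read(&schema_one, &schema_two).is_err());
//! ```
//!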
//! ## Custom names validators
//!
//! By default the library follows the rules defined by the
//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names).
//!
//! Some of the other Apache Avro language SDKs are not that strict and allow more
//! characters in names. For interoperability with those SDKs, the library provides
//! a way to customize the name validation.
//!
//! ```rust
//! use apache_avro::AvroResult;
//! use apache_avro::schema::Namespace;
//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator};
//!
//! struct MyCustomValidator;
//!
//! impl SchemaNameValidator for MyCustomValidator {
//!     fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> {
//!         todo!()
//!     }
//! }
//!
//! // don't parse any schema before registering the custom validator(s)!
//!
//! set_schema_name_validator(Box::new(MyCustomValidator));
//!
//! // ... use the library
//! ```
//!
//! Similar logic can be applied to the validation of schema namespaces, enum symbols and field names.
//!
//! **Note**: the library allows setting a validator only once per application lifetime!
//! If the application parses schemas before setting a validator, the default validator will be
//! registered and used!
//!
//! ## Custom schema equality comparators
//!
//! The library provides two implementations of schema equality comparators:
//! 1. `SpecificationEq` - a comparator that serializes the schemas to their
//!    canonical forms (i.e. JSON) and compares them as strings. It was the only implementation
//!    until apache_avro 0.16.0.
//!    See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas)
//!    for more information!
//! 2. `StructFieldEq` - a comparator that compares the schemas structurally.
//!    It is faster than `SpecificationEq` because it returns `false` as soon as a difference
//!    is found, and is recommended for use!
//!    It has been the default comparator since apache_avro 0.17.0.
//!
//! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the
//! `set_schemata_equality_comparator` function:
//!
//! ```rust
//! use apache_avro::{AvroResult, Schema};
//! use apache_avro::schema::Namespace;
//! use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator};
//!
//! #[derive(Debug)]
//! struct MyCustomSchemataEq;
//!
//! impl SchemataEq for MyCustomSchemataEq {
//!     fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool {
//!         todo!()
//!     }
//! }
//!
//! // don't parse any schema before registering the custom comparator!
//!
//! set_schemata_equality_comparator(Box::new(MyCustomSchemataEq));
//!
//! // ... use the library
//! ```
//!
//! **Note**: the library allows setting a comparator only once per application lifetime!
//! If the application parses schemas before setting a comparator, the default comparator will be
//! registered and used!

mod bigdecimal;
mod bytes;
mod codec;
mod de;
mod decimal;
mod decode;
mod duration;
mod encode;
mod error;
mod reader;
mod ser;
mod ser_schema;
mod util;
mod writer;

pub mod rabin;
pub mod schema;
pub mod schema_compatibility;
pub mod schema_equality;
pub mod types;
pub mod validator;

pub use crate::{
    bigdecimal::BigDecimal,
    bytes::{
        serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt,
        serde_avro_slice, serde_avro_slice_opt,
    },
};
#[cfg(feature = "bzip")]
pub use codec::bzip::Bzip2Settings;
#[cfg(feature = "xz")]
pub use codec::xz::XzSettings;
#[cfg(feature = "zstandard")]
pub use codec::zstandard::ZstandardSettings;
pub use codec::{Codec, DeflateSettings};
pub use de::from_value;
pub use decimal::Decimal;
pub use duration::{Days, Duration, Millis, Months};
pub use error::Error;
pub use reader::{
    from_avro_datum, from_avro_datum_reader_schemata, from_avro_datum_schemata, read_marker,
    GenericSingleObjectReader, Reader, SpecificSingleObjectReader,
};
pub use schema::{AvroSchema, Schema};
pub use ser::to_value;
pub use util::{max_allocation_bytes, set_serde_human_readable};
pub use uuid::Uuid;
pub use writer::{
    to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter,
    Writer,
};

#[cfg(feature = "derive")]
pub use apache_avro_derive::*;

/// A convenience type alias for `Result`s with `Error`s.
pub type AvroResult<T> = Result<T, Error>;

#[cfg(test)]
mod tests {
    use crate::{
        from_avro_datum,
        types::{Record, Value},
        Codec, Reader, Schema, Writer,
    };
    use pretty_assertions::assert_eq;

    //TODO: move where it fits better
    #[test]
    fn test_enum_default() {
        let writer_raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"}
                ]
            }
        "#;
        let reader_raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"},
                    {
                        "name": "c",
                        "type": {
                            "type": "enum",
                            "name": "suit",
                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
                        },
                        "default": "spades"
                    }
                ]
            }
        "#;
        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
        let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null);
        let mut record = Record::new(writer.schema()).unwrap();
        record.put("a", 27i64);
        record.put("b", "foo");
        writer.append(record).unwrap();
        let input = writer.into_inner().unwrap();
        let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(1, "spades".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    //TODO: move where it fits better
    #[test]
    fn test_enum_string_value() {
        let raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"},
                    {
                        "name": "c",
                        "type": {
                            "type": "enum",
                            "name": "suit",
                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
                        },
                        "default": "spades"
                    }
                ]
            }
        "#;
        let schema = Schema::parse_str(raw_schema).unwrap();
        let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null);
        let mut record = Record::new(writer.schema()).unwrap();
        record.put("a", 27i64);
        record.put("b", "foo");
        record.put("c", "clubs");
        writer.append(record).unwrap();
        let input = writer.into_inner().unwrap();
        let mut reader = Reader::with_schema(&schema, &input[..]).unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
        assert!(reader.next().is_none());
    }

    //TODO: move where it fits better
    #[test]
    fn test_enum_no_reader_schema() {
        let writer_raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"},
                    {
                        "name": "c",
                        "type": {
                            "type": "enum",
                            "name": "suit",
                            "symbols": ["diamonds", "spades", "clubs", "hearts"]
                        },
                        "default": "spades"
                    }
                ]
            }
        "#;
        let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
        let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null);
        let mut record = Record::new(writer.schema()).unwrap();
        record.put("a", 27i64);
        record.put("b", "foo");
        record.put("c", "clubs");
        writer.append(record).unwrap();
        let input = writer.into_inner().unwrap();
        let mut reader = Reader::new(&input[..]).unwrap();
        assert_eq!(
            reader.next().unwrap().unwrap(),
            Value::Record(vec![
                ("a".to_string(), Value::Long(27)),
                ("b".to_string(), Value::String("foo".to_string())),
                ("c".to_string(), Value::Enum(2, "clubs".to_string())),
            ])
        );
    }

    #[test]
    fn test_illformed_length() {
        let raw_schema = r#"
            {
                "type": "record",
                "name": "test",
                "fields": [
                    {"name": "a", "type": "long", "default": 42},
                    {"name": "b", "type": "string"}
                ]
            }
        "#;

        let schema = Schema::parse_str(raw_schema).unwrap();

        // would allocate 18446744073709551605 bytes
        let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff];

        let value = from_avro_datum(&schema, &mut &*illformed, None);
        assert!(value.is_err());
    }
}