apache_avro/documentation/dynamic.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! # Using Avro in Rust, the dynamic way.
19//!
20//! ## Creating a schema
21//!
//! Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
23//! **can** be used while reading and they carry the information regarding the type of data we are
24//! handling. Avro schemas are used for both schema validation and resolution of Avro data.
25//!
26//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string:
27//!
28//! ```
29//! use apache_avro::Schema;
30//!
31//! let raw_schema = r#"
32//! {
33//! "type": "record",
34//! "name": "test",
35//! "fields": [
36//! {"name": "a", "type": "long", "default": 42},
37//! {"name": "b", "type": "string"}
38//! ]
39//! }
40//! "#;
41//!
42//! // if the schema is not valid, this function will return an error
43//! let schema = Schema::parse_str(raw_schema).unwrap();
44//!
45//! // schemas can be printed for debugging
46//! println!("{:?}", schema);
47//! ```
48//!
49//! Additionally, a list of definitions (which may depend on each other) can be given and all of
50//! them will be parsed into the corresponding schemas.
51//!
52//! ```
53//! use apache_avro::Schema;
54//!
55//! let raw_schema_1 = r#"{
56//! "name": "A",
57//! "type": "record",
58//! "fields": [
59//! {"name": "field_one", "type": "float"}
60//! ]
61//! }"#;
62//!
63//! // This definition depends on the definition of A above
64//! let raw_schema_2 = r#"{
65//! "name": "B",
66//! "type": "record",
67//! "fields": [
68//! {"name": "field_one", "type": "A"}
69//! ]
70//! }"#;
71//!
72//! // if the schemas are not valid, this function will return an error
73//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
74//!
75//! // schemas can be printed for debugging
76//! println!("{:?}", schemas);
77//! ```
78//!
79//! ## Writing data
80//!
//! Once we have defined a schema, we are ready to serialize data in Avro, validating it against
//! the provided schema in the process. As mentioned before, there are two ways of handling Avro
83//! data in Rust.
84//!
85//! Given that the schema we defined above is that of an Avro *Record*, we are going to use the
86//! associated type provided by the library to specify the data we want to serialize:
87//!
88//! ```
89//! # use apache_avro::Schema;
90//! use apache_avro::types::Record;
91//! use apache_avro::Writer;
92//! #
93//! # let raw_schema = r#"
94//! # {
95//! # "type": "record",
96//! # "name": "test",
97//! # "fields": [
98//! # {"name": "a", "type": "long", "default": 42},
99//! # {"name": "b", "type": "string"}
100//! # ]
101//! # }
102//! # "#;
103//! # let schema = Schema::parse_str(raw_schema).unwrap();
104//! // a writer needs a schema and something to write to
105//! let mut writer = Writer::new(&schema, Vec::new()).unwrap();
106//!
107//! // the Record type models our Record schema
108//! let mut record = Record::new(writer.schema()).unwrap();
109//! record.put("a", 27i64);
110//! record.put("b", "foo");
111//!
112//! // schema validation happens here
113//! writer.append_value(record).unwrap();
114//!
115//! // this is how to get back the resulting Avro bytecode
116//! // this performs a flush operation to make sure data has been written, so it can fail
117//! // you can also call `writer.flush()` yourself without consuming the writer
118//! let encoded = writer.into_inner().unwrap();
119//! ```
120//!
//! The vast majority of the time, schemas tend to define a record as a top-level container
122//! encapsulating all the values to convert as fields and providing documentation for them, but in
123//! case we want to directly define an Avro value, the library offers that capability via the
124//! `Value` interface.
125//!
126//! ```
127//! use apache_avro::types::Value;
128//!
129//! let mut value = Value::String("foo".to_string());
130//! ```
131//!
132//! ## Reading data
133//!
//! As far as reading Avro-encoded data goes, we can just use the schema encoded with the data to
//! read it. The library will do this automatically for us, as it already does for the compression
136//! codec:
137//!
138//! ```
139//! use apache_avro::Reader;
140//! # use apache_avro::Schema;
141//! # use apache_avro::types::Record;
142//! # use apache_avro::Writer;
143//! #
144//! # let raw_schema = r#"
145//! # {
146//! # "type": "record",
147//! # "name": "test",
148//! # "fields": [
149//! # {"name": "a", "type": "long", "default": 42},
150//! # {"name": "b", "type": "string"}
151//! # ]
152//! # }
153//! # "#;
154//! # let schema = Schema::parse_str(raw_schema).unwrap();
155//! # let mut writer = Writer::new(&schema, Vec::new()).unwrap();
156//! # let mut record = Record::new(writer.schema()).unwrap();
157//! # record.put("a", 27i64);
158//! # record.put("b", "foo");
159//! # writer.append_value(record).unwrap();
160//! # let input = writer.into_inner().unwrap();
161//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
162//! let reader = Reader::new(&input[..]).unwrap();
163//!
164//! // value is a Result of an Avro Value in case the read operation fails
165//! for value in reader {
166//! println!("{:?}", value.unwrap());
167//! }
168//! ```
169//!
//! If, instead, we want to specify a different (but compatible) reader schema from the schema
//! the data was written with, we can do the following:
172//! ```
173//! use apache_avro::Schema;
174//! use apache_avro::Reader;
175//! # use apache_avro::types::Record;
176//! # use apache_avro::Writer;
177//! #
178//! # let writer_raw_schema = r#"
179//! # {
180//! # "type": "record",
181//! # "name": "test",
182//! # "fields": [
183//! # {"name": "a", "type": "long", "default": 42},
184//! # {"name": "b", "type": "string"}
185//! # ]
186//! # }
187//! # "#;
188//! # let writer_schema = Schema::parse_str(writer_raw_schema).unwrap();
189//! # let mut writer = Writer::new(&writer_schema, Vec::new()).unwrap();
190//! # let mut record = Record::new(writer.schema()).unwrap();
191//! # record.put("a", 27i64);
192//! # record.put("b", "foo");
193//! # writer.append_value(record).unwrap();
194//! # let input = writer.into_inner().unwrap();
195//!
196//! let reader_raw_schema = r#"
197//! {
198//! "type": "record",
199//! "name": "test",
200//! "fields": [
201//! {"name": "a", "type": "long", "default": 42},
202//! {"name": "b", "type": "string"},
203//! {"name": "c", "type": "long", "default": 43}
204//! ]
205//! }
206//! "#;
207//!
208//! let reader_schema = Schema::parse_str(reader_raw_schema).unwrap();
209//!
210//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed
211//! let reader = Reader::builder(&input[..]).reader_schema(&reader_schema).build().unwrap();
212//!
213//! // value is a Result of an Avro Value in case the read operation fails
214//! for value in reader {
215//! println!("{:?}", value.unwrap());
216//! }
217//! ```
218//!
219//! The library will also automatically perform schema resolution while reading the data.
220//!
221//! For more information about schema compatibility and resolution, please refer to the
222//! [Avro Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration).
223//!
224//! ## Putting everything together
225//!
//! The following is an example of how to combine everything shown so far, and it is meant to be a
227//! quick reference of the [`Value`](crate::types::Value) interface:
228//!
229//! ```
230//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value, types::Record, Error};
231//! use serde::{Deserialize, Serialize};
232//!
233//! #[derive(Debug, Deserialize, Serialize)]
234//! struct Test {
235//! a: i64,
236//! b: String,
237//! }
238//!
239//! fn main() -> Result<(), Error> {
240//! let raw_schema = r#"
241//! {
242//! "type": "record",
243//! "name": "test",
244//! "fields": [
245//! {"name": "a", "type": "long", "default": 42},
246//! {"name": "b", "type": "string"}
247//! ]
248//! }
249//! "#;
250//!
251//! let schema = Schema::parse_str(raw_schema)?;
252//!
253//! println!("{:?}", schema);
254//!
255//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default())).unwrap();
256//!
257//! let mut record = Record::new(writer.schema()).unwrap();
258//! record.put("a", 27i64);
259//! record.put("b", "foo");
260//!
261//! writer.append_value(record)?;
262//!
263//! let test = Test {
264//! a: 27,
265//! b: "foo".to_owned(),
266//! };
267//!
268//! writer.append_ser(test)?;
269//!
270//! let input = writer.into_inner()?;
271//! let reader = Reader::builder(&input[..]).reader_schema(&schema).build()?;
272//!
273//! for record in reader {
274//! println!("{:?}", from_value::<Test>(&record?));
275//! }
276//! Ok(())
277//! }
278//! ```
279//!