apache_avro/
headers.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Handling of Avro magic headers
19use uuid::Uuid;
20
21use crate::{AvroResult, Schema, rabin::Rabin, schema::SchemaFingerprint};
22
/// Producer of the header bytes of an Avro message.
///
/// Implementations for the header flavours known to this module are provided already. For any
/// other flavour, define your own struct and implement this trait for it.
pub trait HeaderBuilder {
    /// Returns the complete header as an owned byte vector.
    fn build_header(&self) -> Vec<u8>;
}
30
31/// [`HeaderBuilder`] based on the Rabin schema fingerprint.
32///
33/// This is the default and will be used automatically by the `new` impls in
34/// [`GenericSingleObjectReader`](crate::GenericSingleObjectReader) and [`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter).
35pub struct RabinFingerprintHeader {
36    fingerprint: SchemaFingerprint,
37}
38
39impl RabinFingerprintHeader {
40    /// Use this helper to build an instance from an existing Avro `Schema`.
41    pub fn from_schema(schema: &Schema) -> Self {
42        let fingerprint = schema.fingerprint::<Rabin>();
43        RabinFingerprintHeader { fingerprint }
44    }
45}
46
47impl HeaderBuilder for RabinFingerprintHeader {
48    fn build_header(&self) -> Vec<u8> {
49        let bytes = &self.fingerprint.bytes;
50        vec![
51            0xC3, 0x01, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6],
52            bytes[7],
53        ]
54    }
55}
56
57/// [`HeaderBuilder`] for [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html).
58///
59/// See the function docs for usage details.
60pub struct GlueSchemaUuidHeader {
61    schema_uuid: Uuid,
62}
63
64impl GlueSchemaUuidHeader {
65    /// Create an instance of the struct from a Glue Schema UUID.
66    ///
67    /// Code for writing messages will most likely want to use this. You will need to determine
68    /// via other means the correct Glue schema UUID and use it with this method to be able to
69    /// create Avro-encoded messages with the correct headers.
70    pub fn from_uuid(schema_uuid: Uuid) -> Self {
71        GlueSchemaUuidHeader { schema_uuid }
72    }
73
74    /// The minimum length of a Glue header.
75    ///
76    /// 2 bytes for the special prefix (3, 0) plus 16 bytes for the Uuid.
77    const GLUE_HEADER_LENGTH: usize = 18;
78
79    /// Create an instance of the struct based on parsing the UUID out of the header of a raw message
80    ///
81    /// Code for reading messages will most likely want to use this. Once you receive the raw bytes
82    /// of a message, use this function to build the struct from it. That struct can then be used
83    /// with the below `schema_uuid` function to retrieve the UUID in order to retrieve the correct
84    /// schema for the message. You can then use the raw message, the schema, and the struct
85    /// instance to read the message.
86    pub fn parse_from_raw_avro(message_payload: &[u8]) -> AvroResult<Self> {
87        if message_payload.len() < Self::GLUE_HEADER_LENGTH {
88            return Err(crate::error::Details::HeaderMagic.into());
89        }
90        let schema_uuid = Uuid::from_slice(&message_payload[2..18])
91            .map_err(crate::error::Details::UuidFromSlice)?;
92        Ok(GlueSchemaUuidHeader { schema_uuid })
93    }
94
95    /// Retrieve the UUID from the object
96    ///
97    /// This is most useful in conjunction with the `parse_from_raw_avro` function to retrieve the
98    /// actual UUID from the raw data of a received message.
99    pub fn schema_uuid(&self) -> Uuid {
100        self.schema_uuid
101    }
102}
103
104impl HeaderBuilder for GlueSchemaUuidHeader {
105    fn build_header(&self) -> Vec<u8> {
106        let mut output_vec: Vec<u8> = vec![3, 0];
107        output_vec.extend_from_slice(self.schema_uuid.as_bytes());
108        output_vec
109    }
110}
111
#[cfg(test)]
mod test {
    use super::*;
    use crate::{Error, error::Details};
    use apache_avro_test_helper::TestResult;

    /// The Rabin header of a small record schema matches the known byte sequence.
    #[test]
    fn test_rabin_fingerprint_header() -> TestResult {
        const RECORD_SCHEMA: &str = r#"
            {
            "type": "record",
            "name": "test",
            "fields": [
                {
                "name": "a",
                "type": "long",
                "default": 42
                },
                {
                "name": "b",
                "type": "string"
                }
            ]
            }
            "#;
        let parsed_schema = Schema::parse_str(RECORD_SCHEMA)?;
        let actual = RabinFingerprintHeader::from_schema(&parsed_schema).build_header();
        let expected: [u8; 10] = [195, 1, 232, 198, 194, 12, 97, 95, 44, 71];
        assert_eq!(actual, expected);
        Ok(())
    }

    /// The Glue header is the (3, 0) prefix followed by the UUID bytes.
    #[test]
    fn test_glue_schema_header() -> TestResult {
        let uuid = Uuid::parse_str("b2f1cf00-0434-013e-439a-125eb8485a5f")?;
        let actual = GlueSchemaUuidHeader::from_uuid(uuid).build_header();
        let expected: [u8; 18] = [
            3, 0, 178, 241, 207, 0, 4, 52, 1, 62, 67, 154, 18, 94, 184, 72, 90, 95,
        ];
        assert_eq!(actual, expected);
        Ok(())
    }

    /// Parsing a raw message recovers the UUID and ignores the bytes after the header.
    #[test]
    fn test_glue_header_parse() -> TestResult {
        let raw_message: [u8; 21] = [
            3, 0, 178, 241, 207, 0, 4, 52, 1, 62, 67, 154, 18, 94, 184, 72, 90, 95, 65, 65, 65,
        ];
        let parsed = GlueSchemaUuidHeader::parse_from_raw_avro(&raw_message)?;
        let wanted = Uuid::parse_str("b2f1cf00-0434-013e-439a-125eb8485a5f")?;
        assert_eq!(parsed.schema_uuid(), wanted);
        Ok(())
    }

    /// A payload shorter than a full Glue header is rejected with `HeaderMagic`.
    #[test]
    fn test_glue_header_parse_err_on_message_too_short() -> TestResult {
        let truncated: [u8; 9] = [3, 0, 178, 241, 207, 0, 4, 52, 1];
        let outcome =
            GlueSchemaUuidHeader::parse_from_raw_avro(&truncated).map_err(Error::into_details);
        assert!(matches!(outcome, Err(Details::HeaderMagic)));
        Ok(())
    }
}