apache_avro/
headers.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Handling of Avro magic headers
19use uuid::Uuid;
20
21use crate::{AvroResult, Schema, rabin::Rabin, schema::SchemaFingerprint};
22
/// Abstraction over anything that knows how to produce an Avro message header.
///
/// Implementations for some known header types are already provided in this module. When a
/// header type that is not covered here is needed, define your own struct and implement this
/// trait for it.
pub trait HeaderBuilder {
    /// Returns the header as a freshly allocated byte vector.
    fn build_header(&self) -> Vec<u8>;
}
29
30/// HeaderBuilder based on the Rabin schema fingerprint
31///
32/// This is the default and will be used automatically by the `new` impls in
33/// [crate::reader::GenericSingleObjectReader] and [crate::writer::GenericSingleObjectWriter].
34pub struct RabinFingerprintHeader {
35    fingerprint: SchemaFingerprint,
36}
37
38impl RabinFingerprintHeader {
39    /// Use this helper to build an instance from an existing Avro `Schema`.
40    pub fn from_schema(schema: &Schema) -> Self {
41        let fingerprint = schema.fingerprint::<Rabin>();
42        RabinFingerprintHeader { fingerprint }
43    }
44}
45
46impl HeaderBuilder for RabinFingerprintHeader {
47    fn build_header(&self) -> Vec<u8> {
48        let bytes = &self.fingerprint.bytes;
49        vec![
50            0xC3, 0x01, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6],
51            bytes[7],
52        ]
53    }
54}
55
56/// HeaderBuilder based on
57/// [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html) schema UUID
58///
59/// See the function docs for usage details
60pub struct GlueSchemaUuidHeader {
61    schema_uuid: Uuid,
62}
63
64impl GlueSchemaUuidHeader {
65    /// Create an instance of the struct from a Glue Schema UUID
66    ///
67    /// Code for writing messages will most likely want to use this. You will need to determine
68    /// via other means the correct Glue schema UUID and use it with this method to be able to
69    /// create Avro-encoded messages with the correct headers.
70    pub fn from_uuid(schema_uuid: Uuid) -> Self {
71        GlueSchemaUuidHeader { schema_uuid }
72    }
73
74    /// The minimum length of a Glue header.
75    /// 2 bytes for the special prefix (3, 0) plus
76    /// 16 bytes for the Uuid
77    const GLUE_HEADER_LENGTH: usize = 18;
78
79    /// Create an instance of the struct based on parsing the UUID out of the header of a raw
80    /// message
81    ///
82    /// Code for reading messages will most likely want to use this. Once you receive the raw bytes
83    /// of a message, use this function to build the struct from it. That struct can then be used
84    /// with the below `schema_uuid` function to retrieve the UUID in order to retrieve the correct
85    /// schema for the message. You can then use the raw message, the schema, and the struct
86    /// instance to read the message.
87    pub fn parse_from_raw_avro(message_payload: &[u8]) -> AvroResult<Self> {
88        if message_payload.len() < Self::GLUE_HEADER_LENGTH {
89            return Err(crate::error::Details::HeaderMagic.into());
90        }
91        let schema_uuid = Uuid::from_slice(&message_payload[2..18])
92            .map_err(crate::error::Details::UuidFromSlice)?;
93        Ok(GlueSchemaUuidHeader { schema_uuid })
94    }
95
96    /// Retrieve the UUID from the object
97    ///
98    /// This is most useful in conjunction with the `parse_from_raw_avro` function to retrieve the
99    /// actual UUID from the raw data of a received message.
100    pub fn schema_uuid(&self) -> Uuid {
101        self.schema_uuid
102    }
103}
104
105impl HeaderBuilder for GlueSchemaUuidHeader {
106    fn build_header(&self) -> Vec<u8> {
107        let mut output_vec: Vec<u8> = vec![3, 0];
108        output_vec.extend_from_slice(self.schema_uuid.as_bytes());
109        output_vec
110    }
111}
112
#[cfg(test)]
mod test {
    use super::*;
    use crate::{Error, error::Details};
    use apache_avro_test_helper::TestResult;

    /// Textual form of the Glue schema UUID shared by the Glue tests.
    const GLUE_UUID_STR: &str = "b2f1cf00-0434-013e-439a-125eb8485a5f";

    /// Complete Glue header for `GLUE_UUID_STR`: the prefix `3, 0` plus the 16 UUID bytes.
    const GLUE_HEADER_BYTES: [u8; 18] = [
        3, 0, 178, 241, 207, 0, 4, 52, 1, 62, 67, 154, 18, 94, 184, 72, 90, 95,
    ];

    /// The Rabin header of a known schema matches the expected ten bytes.
    #[test]
    fn test_rabin_fingerprint_header() -> TestResult {
        let raw_schema = r#"
            {
            "type": "record",
            "name": "test",
            "fields": [
                {
                "name": "a",
                "type": "long",
                "default": 42
                },
                {
                "name": "b",
                "type": "string"
                }
            ]
            }
            "#;
        let parsed_schema = Schema::parse_str(raw_schema)?;
        let actual = RabinFingerprintHeader::from_schema(&parsed_schema).build_header();
        let expected: Vec<u8> = vec![195, 1, 232, 198, 194, 12, 97, 95, 44, 71];
        assert_eq!(actual, expected);
        Ok(())
    }

    /// Building from a UUID yields the prefix followed by the UUID bytes.
    #[test]
    fn test_glue_schema_header() -> TestResult {
        let uuid = Uuid::parse_str(GLUE_UUID_STR)?;
        let actual = GlueSchemaUuidHeader::from_uuid(uuid).build_header();
        let expected: Vec<u8> = GLUE_HEADER_BYTES.to_vec();
        assert_eq!(actual, expected);
        Ok(())
    }

    /// Parsing a message that starts with a full header recovers the UUID.
    #[test]
    fn test_glue_header_parse() -> TestResult {
        // A complete header followed by three bytes of message body.
        let mut raw_message: Vec<u8> = GLUE_HEADER_BYTES.to_vec();
        raw_message.extend_from_slice(&[65, 65, 65]);

        let parsed = GlueSchemaUuidHeader::parse_from_raw_avro(&raw_message)?;
        let expected_uuid = Uuid::parse_str(GLUE_UUID_STR)?;
        assert_eq!(parsed.schema_uuid(), expected_uuid);
        Ok(())
    }

    /// A payload shorter than the header is rejected with `Details::HeaderMagic`.
    #[test]
    fn test_glue_header_parse_err_on_message_too_short() -> TestResult {
        let truncated: Vec<u8> = vec![3, 0, 178, 241, 207, 0, 4, 52, 1];
        let outcome =
            GlueSchemaUuidHeader::parse_from_raw_avro(&truncated).map_err(Error::into_details);
        assert!(matches!(outcome, Err(Details::HeaderMagic)));
        Ok(())
    }
}