apache_avro/
headers.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Handling of Avro magic headers
19use uuid::Uuid;
20
21use crate::{rabin::Rabin, schema::SchemaFingerprint, AvroResult, Schema};
22
/// This trait represents that an object is able to construct an Avro message header. It is
/// implemented for some known header types already. If you need a header type that is not already
/// included here, then you can create your own struct and implement this trait.
pub trait HeaderBuilder {
    /// Returns the header as an owned byte vector.
    ///
    /// The implementations in this module return a two-byte prefix followed by the bytes that
    /// identify the schema (a fingerprint or a UUID).
    fn build_header(&self) -> Vec<u8>;
}
29
/// HeaderBuilder based on the Rabin schema fingerprint
///
/// This is the default and will be used automatically by the `new` impls in
/// [crate::reader::GenericSingleObjectReader] and [crate::writer::GenericSingleObjectWriter].
pub struct RabinFingerprintHeader {
    /// Fingerprint of the schema; its first eight bytes are written into the header.
    fingerprint: SchemaFingerprint,
}
37
38impl RabinFingerprintHeader {
39    /// Use this helper to build an instance from an existing Avro `Schema`.
40    pub fn from_schema(schema: &Schema) -> Self {
41        let fingerprint = schema.fingerprint::<Rabin>();
42        RabinFingerprintHeader { fingerprint }
43    }
44}
45
46impl HeaderBuilder for RabinFingerprintHeader {
47    fn build_header(&self) -> Vec<u8> {
48        let bytes = &self.fingerprint.bytes;
49        vec![
50            0xC3, 0x01, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6],
51            bytes[7],
52        ]
53    }
54}
55
/// HeaderBuilder based on
/// [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html) schema UUID
///
/// See the function docs for usage details
pub struct GlueSchemaUuidHeader {
    /// UUID of the Glue schema; its 16 bytes follow the two-byte prefix in the header.
    schema_uuid: Uuid,
}
63
64impl GlueSchemaUuidHeader {
65    /// Create an instance of the struct from a Glue Schema UUID
66    ///
67    /// Code for writing messages will most likely want to use this. You will need to determine
68    /// via other means the correct Glue schema UUID and use it with this method to be able to
69    /// create Avro-encoded messages with the correct headers.
70    pub fn from_uuid(schema_uuid: Uuid) -> Self {
71        GlueSchemaUuidHeader { schema_uuid }
72    }
73
74    /// The minimum length of a Glue header.
75    /// 2 bytes for the special prefix (3, 0) plus
76    /// 16 bytes for the Uuid
77    const GLUE_HEADER_LENGTH: usize = 18;
78
79    /// Create an instance of the struct based on parsing the UUID out of the header of a raw
80    /// message
81    ///
82    /// Code for reading messages will most likely want to use this. Once you receive the raw bytes
83    /// of a message, use this function to build the struct from it. That struct can then be used
84    /// with the below `schema_uuid` function to retrieve the UUID in order to retrieve the correct
85    /// schema for the message. You can then use the raw message, the schema, and the struct
86    /// instance to read the message.
87    pub fn parse_from_raw_avro(message_payload: &[u8]) -> AvroResult<Self> {
88        if message_payload.len() < Self::GLUE_HEADER_LENGTH {
89            return Err(crate::error::Error::HeaderMagic);
90        }
91        let schema_uuid =
92            Uuid::from_slice(&message_payload[2..18]).map_err(crate::Error::UuidFromSlice)?;
93        Ok(GlueSchemaUuidHeader { schema_uuid })
94    }
95
96    /// Retrieve the UUID from the object
97    ///
98    /// This is most useful in conjunction with the `parse_from_raw_avro` function to retrieve the
99    /// actual UUID from the raw data of a received message.
100    pub fn schema_uuid(&self) -> Uuid {
101        self.schema_uuid
102    }
103}
104
105impl HeaderBuilder for GlueSchemaUuidHeader {
106    fn build_header(&self) -> Vec<u8> {
107        let mut output_vec: Vec<u8> = vec![3, 0];
108        output_vec.extend_from_slice(self.schema_uuid.as_bytes());
109        output_vec
110    }
111}
112
#[cfg(test)]
mod test {
    use super::*;
    use apache_avro_test_helper::TestResult;

    /// Textual form of the Glue schema UUID shared by the Glue header tests.
    const GLUE_UUID_STR: &str = "b2f1cf00-0434-013e-439a-125eb8485a5f";

    /// The complete 18-byte Glue header that corresponds to `GLUE_UUID_STR`.
    const GLUE_HEADER_BYTES: [u8; 18] = [
        3, 0, 178, 241, 207, 0, 4, 52, 1, 62, 67, 154, 18, 94, 184, 72, 90, 95,
    ];

    /// The header built from a known schema must match the known ten-byte value.
    #[test]
    fn test_rabin_fingerprint_header() -> TestResult {
        let raw_schema = r#"
            {
            "type": "record",
            "name": "test",
            "fields": [
                {
                "name": "a",
                "type": "long",
                "default": 42
                },
                {
                "name": "b",
                "type": "string"
                }
            ]
            }
            "#;
        let parsed = Schema::parse_str(raw_schema)?;
        let actual = RabinFingerprintHeader::from_schema(&parsed).build_header();
        let expected: Vec<u8> = vec![195, 1, 232, 198, 194, 12, 97, 95, 44, 71];
        assert_eq!(actual, expected);
        Ok(())
    }

    /// Building from a UUID yields the prefix followed by the UUID bytes.
    #[test]
    fn test_glue_schema_header() -> TestResult {
        let uuid = Uuid::parse_str(GLUE_UUID_STR)?;
        let actual = GlueSchemaUuidHeader::from_uuid(uuid).build_header();
        let expected: Vec<u8> = GLUE_HEADER_BYTES.to_vec();
        assert_eq!(actual, expected);
        Ok(())
    }

    /// Parsing a message that starts with a full header recovers the UUID.
    #[test]
    fn test_glue_header_parse() -> TestResult {
        // The 18-byte header followed by three payload bytes.
        let mut raw_message: Vec<u8> = GLUE_HEADER_BYTES.to_vec();
        raw_message.extend_from_slice(&[65, 65, 65]);

        let parsed = GlueSchemaUuidHeader::parse_from_raw_avro(&raw_message)?;
        let expected_uuid = Uuid::parse_str(GLUE_UUID_STR)?;
        assert_eq!(parsed.schema_uuid(), expected_uuid);
        Ok(())
    }

    /// A payload shorter than a full header is rejected with `HeaderMagic`.
    #[test]
    fn test_glue_header_parse_err_on_message_too_short() -> TestResult {
        let truncated: Vec<u8> = vec![3, 0, 178, 241, 207, 0, 4, 52, 1];
        let outcome = GlueSchemaUuidHeader::parse_from_raw_avro(&truncated);
        assert!(matches!(
            outcome,
            Err(crate::error::Error::HeaderMagic)
        ));
        Ok(())
    }
}