apache_avro/
util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Utility functions, like configuring various global settings.
19
20use crate::{AvroResult, error::Details, schema::Documentation};
21use serde_json::{Map, Value};
22use std::{
23    io::{Read, Write},
24    sync::OnceLock,
25};
26
/// Maximum number of bytes that can be allocated when decoding
/// Avro-encoded values. This is a protection against ill-formed
/// data, whose length field might be interpreted as enormous.
/// See [`max_allocation_bytes`] to change this limit.
pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024;
// Effective allocation limit. Written at most once, by the first call to
// `max_allocation_bytes`; read-only afterwards.
static MAX_ALLOCATION_BYTES: OnceLock<usize> = OnceLock::new();

/// Whether to set serialization & deserialization traits
/// as `human_readable` or not.
/// See [`set_serde_human_readable`] to change this value.
// crate-visible for testing
pub(crate) static SERDE_HUMAN_READABLE: OnceLock<bool> = OnceLock::new();
/// Default for whether the serializer and deserializer should indicate to types that the format
/// is human-readable. Used when [`set_serde_human_readable`] was not called before the value is
/// first read.
pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false;
41
42pub(crate) trait MapHelper {
43    fn string(&self, key: &str) -> Option<String>;
44
45    fn name(&self) -> Option<String> {
46        self.string("name")
47    }
48
49    fn doc(&self) -> Documentation {
50        self.string("doc")
51    }
52
53    fn aliases(&self) -> Option<Vec<String>>;
54}
55
56impl MapHelper for Map<String, Value> {
57    fn string(&self, key: &str) -> Option<String> {
58        self.get(key)
59            .and_then(|v| v.as_str())
60            .map(|v| v.to_string())
61    }
62
63    fn aliases(&self) -> Option<Vec<String>> {
64        // FIXME no warning when aliases aren't a json array of json strings
65        self.get("aliases")
66            .and_then(|aliases| aliases.as_array())
67            .and_then(|aliases| {
68                aliases
69                    .iter()
70                    .map(|alias| alias.as_str())
71                    .map(|alias| alias.map(|a| a.to_string()))
72                    .collect::<Option<_>>()
73            })
74    }
75}
76
77pub(crate) fn read_long<R: Read>(reader: &mut R) -> AvroResult<i64> {
78    zag_i64(reader)
79}
80
81pub(crate) fn zig_i32<W: Write>(n: i32, buffer: W) -> AvroResult<usize> {
82    zig_i64(n as i64, buffer)
83}
84
85pub(crate) fn zig_i64<W: Write>(n: i64, writer: W) -> AvroResult<usize> {
86    encode_variable(((n << 1) ^ (n >> 63)) as u64, writer)
87}
88
89pub(crate) fn zag_i32<R: Read>(reader: &mut R) -> AvroResult<i32> {
90    let i = zag_i64(reader)?;
91    i32::try_from(i).map_err(|e| Details::ZagI32(e, i).into())
92}
93
94pub(crate) fn zag_i64<R: Read>(reader: &mut R) -> AvroResult<i64> {
95    let z = decode_variable(reader)?;
96    Ok(if z & 0x1 == 0 {
97        (z >> 1) as i64
98    } else {
99        !(z >> 1) as i64
100    })
101}
102
103fn encode_variable<W: Write>(mut z: u64, mut writer: W) -> AvroResult<usize> {
104    let mut buffer = [0u8; 10];
105    let mut i: usize = 0;
106    loop {
107        if z <= 0x7F {
108            buffer[i] = (z & 0x7F) as u8;
109            i += 1;
110            break;
111        } else {
112            buffer[i] = (0x80 | (z & 0x7F)) as u8;
113            i += 1;
114            z >>= 7;
115        }
116    }
117    writer
118        .write(&buffer[..i])
119        .map_err(|e| Details::WriteBytes(e).into())
120}
121
122fn decode_variable<R: Read>(reader: &mut R) -> AvroResult<u64> {
123    let mut i = 0u64;
124    let mut buf = [0u8; 1];
125
126    let mut j = 0;
127    loop {
128        if j > 9 {
129            // if j * 7 > 64
130            return Err(Details::IntegerOverflow.into());
131        }
132        reader
133            .read_exact(&mut buf[..])
134            .map_err(Details::ReadVariableIntegerBytes)?;
135        i |= (u64::from(buf[0] & 0x7F)) << (j * 7);
136        if (buf[0] >> 7) == 0 {
137            break;
138        } else {
139            j += 1;
140        }
141    }
142
143    Ok(i)
144}
145
146/// Set the maximum number of bytes that can be allocated when decoding data.
147///
148/// This function only changes the setting once. On subsequent calls the value will stay the same
149/// as the first time it is called. It is automatically called on first allocation and defaults to
150/// [`DEFAULT_MAX_ALLOCATION_BYTES`].
151///
152/// # Returns
153/// The configured maximum, which might be different from what the function was called with if the
154/// value was already set before.
155pub fn max_allocation_bytes(num_bytes: usize) -> usize {
156    *MAX_ALLOCATION_BYTES.get_or_init(|| num_bytes)
157}
158
159pub(crate) fn safe_len(len: usize) -> AvroResult<usize> {
160    let max_bytes = max_allocation_bytes(DEFAULT_MAX_ALLOCATION_BYTES);
161
162    if len <= max_bytes {
163        Ok(len)
164    } else {
165        Err(Details::MemoryAllocation {
166            desired: len,
167            maximum: max_bytes,
168        }
169        .into())
170    }
171}
172
173/// Set whether the serializer and deserializer should indicate to types that the format is human-readable.
174///
175/// This function only changes the setting once. On subsequent calls the value will stay the same
176/// as the first time it is called. It is automatically called on first allocation and defaults to
177/// [`DEFAULT_SERDE_HUMAN_READABLE`].
178///
179/// *NOTE*: Changing this setting can change the output of [`from_value`](crate::from_value) and the
180/// accepted input of [`to_value`](crate::to_value).
181///
182/// # Returns
183/// The configured human-readable value, which might be different from what the function was called
184/// with if the value was already set before.
185pub fn set_serde_human_readable(human_readable: bool) -> bool {
186    *SERDE_HUMAN_READABLE.get_or_init(|| human_readable)
187}
188
189pub(crate) fn is_human_readable() -> bool {
190    *SERDE_HUMAN_READABLE.get_or_init(|| DEFAULT_SERDE_HUMAN_READABLE)
191}
192
#[cfg(test)]
mod tests {
    use super::*;
    use apache_avro_test_helper::TestResult;
    use pretty_assertions::assert_eq;

    /// An `i32` and the equal `i64` must produce identical bytes.
    #[test]
    fn test_zigzag() {
        let mut a = Vec::new();
        let mut b = Vec::new();
        zig_i32(42i32, &mut a).unwrap();
        zig_i64(42i64, &mut b).unwrap();
        assert_eq!(a, b);
    }

    /// Known-answer vectors for the `i64` encoder, including both extremes.
    #[test]
    fn test_zig_i64() {
        let cases: &[(i64, &[u8])] = &[
            (0, &[0]),
            (-1, &[1]),
            (1, &[2]),
            (-64, &[127]),
            (64, &[128, 1]),
            (i32::MAX as i64, &[254, 255, 255, 255, 15]),
            (i32::MAX as i64 + 1, &[128, 128, 128, 128, 16]),
            (i32::MIN as i64, &[255, 255, 255, 255, 15]),
            (i32::MIN as i64 - 1, &[129, 128, 128, 128, 16]),
            (i64::MAX, &[254, 255, 255, 255, 255, 255, 255, 255, 255, 1]),
            (i64::MIN, &[255, 255, 255, 255, 255, 255, 255, 255, 255, 1]),
        ];

        for &(value, expected) in cases {
            let mut encoded = Vec::new();
            zig_i64(value, &mut encoded).unwrap();
            assert_eq!(encoded.as_slice(), expected, "encoding of {}", value);
        }
    }

    /// Known-answer vectors for the `i32` encoder.
    #[test]
    fn test_zig_i32() {
        let cases: &[(i32, &[u8])] = &[
            (i32::MAX / 2, &[254, 255, 255, 255, 7]),
            (i32::MIN / 2, &[255, 255, 255, 255, 7]),
            (-(i32::MIN / 2), &[128, 128, 128, 128, 8]),
            (i32::MIN / 2 - 1, &[129, 128, 128, 128, 8]),
            (i32::MAX, &[254, 255, 255, 255, 15]),
            (i32::MIN, &[255, 255, 255, 255, 15]),
        ];

        for &(value, expected) in cases {
            let mut encoded = Vec::new();
            zig_i32(value, &mut encoded).unwrap();
            assert_eq!(encoded.as_slice(), expected, "encoding of {}", value);
        }
    }

    /// Decoding must invert encoding, including at the extremes of the range.
    #[test]
    fn test_zigzag_roundtrip() -> TestResult {
        let values = [
            0,
            -1,
            1,
            -64,
            64,
            i64::from(i32::MAX),
            i64::from(i32::MIN),
            i64::MAX,
            i64::MIN,
        ];

        for value in values {
            let mut encoded = Vec::new();
            zig_i64(value, &mut encoded)?;
            assert_eq!(zag_i64(&mut encoded.as_slice())?, value);
        }

        Ok(())
    }

    /// A value just outside the `i32` range must be rejected by the `i32` decoder.
    #[test]
    fn test_zag_i32_out_of_range() -> TestResult {
        let mut encoded = Vec::new();
        zig_i64(i64::from(i32::MAX) + 1, &mut encoded)?;
        assert!(zag_i32(&mut encoded.as_slice()).is_err());

        Ok(())
    }

    /// Every byte has its continuation bit set, so the decoder runs out of input.
    #[test]
    fn test_overflow() {
        let causes_left_shift_overflow: &[u8] = &[0xe1, 0xe1, 0xe1, 0xe1, 0xe1];
        assert!(decode_variable(&mut &*causes_left_shift_overflow).is_err());
    }

    /// More than ten continuation bytes must fail on the length check, not on end of input.
    #[test]
    fn test_decode_variable_rejects_more_than_ten_bytes() {
        let mut input: &[u8] = &[0xff; 11];
        assert!(decode_variable(&mut input).is_err());
        // Exactly ten bytes were consumed; the eleventh was never read.
        assert_eq!(input.len(), 1);
    }

    #[test]
    fn test_safe_len() -> TestResult {
        assert_eq!(42usize, safe_len(42usize)?);
        assert!(safe_len(1024 * 1024 * 1024).is_err());

        Ok(())
    }
}
296}