apache_avro/rabin.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Implementation of the Rabin fingerprint algorithm
19#[expect(
20 deprecated,
21 reason = "https://github.com/RustCrypto/traits/issues/2036"
22)]
23use digest::{
24 FixedOutput, FixedOutputReset, HashMarker, Output, Reset, Update, consts::U8,
25 core_api::OutputSizeUser, generic_array::GenericArray,
26};
27use std::sync::OnceLock;
28
/// Seed value of the Rabin fingerprint.
///
/// It is the initial state of a fresh or reset [`Rabin`] hasher and also the value that is
/// conditionally XOR-ed into each entry while the lookup table is generated. Written here as its
/// 64-bit pattern (`0xC15D213AA4D7A795`) reinterpreted as a signed integer, which is
/// `-4513414715797952619`.
const EMPTY: i64 = 0xC15D_213A_A4D7_A795_u64 as i64;
30
31fn fp_table() -> &'static [i64; 256] {
32 static FPTABLE_ONCE: OnceLock<[i64; 256]> = OnceLock::new();
33 FPTABLE_ONCE.get_or_init(|| {
34 let mut fp_table: [i64; 256] = [0; 256];
35 for i in 0..256 {
36 let mut fp = i;
37 for _ in 0..8 {
38 fp = (fp as u64 >> 1) as i64 ^ (EMPTY & -(fp & 1));
39 }
40 fp_table[i as usize] = fp;
41 }
42 fp_table
43 })
44}
45
/// Rabin fingerprint hasher exposed through the `digest` traits, following the algorithm
/// described in the Avro specification's
/// [schema fingerprints](https://avro.apache.org/docs/current/specification/#schema-fingerprints)
/// section.
///
/// The digest is returned as the 8-byte little-endian encoding of the 64-bit Rabin hash, which
/// is the form used by Avro
/// [single object encoding](https://avro.apache.org/docs/current/specification/#single-object-encoding).
///
/// ```rust
/// use apache_avro::rabin::Rabin;
/// use digest::Digest;
/// use hex_literal::hex;
///
/// // create the Rabin hasher
/// let mut hasher = Rabin::new();
///
/// // add the data
/// hasher.update(b"hello world");
///
/// // read hash digest and consume hasher
/// let result = hasher.finalize();
///
/// assert_eq!(result[..], hex!("60335ba6d0415528"));
/// ```
///
/// To convert the digest to the commonly used 64-bit integer value, use
/// [`i64::from_le_bytes`]:
///
/// ```rust
/// # use apache_avro::rabin::Rabin;
/// # use digest::Digest;
/// # use hex_literal::hex;
/// #
/// # let mut hasher = Rabin::new();
/// #
/// # hasher.update(b"hello world");
/// #
/// # let result = hasher.finalize();
/// #
/// # assert_eq!(result[..], hex!("60335ba6d0415528"));
/// #
/// let i = i64::from_le_bytes(result.try_into().unwrap());
///
/// assert_eq!(i, 2906301498937520992);
/// ```
#[derive(Debug, Clone)]
pub struct Rabin {
    /// Running fingerprint; starts at the seed value and is updated for every input byte.
    result: i64,
}
91
92impl Default for Rabin {
93 fn default() -> Self {
94 Rabin { result: EMPTY }
95 }
96}
97
98impl Update for Rabin {
99 fn update(&mut self, data: &[u8]) {
100 for b in data {
101 self.result = (self.result as u64 >> 8) as i64
102 ^ fp_table()[((self.result ^ *b as i64) & 0xff) as usize];
103 }
104 }
105}
106
107impl FixedOutput for Rabin {
108 #[expect(
109 deprecated,
110 reason = "https://github.com/RustCrypto/traits/issues/2036"
111 )]
112 fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
113 out.copy_from_slice(&self.result.to_le_bytes());
114 }
115}
116
117impl Reset for Rabin {
118 fn reset(&mut self) {
119 self.result = EMPTY;
120 }
121}
122
// Declares how many bytes a finished `Rabin` digest occupies.
impl OutputSizeUser for Rabin {
    // The digest is the 8-byte little-endian form of the i64 fingerprint, hence `U8`.
    // See: https://avro.apache.org/docs/current/specification/#single-object-encoding
    type OutputSize = U8;
}
128
// Empty marker impl. `Digest` is never implemented explicitly in this file, yet the tests and
// doc examples drive `Rabin` through it — presumably via a blanket impl in the `digest` crate
// that requires `HashMarker` (confirm against the `digest` documentation).
impl HashMarker for Rabin {}
130
131impl FixedOutputReset for Rabin {
132 fn finalize_into_reset(&mut self, out: &mut Output<Self>) {
133 out.copy_from_slice(&self.result.to_le_bytes());
134 self.reset();
135 }
136}
137
#[cfg(test)]
mod tests {
    use super::Rabin;
    use apache_avro_test_helper::TestResult;
    use digest::Digest;
    use pretty_assertions::assert_eq;

    /// Fingerprints a handful of schemas and compares each result with its reference value.
    ///
    /// One hasher is reused for all cases through `finalize_reset`, so every case after the
    /// first also depends on the hasher having been reset correctly.
    // See: https://github.com/apache/avro/blob/main/share/test/data/schema-tests.txt
    #[test]
    fn test1() -> TestResult {
        const CASES: &[(&str, i64)] = &[
            (r#""null""#, 7195948357588979594),
            (r#""boolean""#, -6970731678124411036),
            (
                r#"{"name":"foo","type":"fixed","size":15}"#,
                1756455273707447556,
            ),
            (
                r#"{"name":"PigValue","type":"record","fields":[{"name":"value","type":["null","int","long","PigValue"]}]}"#,
                -1759257747318642341,
            ),
        ];

        let mut hasher = Rabin::new();

        for &(schema, expected) in CASES {
            hasher.update(schema.as_bytes());
            let digest = hasher.finalize_reset();
            let raw: &[u8] = &digest;
            let actual = i64::from_le_bytes(raw.try_into()?);
            assert_eq!(expected, actual);
        }

        Ok(())
    }
}