apache_avro_test_helper/
data.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Provides a set of Avro schema examples that are used in the tests.
19
20use std::sync::OnceLock;
21
/// Schema examples built from primitive type names, each given both as a bare JSON string
/// and as a `{"type": ...}` object, followed by a few inputs that are not schemas.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema (this is the field `valid_examples` filters on).
pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
    (r#""null""#, true),
    (r#"{"type": "null"}"#, true),
    (r#""boolean""#, true),
    (r#"{"type": "boolean"}"#, true),
    (r#""string""#, true),
    (r#"{"type": "string"}"#, true),
    (r#""bytes""#, true),
    (r#"{"type": "bytes"}"#, true),
    (r#""int""#, true),
    (r#"{"type": "int"}"#, true),
    (r#""long""#, true),
    (r#"{"type": "long"}"#, true),
    (r#""float""#, true),
    (r#"{"type": "float"}"#, true),
    (r#""double""#, true),
    (r#"{"type": "double"}"#, true),
    // "true" as a JSON string and as a JSON boolean: neither names a type
    (r#""true""#, false),
    (r#"true"#, false),
    // object without a "type" attribute
    (r#"{"no_type": "test"}"#, false),
    // "panther" is not a type name
    (r#"{"type": "panther"}"#, false),
];
44
/// Schema examples for the `fixed` type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema.
pub const FIXED_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
    // same shape, with an explicit namespace
    (
        r#"{
                "type": "fixed",
                "name": "MyFixed",
                "namespace": "org.apache.hadoop.avro",
                "size": 1
            }"#,
        true,
    ),
    // no "size" attribute
    (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
    // no "name" attribute
    (r#"{"type": "fixed", "size": 314}"#, false),
];
59
/// Schema examples for the `enum` type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema.
pub const ENUM_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
        true,
    ),
    // "symbols" is a single string instead of an array of strings
    (
        r#"{
                "type": "enum",
                "name": "Status",
                "symbols": "Normal Caution Critical"
            }"#,
        false,
    ),
    // "name" is an array of numbers instead of a string
    (
        r#"{
                "type": "enum",
                "name": [ 0, 1, 1, 2, 3, 5, 8 ],
                "symbols": ["Golden", "Mean"]
            }"#,
        false,
    ),
    // no "name" attribute
    (
        r#"{
                "type": "enum",
                "symbols" : ["I", "will", "fail", "no", "name"]
            }"#,
        false,
    ),
    // The symbol "AA" is listed twice. The members are comma-separated so that the document
    // is well-formed JSON and the rejection has to come from the duplicate-symbol check
    // rather than from the JSON parser.
    (
        r#"{
                "type": "enum",
                 "name": "Test",
                 "symbols" : ["AA", "AA"]
            }"#,
        false,
    ),
];
97
/// Schema examples for the `array` type: one with a primitive item type and one with a
/// nested enum item type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "array", "items": "long"}"#, true),
    (
        r#"{
                "type": "array",
                 "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
            }"#,
        true,
    ),
];
108
/// Schema examples for the `map` type: one with a primitive value type and one with a
/// nested enum value type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const MAP_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "map", "values": "long"}"#, true),
    (
        r#"{
                "type": "map",
                "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
            }"#,
        true,
    ),
];
119
/// Schema examples for unions, including objects whose `"type"` is a union and that carry a
/// `"default"` value.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema.
pub const UNION_EXAMPLES: &[(&str, bool)] = &[
    (r#"["string", "null", "long"]"#, true),
    // the same primitive type appears twice
    (r#"["null", "null"]"#, false),
    (r#"["long", "long"]"#, false),
    // Two array schemas in one union. The elements are comma-separated so that the document
    // is well-formed JSON and the rejection has to come from the union rules rather than
    // from the JSON parser.
    (
        r#"[
                {"type": "array", "items": "long"},
                {"type": "array", "items": "string"}
            ]"#,
        false,
    ),
    // Unions with default values
    (
        r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
        true,
    ),
];
161
/// Schema examples for the `record` type: simple, self-referencing and nested records,
/// followed by several malformed ones.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema.
pub const RECORD_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{
                "type": "record",
                "name": "Test",
                "fields": [{"name": "f", "type": "long"}]
            }"#,
        true,
    ),
    // same shape as the previous entry but with type "error"; flagged invalid
    (
        r#"{
            "type": "error",
            "name": "Test",
            "fields": [{"name": "f", "type": "long"}]
        }"#,
        false,
    ),
    // record that refers to itself by name from inside an array
    (
        r#"{
            "type": "record",
            "name": "Node",
            "fields": [
                {"name": "label", "type": "string"},
                {"name": "children", "type": {"type": "array", "items": "Node"}}
            ]
        }"#,
        true,
    ),
    // nested record "Cons" that refers back to the enclosing record "Lisp"
    (
        r#"{
            "type": "record",
            "name": "Lisp",
            "fields": [
                {
                    "name": "value",
                    "type": [
                        "null", "string",
                        {
                            "type": "record",
                            "name": "Cons",
                            "fields": [
                                {"name": "car", "type": "Lisp"},
                                {"name": "cdr", "type": "Lisp"}
                            ]
                        }
                    ]
                }
            ]
        }"#,
        true,
    ),
    // the fixed "MD5" is defined in the first field and referenced by name in the third
    (
        r#"{
            "type": "record",
            "name": "HandshakeRequest",
            "namespace": "org.apache.avro.ipc",
            "fields": [
                {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}},
                {"name": "clientProtocol", "type": ["null", "string"]},
                {"name": "serverHash", "type": "MD5"},
                {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
            ]
        }"#,
        true,
    ),
    (
        r#"{
                "type":"record",
                "name":"HandshakeResponse",
                "namespace":"org.apache.avro.ipc",
                "fields":[
                    {
                        "name":"match",
                        "type":{
                           "type":"enum",
                           "name":"HandshakeMatch",
                           "symbols":["BOTH", "CLIENT", "NONE"]
                        }
                    },
                    {"name":"serverProtocol", "type":["null", "string"]},
                    {
                        "name":"serverHash",
                        "type":["null", {"name":"MD5", "size":16, "type":"fixed"}]
                    },
                    {
                        "name":"meta",
                        "type":["null", {"type":"map", "values":"bytes"}]
                    }
                ]
            }"#,
        true,
    ),
    // same "HandshakeResponse" schema as the previous entry; only the whitespace differs
    (
        r#"{
                "type":"record",
                "name":"HandshakeResponse",
                "namespace":"org.apache.avro.ipc",
                "fields":[
                    {
                        "name":"match",
                        "type":{
                            "type":"enum",
                            "name":"HandshakeMatch",
                            "symbols":["BOTH", "CLIENT", "NONE"]
                        }
                    },
                    {"name":"serverProtocol", "type":["null", "string"]},
                    {
                        "name":"serverHash",
                        "type":["null", { "name":"MD5", "size":16, "type":"fixed"}]
                    },
                    {"name":"meta", "type":["null", { "type":"map", "values":"bytes"}]}
                ]
            }"#,
        true,
    ),
    // Unions may not contain more than one schema with the same type, except for the named
    // types record, fixed and enum. For example, unions containing two array types or two map
    // types are not permitted, but two types with different names are permitted.
    // (Names permit efficient resolution when reading and writing unions.)
    (
        r#"{
            "type": "record",
            "name": "ipAddr",
            "fields": [
                {
                    "name": "addr",
                    "type": [
                        {"name": "IPv6", "type": "fixed", "size": 16},
                        {"name": "IPv4", "type": "fixed", "size": 4}
                    ]
                }
            ]
        }"#,
        true,
    ),
    // the first field has no "name"
    (
        r#"{
                "type": "record",
                "name": "Address",
                "fields": [
                    {"type": "string"},
                    {"type": "string", "name": "City"}
                ]
            }"#,
        false,
    ),
    // the first field has no "type"
    (
        r#"{
                "type": "record",
                "name": "Event",
                "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}]
            }"#,
        false,
    ),
    // not well-formed JSON: no comma after the "fields" value and `"name", "Rainer"` is not
    // a key/value pair; "fields" is also a string rather than an array
    (
        r#"{
                "type": "record",
                "fields": "His vision, from the constantly passing bars,"
                "name",
                "Rainer"
            }"#,
        false,
    ),
    // "name" is an array instead of a string
    (
        r#"{
                "name": ["Tom", "Jerry"],
                "type": "record",
                "fields": [{"name": "name", "type": "string"}]
            }"#,
        false,
    ),
];
335
/// Schema examples that carry a `"doc"` attribute, on a record and one of its fields, on an
/// enum, and on a fixed.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const DOC_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{
                "type": "record",
                "name": "TestDoc",
                "doc":  "Doc string",
                "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}]
            }"#,
        true,
    ),
    (
        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}"#,
        true,
    ),
    (
        r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc String"}"#,
        true,
    ),
];
355
/// Schema examples that carry extra `cp_*` attributes with string, integer, array, object,
/// null, boolean and float values, placed on a record, its fields, a map and an enum.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
    // extra attributes on the record itself and on each of its fields
    (
        r#"{
                "type": "record",
                "name": "TestRecord",
                "cp_string": "string",
                "cp_int": 1,
                "cp_array": [ 1, 2, 3, 4],
                "fields": [
                    {"name": "f1", "type": "fixed", "size": 16, "cp_object": {"a":1,"b":2}},
                    {"name": "f2", "type": "fixed", "size": 8, "cp_null": null}
                ]
            }"#,
        true,
    ),
    // extra attribute on a map
    (
        r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
        true,
    ),
    // extra attribute on an enum
    (
        r#"{
                "type": "enum",
                 "name": "TestEnum",
                 "symbols": [ "one", "two", "three" ],
                 "cp_float" : 1.0
            }"#,
        true,
    ),
];
385
/// Schema examples for the `decimal` logical type on `fixed` and `bytes` base types.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema. Several entries with out-of-range `precision`/`scale` values are
/// flagged valid.
// NOTE(review): presumably those are accepted because an invalid logical type annotation is
// ignored and the underlying type is used, as the comments on the date/time tables in this
// file describe for unknown logical types — confirm against the parser.
pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
    // decimal annotation wrapped around a nested fixed type
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "TestDecimal",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 4,
            "scale": 2
        }"#,
        true,
    ),
    // no "scale" attribute
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "ScaleIsImplicitlyZero",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 4
        }"#,
        true,
    ),
    // precision is 0; still flagged valid
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "PrecisionMustBeGreaterThanZero",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 0
        }"#,
        true,
    ),
    // decimal annotation placed directly on the fixed schema
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 10,
             "scale": 2,
             "size": 18
         }"#,
        true,
    ),
    // decimal on bytes
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 4,
             "scale": 2
         }"#,
        true,
    ),
    // bytes: negative scale; still flagged valid
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 2,
             "scale": -2
         }"#,
        true,
    ),
    // bytes: negative precision; still flagged valid
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": -2,
             "scale": 2
         }"#,
        true,
    ),
    // bytes: scale greater than precision; still flagged valid
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 2,
             "scale": 3
         }"#,
        true,
    ),
    // fixed: negative precision; still flagged valid
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": -10,
             "scale": 2,
             "size": 5
         }"#,
        true,
    ),
    // fixed: scale greater than precision; still flagged valid
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 2,
             "scale": 3,
             "size": 2
         }"#,
        true,
    ),
    // fixed: negative "size"; the only entry in this table flagged invalid
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 2,
             "scale": 2,
             "size": -2
         }"#,
        false,
    ),
];
505
/// Schema examples for the `date` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "int", "logicalType": "date"}"#, true),
    // this is valid even though its logical type is "date1", because unknown logical types are
    // ignored
    (r#"{"type": "int", "logicalType": "date1"}"#, true),
    // this is still valid: "date" is a known logical type, but here it sits on "long" instead
    // of "int"
    // NOTE(review): presumably the mismatched annotation is ignored and the plain "long" is
    // used — confirm against the parser
    (r#"{"type": "long", "logicalType": "date"}"#, true),
];
514
/// Schema examples for the `time-millis` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
    // this is valid even though its logical type is "time-milis" (missing the second "l"),
    // because unknown logical types are ignored
    (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
    // this is still valid: "time-millis" is a known logical type, but here it sits on "long"
    // instead of "int"
    // NOTE(review): presumably the mismatched annotation is ignored and the plain "long" is
    // used — confirm against the parser
    (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
];
523
/// Schema examples for the `time-micros` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
    // this is valid even though its logical type is "time-micro" (missing the last "s"), because
    // unknown logical types are ignored
    (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
    // this is still valid: "time-micros" is a known logical type, but here it sits on "int"
    // instead of "long"
    // NOTE(review): presumably the mismatched annotation is ignored and the plain "int" is
    // used — confirm against the parser
    (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
];
532
/// Schema examples for the `timestamp-millis` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
        true,
    ),
    // this is valid even though its logical type is "timestamp-milis" (missing the second "l"),
    // because unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
        true,
    ),
    (
        // this is still valid: "timestamp-millis" is a known logical type, but here it sits on
        // "int" instead of "long"
        // NOTE(review): presumably the mismatched annotation is ignored and the plain "int" is
        // used — confirm against the parser
        r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
        true,
    ),
];
550
/// Schema examples for the `timestamp-micros` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
        true,
    ),
    // this is valid even though its logical type is "timestamp-micro" (missing the last "s"),
    // because unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
        true,
    ),
    (
        // this is still valid: "timestamp-micros" is a known logical type, but here it sits on
        // "int" instead of "long"
        // NOTE(review): presumably the mismatched annotation is ignored and the plain "int" is
        // used — confirm against the parser
        r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
        true,
    ),
];
568
/// Schema examples for the `local-timestamp-millis` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
        true,
    ),
    // this is valid even though its logical type is "local-timestamp-milis" (missing the second
    // "l"), because unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
        true,
    ),
    (
        // this is still valid: "local-timestamp-millis" is a known logical type, but here it
        // sits on "int" instead of "long"
        // NOTE(review): presumably the mismatched annotation is ignored and the plain "int" is
        // used — confirm against the parser
        r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
        true,
    ),
];
586
/// Schema examples for the `local-timestamp-micros` logical type.
///
/// Every entry pairs the raw schema JSON with a flag marking whether the example is expected
/// to be a valid schema; all entries here are flagged valid.
pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
        true,
    ),
    // this is valid even though its logical type is "local-timestamp-micro" (missing the last
    // "s"), because unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
        true,
    ),
    (
        // this is still valid: "local-timestamp-micros" is a known logical type, but here it
        // sits on "int" instead of "long"
        // NOTE(review): presumably the mismatched annotation is ignored and the plain "int" is
        // used — confirm against the parser
        r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
        true,
    ),
];
604
605pub fn examples() -> &'static Vec<(&'static str, bool)> {
606    static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
607    EXAMPLES_ONCE.get_or_init(|| {
608        Vec::new()
609            .iter()
610            .copied()
611            .chain(PRIMITIVE_EXAMPLES.iter().copied())
612            .chain(FIXED_EXAMPLES.iter().copied())
613            .chain(ENUM_EXAMPLES.iter().copied())
614            .chain(ARRAY_EXAMPLES.iter().copied())
615            .chain(MAP_EXAMPLES.iter().copied())
616            .chain(UNION_EXAMPLES.iter().copied())
617            .chain(RECORD_EXAMPLES.iter().copied())
618            .chain(DOC_EXAMPLES.iter().copied())
619            .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
620            .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
621            .chain(DATE_LOGICAL_TYPE.iter().copied())
622            .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
623            .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
624            .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
625            .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
626            .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
627            .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
628            .collect()
629    })
630}
631
632pub fn valid_examples() -> &'static Vec<(&'static str, bool)> {
633    static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
634    VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect())
635}