apache_avro_test_helper/
data.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Provides a set of Avro schema examples that are used in the tests.
19
20use std::sync::OnceLock;
21
/// Schema examples for the primitive types.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. Every primitive appears twice:
/// once as a bare JSON string and once as a `{"type": ...}` object.
pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[
    (r#""null""#, true),
    (r#"{"type": "null"}"#, true),
    (r#""boolean""#, true),
    (r#"{"type": "boolean"}"#, true),
    (r#""string""#, true),
    (r#"{"type": "string"}"#, true),
    (r#""bytes""#, true),
    (r#"{"type": "bytes"}"#, true),
    (r#""int""#, true),
    (r#"{"type": "int"}"#, true),
    (r#""long""#, true),
    (r#"{"type": "long"}"#, true),
    (r#""float""#, true),
    (r#"{"type": "float"}"#, true),
    (r#""double""#, true),
    (r#"{"type": "double"}"#, true),
    // Flagged `false`: a quoted and a bare boolean literal, an object without a
    // "type" key, and a type name that is not used anywhere else in this table.
    (r#""true""#, false),
    (r#"true"#, false),
    (r#"{"no_type": "test"}"#, false),
    (r#"{"type": "panther"}"#, false),
];
44
/// Schema examples for the `fixed` type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. The two `false` entries omit
/// `"size"` and `"name"` respectively.
pub const FIXED_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true),
    (
        r#"{
                "type": "fixed",
                "name": "MyFixed",
                "namespace": "org.apache.hadoop.avro",
                "size": 1
            }"#,
        true,
    ),
    (r#"{"type": "fixed", "name": "MissingSize"}"#, false),
    (r#"{"type": "fixed", "size": 314}"#, false),
];
59
/// Schema examples for the `enum` type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. Only the first entry is flagged
/// `true`; the rest exercise malformed `"symbols"` / `"name"` attributes.
pub const ENUM_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#,
        true,
    ),
    // "symbols" is a single string instead of an array
    (
        r#"{
                "type": "enum",
                "name": "Status",
                "symbols": "Normal Caution Critical"
            }"#,
        false,
    ),
    // "name" is an array of numbers instead of a string
    (
        r#"{
                "type": "enum",
                "name": [ 0, 1, 1, 2, 3, 5, 8 ],
                "symbols": ["Golden", "Mean"]
            }"#,
        false,
    ),
    // no "name" attribute at all
    (
        r#"{
                "type": "enum",
                "symbols" : ["I", "will", "fail", "no", "name"]
            }"#,
        false,
    ),
    // repeated symbol "AA"; note the text also has no comma after "Test"
    (
        r#"{
                "type": "enum",
                 "name": "Test"
                 "symbols" : ["AA", "AA"]
            }"#,
        false,
    ),
];
97
/// Schema examples for the `array` type: one with a primitive item type and
/// one with an inline `enum` item type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. Both entries here are `true`.
pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "array", "items": "long"}"#, true),
    (
        r#"{
                "type": "array",
                 "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
            }"#,
        true,
    ),
];
108
/// Schema examples for the `map` type: one with a primitive value type and one
/// with an inline `enum` value type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. Both entries here are `true`.
pub const MAP_EXAMPLES: &[(&str, bool)] = &[
    (r#"{"type": "map", "values": "long"}"#, true),
    (
        r#"{
                "type": "map",
                "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
            }"#,
        true,
    ),
];
119
/// Schema examples for unions, written as JSON arrays, plus field-like objects
/// whose `"type"` is a union and which carry a `"default"`.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. The `false` entries repeat the
/// same branch type within one union.
pub const UNION_EXAMPLES: &[(&str, bool)] = &[
    (r#"["string", "null", "long"]"#, true),
    (r#"["null", "null"]"#, false),
    (r#"["long", "long"]"#, false),
    // two array branches; note the text also has no comma between them
    (
        r#"[
                {"type": "array", "items": "long"}
                {"type": "array", "items": "string"}
            ]"#,
        false,
    ),
    // Unions with default values
    (
        r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["null", "string"], "default": null}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["string", "null"], "default": null}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#,
        true,
    ),
    (
        r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#,
        true,
    ),
];
161
/// Schema examples for the `record` type, including self-referencing records,
/// records that reuse a named type defined in an earlier field, and several
/// malformed definitions.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`.
pub const RECORD_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{
                "type": "record",
                "name": "Test",
                "fields": [{"name": "f", "type": "long"}]
            }"#,
        true,
    ),
    // same shape as above but with "type": "error"; flagged `false`
    (
        r#"{
            "type": "error",
            "name": "Test",
            "fields": [{"name": "f", "type": "long"}]
        }"#,
        false,
    ),
    // a record that refers to itself by name through an array
    (
        r#"{
            "type": "record",
            "name": "Node",
            "fields": [
                {"name": "label", "type": "string"},
                {"name": "children", "type": {"type": "array", "items": "Node"}}
            ]
        }"#,
        true,
    ),
    // a nested record whose fields refer back to the enclosing record
    (
        r#"{
            "type": "record",
            "name": "Lisp",
            "fields": [
                {
                    "name": "value",
                    "type": [
                        "null", "string",
                        {
                            "type": "record",
                            "name": "Cons",
                            "fields": [
                                {"name": "car", "type": "Lisp"},
                                {"name": "cdr", "type": "Lisp"}
                            ]
                        }
                    ]
                }
            ]
        }"#,
        true,
    ),
    // "MD5" is defined inline by the first field and referenced by name later
    (
        r#"{
            "type": "record",
            "name": "HandshakeRequest",
            "namespace": "org.apache.avro.ipc",
            "fields": [
                {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}},
                {"name": "clientProtocol", "type": ["null", "string"]},
                {"name": "serverHash", "type": "MD5"},
                {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
            ]
        }"#,
        true,
    ),
    (
        r#"{
                "type":"record",
                "name":"HandshakeResponse",
                "namespace":"org.apache.avro.ipc",
                "fields":[
                    {
                        "name":"match",
                        "type":{
                           "type":"enum",
                           "name":"HandshakeMatch",
                           "symbols":["BOTH", "CLIENT", "NONE"]
                        }
                    },
                    {"name":"serverProtocol", "type":["null", "string"]},
                    {
                        "name":"serverHash",
                        "type":["null", {"name":"MD5", "size":16, "type":"fixed"}]
                    },
                    {
                        "name":"meta",
                        "type":["null", {"type":"map", "values":"bytes"}]
                    }
                ]
            }"#,
        true,
    ),
    // same schema as the previous entry; only the whitespace of the text differs
    (
        r#"{
                "type":"record",
                "name":"HandshakeResponse",
                "namespace":"org.apache.avro.ipc",
                "fields":[
                    {
                        "name":"match",
                        "type":{
                            "type":"enum",
                            "name":"HandshakeMatch",
                            "symbols":["BOTH", "CLIENT", "NONE"]
                        }
                    },
                    {"name":"serverProtocol", "type":["null", "string"]},
                    {
                        "name":"serverHash",
                        "type":["null", { "name":"MD5", "size":16, "type":"fixed"}]
                    },
                    {"name":"meta", "type":["null", { "type":"map", "values":"bytes"}]}
                ]
            }"#,
        true,
    ),
    // Unions may not contain more than one schema with the same type, except for the named
    // types record, fixed and enum. For example, unions containing two array types or two map
    // types are not permitted, but two types with different names are permitted.
    // (Names permit efficient resolution when reading and writing unions.)
    (
        r#"{
            "type": "record",
            "name": "ipAddr",
            "fields": [
                {
                    "name": "addr",
                    "type": [
                        {"name": "IPv6", "type": "fixed", "size": 16},
                        {"name": "IPv4", "type": "fixed", "size": 4}
                    ]
                }
            ]
        }"#,
        true,
    ),
    // the first field has no "name"
    (
        r#"{
                "type": "record",
                "name": "Address",
                "fields": [
                    {"type": "string"},
                    {"type": "string", "name": "City"}
                ]
            }"#,
        false,
    ),
    // the first field has no "type"
    (
        r#"{
                "type": "record",
                "name": "Event",
                "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}]
            }"#,
        false,
    ),
    // "fields" is a string, and the remaining text is not key/value pairs
    (
        r#"{
                "type": "record",
                "fields": "His vision, from the constantly passing bars,"
                "name",
                "Rainer"
            }"#,
        false,
    ),
    // "name" is an array instead of a string
    (
        r#"{
                "name": ["Tom", "Jerry"],
                "type": "record",
                "fields": [{"name": "name", "type": "string"}]
            }"#,
        false,
    ),
];
335
/// Schema examples that carry a `"doc"` attribute on a record (and one of its
/// fields), an enum and a fixed.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`.
pub const DOC_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{
                "type": "record",
                "name": "TestDoc",
                "doc":  "Doc string",
                "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}]
            }"#,
        true,
    ),
    (
        r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}"#,
        true,
    ),
    (
        r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc String"}"#,
        true,
    ),
];
355
/// Schema examples that carry extra, non-standard attributes (the `cp_*` keys
/// and `"date"`) with string, integer, array, object, null, boolean and float
/// values.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`.
pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
    (
        r#"{
                "type": "record",
                "name": "TestRecord",
                "cp_string": "string",
                "cp_int": 1,
                "cp_array": [ 1, 2, 3, 4],
                "fields": [
                    {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2}},
                    {"name": "f2", "type": "long", "cp_null": null}
                ]
            }"#,
        true,
    ),
    (
        r#"{"type": "map", "values": "long", "cp_boolean": true}"#,
        true,
    ),
    (
        r#"{
                "type": "enum",
                 "name": "TestEnum",
                 "symbols": [ "one", "two", "three" ],
                 "cp_float" : 1.0
            }"#,
        true,
    ),
    (r#"{"type": "long", "date": "true"}"#, true),
];
386
/// Schema examples that put `"logicalType": "decimal"` on `fixed` and `bytes`
/// types with various `"precision"` / `"scale"` combinations.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. Only the last entry (negative
/// `"size"`) is flagged `false`; entries with a zero or negative precision, a
/// negative scale, or a scale larger than the precision are all flagged `true`.
// NOTE(review): presumably those are accepted because an inapplicable logical
// type is dropped and the underlying type is used — confirm against the parser.
pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "TestDecimal",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 4,
            "scale": 2
        }"#,
        true,
    ),
    // no "scale" attribute
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "ScaleIsImplicitlyZero",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 4
        }"#,
        true,
    ),
    // precision of zero
    (
        r#"{
            "type": {
                "type": "fixed",
                "name": "PrecisionMustBeGreaterThanZero",
                "size": 10
            },
            "logicalType": "decimal",
            "precision": 0
        }"#,
        true,
    ),
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 10,
             "scale": 2,
             "size": 18
         }"#,
        true,
    ),
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 4,
             "scale": 2
         }"#,
        true,
    ),
    // negative scale
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 2,
             "scale": -2
         }"#,
        true,
    ),
    // negative precision
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": -2,
             "scale": 2
         }"#,
        true,
    ),
    // scale larger than precision
    (
        r#"{
             "type": "bytes",
             "logicalType": "decimal",
             "precision": 2,
             "scale": 3
         }"#,
        true,
    ),
    // negative precision on a fixed
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": -10,
             "scale": 2,
             "size": 5
         }"#,
        true,
    ),
    // scale larger than precision on a fixed
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 2,
             "scale": 3,
             "size": 2
         }"#,
        true,
    ),
    // negative "size"; the only entry flagged `false`
    (
        r#"{
             "type": "fixed",
             "logicalType": "decimal",
             "name": "TestDecimal",
             "precision": 2,
             "scale": 2,
             "size": -2
         }"#,
        false,
    ),
];
506
/// Schema examples for the `date` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`long` instead of `int`).
pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "int", "logicalType": "date"}"#, true),
    // this is valid even though its logical type is "date1", because unknown logical types are
    // ignored
    (r#"{"type": "int", "logicalType": "date1"}"#, true),
    // this is still valid because unknown logicalType should be ignored
    (r#"{"type": "long", "logicalType": "date"}"#, true),
];
515
/// Schema examples for the `time-millis` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`long` instead of `int`).
pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "int", "logicalType": "time-millis"}"#, true),
    // this is valid even though its logical type is "time-milis" (missing the second "l"),
    // because unknown logical types are ignored
    (r#"{"type": "int", "logicalType": "time-milis"}"#, true),
    // this is still valid because unknown logicalType should be ignored
    (r#"{"type": "long", "logicalType": "time-millis"}"#, true),
];
524
/// Schema examples for the `time-micros` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`int` instead of `long`).
pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (r#"{"type": "long", "logicalType": "time-micros"}"#, true),
    // this is valid even though its logical type is "time-micro" (missing the last "s"), because
    // unknown logical types are ignored
    (r#"{"type": "long", "logicalType": "time-micro"}"#, true),
    // this is still valid because unknown logicalType should be ignored
    (r#"{"type": "int", "logicalType": "time-micros"}"#, true),
];
533
/// Schema examples for the `timestamp-millis` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`int` instead of `long`).
pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "timestamp-millis"}"#,
        true,
    ),
    // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because
    // unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "timestamp-milis"}"#,
        true,
    ),
    (
        // this is still valid because unknown logicalType should be ignored
        r#"{"type": "int", "logicalType": "timestamp-millis"}"#,
        true,
    ),
];
551
/// Schema examples for the `timestamp-micros` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`int` instead of `long`).
pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "timestamp-micros"}"#,
        true,
    ),
    // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because
    // unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "timestamp-micro"}"#,
        true,
    ),
    (
        // this is still valid because unknown logicalType should be ignored
        r#"{"type": "int", "logicalType": "timestamp-micros"}"#,
        true,
    ),
];
569
/// Schema examples for the `local-timestamp-millis` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`int` instead of `long`).
pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#,
        true,
    ),
    // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because
    // unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#,
        true,
    ),
    (
        // this is still valid because unknown logicalType should be ignored
        r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#,
        true,
    ),
];
587
/// Schema examples for the `local-timestamp-micros` logical type.
///
/// Each entry pairs the JSON text of a schema with a flag; [`valid_examples`]
/// keeps only the entries whose flag is `true`. All entries here are `true`:
/// the well-formed case, a misspelled logical type name, and the correctly
/// spelled name on a different primitive (`int` instead of `long`).
pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[
    (
        r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#,
        true,
    ),
    // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because
    // unknown logical types are ignored
    (
        r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#,
        true,
    ),
    (
        // this is still valid because unknown logicalType should be ignored
        r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#,
        true,
    ),
];
605
606pub fn examples() -> &'static Vec<(&'static str, bool)> {
607    static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
608    EXAMPLES_ONCE.get_or_init(|| {
609        Vec::new()
610            .iter()
611            .copied()
612            .chain(PRIMITIVE_EXAMPLES.iter().copied())
613            .chain(FIXED_EXAMPLES.iter().copied())
614            .chain(ENUM_EXAMPLES.iter().copied())
615            .chain(ARRAY_EXAMPLES.iter().copied())
616            .chain(MAP_EXAMPLES.iter().copied())
617            .chain(UNION_EXAMPLES.iter().copied())
618            .chain(RECORD_EXAMPLES.iter().copied())
619            .chain(DOC_EXAMPLES.iter().copied())
620            .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied())
621            .chain(DECIMAL_LOGICAL_TYPE.iter().copied())
622            .chain(DATE_LOGICAL_TYPE.iter().copied())
623            .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied())
624            .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied())
625            .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
626            .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
627            .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied())
628            .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied())
629            .collect()
630    })
631}
632
633pub fn valid_examples() -> &'static Vec<(&'static str, bool)> {
634    static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new();
635    VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect())
636}