Skip to main content

uuid/
parser.rs

1// Copyright 2013-2014 The Rust Project Developers.
2// Copyright 2018 The Uuid Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! [`Uuid`] parsing constructs and utilities.
13//!
14//! [`Uuid`]: ../struct.Uuid.html
15
16use crate::{
17    error::*,
18    std::{convert::TryFrom, str},
19    Uuid,
20};
21
22#[cfg(feature = "std")]
23use crate::std::string::String;
24
25impl str::FromStr for Uuid {
26    type Err = Error;
27
28    fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
29        Uuid::parse_str(uuid_str)
30    }
31}
32
33impl TryFrom<&'_ str> for Uuid {
34    type Error = Error;
35
36    fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
37        Uuid::parse_str(uuid_str)
38    }
39}
40
/// Fallible conversion from an owned `String`.
///
/// The string is only borrowed for parsing; the work is done by the
/// `TryFrom<&str>` implementation.
#[cfg(feature = "std")]
impl TryFrom<String> for Uuid {
    type Error = Error;

    fn try_from(uuid_str: String) -> Result<Self, Self::Error> {
        // Borrow explicitly as `&str` so the `&str` conversion is selected.
        let borrowed: &str = uuid_str.as_str();
        Self::try_from(borrowed)
    }
}
49
50impl Uuid {
51    /// Parses a `Uuid` from a string of hexadecimal digits with optional
52    /// hyphens.
53    ///
54    /// Any of the formats generated by this module (simple, hyphenated, urn,
55    /// Microsoft GUID) are supported by this parsing function.
56    ///
57    /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
58    /// This method will be eventually deprecated in favor of `try_parse`.
59    ///
60    /// # Examples
61    ///
62    /// Parse a hyphenated UUID:
63    ///
64    /// ```
65    /// # use uuid::{Uuid, Version, Variant};
66    /// # fn main() -> Result<(), uuid::Error> {
67    /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
68    ///
69    /// assert_eq!(Some(Version::Random), uuid.get_version());
70    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
71    /// # Ok(())
72    /// # }
73    /// ```
74    ///
75    /// [`try_parse`]: #method.try_parse
76    pub fn parse_str(input: &str) -> Result<Uuid, Error> {
77        try_parse(input.as_bytes())
78            .map(Uuid::from_bytes)
79            .map_err(InvalidUuid::into_err)
80    }
81
82    /// Parses a `Uuid` from a string of hexadecimal digits with optional
83    /// hyphens.
84    ///
85    /// This function is similar to [`parse_str`], in fact `parse_str` shares
86    /// the same underlying parser. The difference is that if `try_parse`
87    /// fails, it won't generate very useful error messages. The `parse_str`
88    /// function will eventually be deprecated in favor of `try_parse`.
89    ///
90    /// To parse a UUID from a byte stream instead of a UTF8 string, see
91    /// [`try_parse_ascii`].
92    ///
93    /// # Examples
94    ///
95    /// Parse a hyphenated UUID:
96    ///
97    /// ```
98    /// # use uuid::{Uuid, Version, Variant};
99    /// # fn main() -> Result<(), uuid::Error> {
100    /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
101    ///
102    /// assert_eq!(Some(Version::Random), uuid.get_version());
103    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
104    /// # Ok(())
105    /// # }
106    /// ```
107    ///
108    /// [`parse_str`]: #method.parse_str
109    /// [`try_parse_ascii`]: #method.try_parse_ascii
110    pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
111        Self::try_parse_ascii(input.as_bytes())
112    }
113
114    /// Parses a `Uuid` from a string of hexadecimal digits with optional
115    /// hyphens.
116    ///
117    /// The input is expected to be a string of ASCII characters. This method
118    /// can be more convenient than [`try_parse`] if the UUID is being
119    /// parsed from a byte stream instead of from a UTF8 string.
120    ///
121    /// # Examples
122    ///
123    /// Parse a hyphenated UUID:
124    ///
125    /// ```
126    /// # use uuid::{Uuid, Version, Variant};
127    /// # fn main() -> Result<(), uuid::Error> {
128    /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
129    ///
130    /// assert_eq!(Some(Version::Random), uuid.get_version());
131    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
132    /// # Ok(())
133    /// # }
134    /// ```
135    ///
136    /// [`try_parse`]: #method.try_parse
137    pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
138        match try_parse(input) {
139            Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
140            // If parsing fails then we don't know exactly what went wrong
141            // In this case, we just return a generic error
142            Err(_) => Err(Error(ErrorKind::ParseOther)),
143        }
144    }
145}
146
147const fn try_parse(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
148    match (input.len(), input) {
149        // Inputs of 32 bytes must be a non-hyphenated UUID
150        (32, s) => parse_simple(s, true),
151        // Hyphenated UUIDs may be wrapped in various ways:
152        // - `{UUID}` for braced UUIDs
153        // - `urn:uuid:UUID` for URNs
154        // - `UUID` for a regular hyphenated UUID
155        (36, s)
156        | (38, [b'{', s @ .., b'}'])
157        | (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) => {
158            parse_hyphenated(s)
159        }
160        // Any other shaped input is immediately invalid
161        _ => Err(InvalidUuid(input, RequestedUuid::Any)),
162    }
163}
164
165#[inline]
166#[allow(dead_code)]
167pub(crate) const fn parse_braced(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
168    if let (38, [b'{', s @ .., b'}']) = (input.len(), input) {
169        parse_hyphenated(s)
170    } else {
171        Err(InvalidUuid(input, RequestedUuid::Braced))
172    }
173}
174
175#[inline]
176#[allow(dead_code)]
177pub(crate) const fn parse_urn(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
178    if let (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) =
179        (input.len(), input)
180    {
181        parse_hyphenated(s)
182    } else {
183        Err(InvalidUuid(input, RequestedUuid::Urn))
184    }
185}
186
187#[inline]
188pub(crate) const fn parse_simple(
189    s: &'_ [u8],
190    speculative: bool,
191) -> Result<[u8; 16], InvalidUuid<'_>> {
192    // This length check here removes all other bounds
193    // checks in this function
194    if s.len() != 32 {
195        return Err(InvalidUuid(
196            s,
197            if speculative {
198                RequestedUuid::Any
199            } else {
200                RequestedUuid::Simple
201            },
202        ));
203    }
204
205    let mut buf: [u8; 16] = [0; 16];
206    let mut i = 0;
207
208    while i < 16 {
209        // Convert a two-char hex value (like `A8`)
210        // into a byte (like `10101000`)
211        let h1 = HEX_TABLE[s[i * 2] as usize];
212        let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
213
214        // We use `0xff` as a sentinel value to indicate
215        // an invalid hex character sequence (like the letter `G`)
216        if h1 | h2 == 0xff {
217            return Err(InvalidUuid(
218                s,
219                if speculative {
220                    RequestedUuid::Any
221                } else {
222                    RequestedUuid::Simple
223                },
224            ));
225        }
226
227        // The upper nibble needs to be shifted into position
228        // to produce the final byte value
229        buf[i] = SHL4_TABLE[h1 as usize] | h2;
230        i += 1;
231    }
232
233    Ok(buf)
234}
235
236#[inline]
237pub(crate) const fn parse_hyphenated(s: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
238    // This length check here removes all other bounds
239    // checks in this function
240    if s.len() != 36 {
241        return Err(InvalidUuid(s, RequestedUuid::Hyphenated));
242    }
243
244    // We look at two hex-encoded values (4 chars) at a time because
245    // that's the size of the smallest group in a hyphenated UUID.
246    // The indexes we're interested in are:
247    //
248    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
249    //            |   |   ||   ||   ||   ||   |   |
250    // hyphens  : |   |   8|  13|  18|  23|   |   |
251    // positions: 0   4    9   14   19   24  28  32
252
253    // First, ensure the hyphens appear in the right places
254    match [s[8], s[13], s[18], s[23]] {
255        [b'-', b'-', b'-', b'-'] => {}
256        _ => return Err(InvalidUuid(s, RequestedUuid::Hyphenated)),
257    }
258
259    let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
260    let mut buf: [u8; 16] = [0; 16];
261    let mut j = 0;
262
263    while j < 8 {
264        let i = positions[j];
265
266        // The decoding here is the same as the simple case
267        // We're just dealing with two values instead of one
268        let h1 = HEX_TABLE[s[i as usize] as usize];
269        let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
270        let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
271        let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
272
273        if h1 | h2 | h3 | h4 == 0xff {
274            return Err(InvalidUuid(s, RequestedUuid::Hyphenated));
275        }
276
277        buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
278        buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
279        j += 1;
280    }
281
282    Ok(buf)
283}
284
/// Lookup table from a byte to the value of the hex digit it encodes.
///
/// ASCII `0-9`, `a-f` and `A-F` map to `0..=15`; every other byte maps to
/// the marker value `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    let mut table = [0xff_u8; 256];
    let mut code = 0usize;

    while code < 256 {
        table[code] = match code as u8 {
            digit @ b'0'..=b'9' => digit - b'0',
            lower @ b'a'..=b'f' => lower - b'a' + 10,
            upper @ b'A'..=b'F' => upper - b'A' + 10,
            _ => 0xff,
        };
        code += 1;
    }

    table
};
304
/// Lookup table from a byte to that byte shifted left by four bits, with
/// the bits shifted out of the top discarded.
///
/// Used to move a decoded hex digit into the upper nibble of an output byte.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut value = 0usize;

    while value < 256 {
        table[value] = (value as u8) << 4;
        value += 1;
    }

    table
};
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        fmt::*,
        std::{str::FromStr, string::ToString},
        tests::some_uuid_iter,
    };

    /// Asserts that `Uuid::parse_str(input)` fails with exactly `expected`.
    ///
    /// The offending input is included in the failure message, and
    /// `#[track_caller]` makes the panic point at the calling line.
    #[track_caller]
    fn assert_parse_err(input: &str, expected: ErrorKind) {
        assert_eq!(
            Uuid::parse_str(input),
            Err(Error(expected)),
            "unexpected result parsing {:?}",
            input
        );
    }

    /// Every supported textual form parses, and all forms of the same UUID
    /// produce the same value.
    #[test]
    fn test_parse_valid() {
        let from_hyphenated = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_simple = Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let from_urn = Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_guid = Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(from_hyphenated, from_simple);
        assert_eq!(from_hyphenated, from_urn);
        assert_eq!(from_hyphenated, from_guid);

        assert!(Uuid::parse_str("00000000000000000000000000000000").is_ok());
        assert!(Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4").is_ok());
        assert!(Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("01020304-1112-2122-3132-414243444546").is_ok());
        assert!(Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("{6d93bade-bd9f-4e13-8914-9474e1e3567b}").is_ok());

        // Nil
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    /// Malformed input is rejected with the expected detailed diagnostics.
    #[test]
    fn test_parse_invalid() {
        // Empty and single-character input
        assert_parse_err("", ErrorKind::ParseLength { len: 0 });
        assert_parse_err(
            "!",
            ErrorKind::ParseChar {
                character: '!',
                index: 0,
            },
        );

        // Hyphenated input with a group of the wrong length
        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45",
            ErrorKind::ParseGroupLength {
                group: 4,
                len: 13,
                index: 25,
            },
        );
        assert_parse_err(
            "F9168C5E-CEB2-4faa-BBF-329BF39FA1E4",
            ErrorKind::ParseGroupLength {
                group: 3,
                len: 3,
                index: 20,
            },
        );

        // Hyphenated input containing a non-hex character
        assert_parse_err(
            "F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4",
            ErrorKind::ParseChar {
                character: 'G',
                index: 20,
            },
        );

        // Wrong number of groups
        assert_parse_err(
            "F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4",
            ErrorKind::ParseGroupCount { count: 2 },
        );
        assert_parse_err(
            "F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4",
            ErrorKind::ParseGroupCount { count: 3 },
        );
        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4",
            ErrorKind::ParseGroupCount { count: 4 },
        );
        assert_parse_err(
            "F9168C5E-CEB2-4faa",
            ErrorKind::ParseGroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4",
            ErrorKind::ParseChar {
                character: 'X',
                index: 18,
            },
        );

        // Unbalanced and malformed braced input
        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41",
            ErrorKind::ParseChar {
                character: '{',
                index: 0,
            },
        );
        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}",
            ErrorKind::ParseGroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4",
            ErrorKind::ParseGroupLength {
                group: 1,
                len: 3,
                index: 10,
            },
        );

        // Final group too short
        assert_parse_err(
            "01020304-1112-2122-3132-41424344",
            ErrorKind::ParseGroupLength {
                group: 4,
                len: 8,
                index: 25,
            },
        );

        // Simple input of the wrong length
        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::ParseLength { len: 31 },
        );
        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c88",
            ErrorKind::ParseLength { len: 33 },
        );

        // Simple input containing a non-hex character
        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0cg8",
            ErrorKind::ParseChar {
                character: 'g',
                index: 31,
            },
        );
        assert_parse_err(
            "67e5504410b1426%9247bb680e5fe0c8",
            ErrorKind::ParseChar {
                character: '%',
                index: 15,
            },
        );

        assert_parse_err(
            "231231212212423424324323477343246663",
            ErrorKind::ParseGroupCount { count: 1 },
        );
        assert_parse_err(
            "{00000000000000000000000000000000}",
            ErrorKind::ParseGroupCount { count: 1 },
        );

        assert_parse_err(
            "67e550X410b1426f9247bb680e5fe0cd",
            ErrorKind::ParseChar {
                character: 'X',
                index: 6,
            },
        );
        assert_parse_err(
            "67e550-4105b1426f9247bb680e5fe0c",
            ErrorKind::ParseGroupCount { count: 2 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4",
            ErrorKind::ParseGroupLength {
                group: 3,
                len: 5,
                index: 20,
            },
        );

        // A multi-byte character is reported as a single `char`
        assert_parse_err(
            "\u{bcf3c}",
            ErrorKind::ParseChar {
                character: '\u{bcf3c}',
                index: 0,
            },
        );

        // Format-specific parsers reject input in any other format
        assert_eq!(
            Err(Error(ErrorKind::ParseLength { len: 0 })),
            Hyphenated::from_str("")
        );

        assert_eq!(
            Err(Error(ErrorKind::ParseGroupCount { count: 1 })),
            Hyphenated::from_str("550e8400e29b41d4a716446655440000")
        );

        assert_eq!(
            Err(Error(ErrorKind::ParseChar {
                character: '-',
                index: 8
            })),
            Simple::from_str("550e8400-e29b-41d4-a716-446655440000")
        );

        assert_eq!(
            Err(Error(ErrorKind::ParseChar {
                character: '5',
                index: 0
            })),
            Urn::from_str("550e8400-e29b-41d4-a716-446655440000")
        );
        assert_eq!(
            Err(Error(ErrorKind::ParseChar {
                character: ':',
                index: 0
            })),
            Urn::from_str(":550e8400-e29b-41d4-a716-446655440000")
        );

        assert_eq!(
            Err(Error(ErrorKind::ParseChar {
                character: '5',
                index: 0
            })),
            Braced::from_str("550e8400-e29b-41d4-a716-446655440000")
        );
        assert_eq!(
            Err(Error(ErrorKind::ParseChar {
                character: '{',
                index: 1
            })),
            Braced::from_str("{{550e8400-e29b-41d4-a716-446655440000}}")
        );
    }

    #[test]
    fn test_roundtrip_default() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.to_string();
            let uuid_out = Uuid::parse_str(&orig_str).unwrap();
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.hyphenated().to_string();
            let uuid_out = Uuid::parse_str(&orig_str).unwrap();
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    #[test]
    fn test_roundtrip_simple() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.simple().to_string();
            let uuid_out = Uuid::parse_str(&orig_str).unwrap();
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    #[test]
    fn test_roundtrip_urn() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.urn().to_string();
            let uuid_out = Uuid::parse_str(&orig_str).unwrap();
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    #[test]
    fn test_roundtrip_braced() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.braced().to_string();
            let uuid_out = Uuid::parse_str(&orig_str).unwrap();
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    /// The format-specific URN parser accepts what the URN formatter emits.
    #[test]
    fn test_roundtrip_parse_urn() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.urn().to_string();
            let uuid_out = Uuid::from_bytes(parse_urn(orig_str.as_bytes()).unwrap());
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    /// The format-specific braced parser accepts what the braced formatter
    /// emits.
    #[test]
    fn test_roundtrip_parse_braced() {
        for uuid_orig in some_uuid_iter() {
            let orig_str = uuid_orig.braced().to_string();
            let uuid_out = Uuid::from_bytes(parse_braced(orig_str.as_bytes()).unwrap());
            assert_eq!(uuid_orig, uuid_out);
        }
    }

    /// Byte input that is not hexadecimal text is rejected, including bytes
    /// that can never appear in valid UTF-8.
    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // An embedded NUL is valid UTF-8 but not a hex digit.
        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8").is_err());

        // `0xff` never occurs in well-formed UTF-8; cover both the
        // hyphenated (36-byte) and the simple (32-byte) code paths.
        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\xffe0c8").is_err());
        assert!(Uuid::try_parse_ascii(b"67e5504410b1426f9247bb680e5\xffe0c8").is_err());
    }
}
644
645    #[test]
646    fn test_try_parse_ascii_non_utf8() {
647        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8").is_err());
648    }
649}