liblzma/
read.rs

1//! Reader-based compression/decompression streams
2
3use std::io::prelude::*;
4use std::io::{self, BufReader};
5
6use crate::bufread;
7use crate::stream::Stream;
8
9/// A compression stream which wraps an uncompressed stream of data. Compressed
10/// data will be read from the stream.
11pub struct XzEncoder<R: Read> {
12    inner: bufread::XzEncoder<BufReader<R>>,
13}
14
15/// A decompression stream which wraps a compressed stream of data. Decompressed
16/// data will be read from the stream.
17pub struct XzDecoder<R: Read> {
18    inner: bufread::XzDecoder<BufReader<R>>,
19}
20
21impl<R: Read> XzEncoder<R> {
22    /// Create a new compression stream which will compress at the given level
23    /// to read compress output to the give output stream.
24    ///
25    /// The `level` argument here is typically 0-9 with 6 being a good default.
26    #[inline]
27    pub fn new(r: R, level: u32) -> XzEncoder<R> {
28        XzEncoder {
29            inner: bufread::XzEncoder::new(BufReader::new(r), level),
30        }
31    }
32
33    /// Create a new parallel compression stream which will compress at the given level
34    /// to read compress output to the give output stream.
35    ///
36    /// The `level` argument here is typically 0-9 with 6 being a good default.
37    #[cfg(feature = "parallel")]
38    pub fn new_parallel(r: R, level: u32) -> XzEncoder<R> {
39        XzEncoder {
40            inner: bufread::XzEncoder::new_parallel(BufReader::new(r), level),
41        }
42    }
43
44    /// Creates a new encoder with a custom `Stream`.
45    ///
46    /// The `Stream` can be pre-configured for multithreaded encoding, different
47    /// compression options/tuning, etc.
48    #[inline]
49    pub fn new_stream(r: R, stream: Stream) -> XzEncoder<R> {
50        XzEncoder {
51            inner: bufread::XzEncoder::new_stream(BufReader::new(r), stream),
52        }
53    }
54
55    /// Acquires a reference to the underlying stream
56    #[inline]
57    pub fn get_ref(&self) -> &R {
58        self.inner.get_ref().get_ref()
59    }
60
61    /// Acquires a mutable reference to the underlying stream
62    ///
63    /// Note that mutation of the stream may result in surprising results if
64    /// this encoder is continued to be used.
65    #[inline]
66    pub fn get_mut(&mut self) -> &mut R {
67        self.inner.get_mut().get_mut()
68    }
69
70    /// Unwrap the underlying writer, finishing the compression stream.
71    #[inline]
72    pub fn into_inner(self) -> R {
73        self.inner.into_inner().into_inner()
74    }
75
76    /// Returns the number of bytes produced by the compressor
77    /// (e.g. the number of bytes read from this stream)
78    ///
79    /// Note that, due to buffering, this only bears any relation to
80    /// total_in() when the compressor chooses to flush its data
81    /// (unfortunately, this won't happen this won't happen in general
82    /// at the end of the stream, because the compressor doesn't know
83    /// if there's more data to come).  At that point,
84    /// `total_out() / total_in()` would be the compression ratio.
85    #[inline]
86    pub fn total_out(&self) -> u64 {
87        self.inner.total_out()
88    }
89
90    /// Returns the number of bytes consumed by the compressor
91    /// (e.g. the number of bytes read from the underlying stream)
92    #[inline]
93    pub fn total_in(&self) -> u64 {
94        self.inner.total_in()
95    }
96}
97
98impl<R: Read> Read for XzEncoder<R> {
99    #[inline]
100    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
101        self.inner.read(buf)
102    }
103}
104
105impl<W: Write + Read> Write for XzEncoder<W> {
106    #[inline]
107    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
108        self.get_mut().write(buf)
109    }
110
111    #[inline]
112    fn flush(&mut self) -> io::Result<()> {
113        self.get_mut().flush()
114    }
115}
116
117impl<R: Read> XzDecoder<R> {
118    /// Create a new decompression stream, which will read compressed
119    /// data from the given input stream, and decompress one xz stream.
120    /// It may also consume input data that follows the xz stream.
121    /// Use [`bufread::XzDecoder`] instead to
122    /// process a mix of xz and non-xz data.
123    #[inline]
124    pub fn new(r: R) -> XzDecoder<R> {
125        XzDecoder {
126            inner: bufread::XzDecoder::new(BufReader::new(r)),
127        }
128    }
129
130    /// Create a new parallel decompression stream, which will read compressed
131    /// data from the given input stream, and decompress one xz stream.
132    /// It may also consume input data that follows the xz stream.
133    /// Use [`bufread::XzDecoder`] instead to process a mix of xz and non-xz data.
134    #[cfg(feature = "parallel")]
135    #[inline]
136    pub fn new_parallel(r: R) -> XzDecoder<R> {
137        XzDecoder {
138            inner: bufread::XzDecoder::new_parallel(BufReader::new(r)),
139        }
140    }
141
142    /// Create a new decompression stream, which will read compressed
143    /// data from the given input and decompress all the xz stream it contains.
144    #[inline]
145    pub fn new_multi_decoder(r: R) -> XzDecoder<R> {
146        XzDecoder {
147            inner: bufread::XzDecoder::new_multi_decoder(BufReader::new(r)),
148        }
149    }
150
151    /// Creates a new decoder with a custom `Stream`.
152    ///
153    /// The `Stream` can be pre-configured for various checks, different
154    /// decompression options/tuning, etc.
155    #[inline]
156    pub fn new_stream(r: R, stream: Stream) -> XzDecoder<R> {
157        XzDecoder {
158            inner: bufread::XzDecoder::new_stream(BufReader::new(r), stream),
159        }
160    }
161
162    /// Acquires a reference to the underlying stream
163    #[inline]
164    pub fn get_ref(&self) -> &R {
165        self.inner.get_ref().get_ref()
166    }
167
168    /// Acquires a mutable reference to the underlying stream
169    ///
170    /// Note that mutation of the stream may result in surprising results if
171    /// this encoder is continued to be used.
172    #[inline]
173    pub fn get_mut(&mut self) -> &mut R {
174        self.inner.get_mut().get_mut()
175    }
176
177    /// Unwrap the underlying writer, finishing the compression stream.
178    #[inline]
179    pub fn into_inner(self) -> R {
180        self.inner.into_inner().into_inner()
181    }
182
183    /// Returns the number of bytes produced by the decompressor
184    /// (e.g. the number of bytes read from this stream)
185    ///
186    /// Note that, due to buffering, this only bears any relation to
187    /// total_in() when the decompressor reaches a sync point
188    /// (e.g. where the original compressed stream was flushed).
189    /// At that point, `total_in() / total_out()` is the compression ratio.
190    #[inline]
191    pub fn total_out(&self) -> u64 {
192        self.inner.total_out()
193    }
194
195    /// Returns the number of bytes consumed by the decompressor
196    /// (e.g. the number of bytes read from the underlying stream)
197    #[inline]
198    pub fn total_in(&self) -> u64 {
199        self.inner.total_in()
200    }
201}
202
203impl<R: Read> Read for XzDecoder<R> {
204    #[inline]
205    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
206        self.inner.read(buf)
207    }
208}
209
210impl<W: Write + Read> Write for XzDecoder<W> {
211    #[inline]
212    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
213        self.get_mut().write(buf)
214    }
215
216    #[inline]
217    fn flush(&mut self) -> io::Result<()> {
218        self.get_mut().flush()
219    }
220}
221
#[cfg(test)]
mod tests {
    //! Round-trip tests for the reader-based encoder and decoder.

    use super::*;
    use crate::stream::LzmaOptions;
    use quickcheck::quickcheck;
    use rand::{thread_rng, Rng};
    use std::iter;
    #[cfg(all(target_family = "wasm", target_os = "unknown"))]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    /// Compress to a buffer, then decompress from that buffer.
    #[test]
    fn smoke() {
        let m: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8];
        let mut c = XzEncoder::new(m, 6);
        let mut data = vec![];
        c.read_to_end(&mut data).unwrap();
        let mut d = XzDecoder::new(&data[..]);
        let mut data2 = Vec::new();
        d.read_to_end(&mut data2).unwrap();
        assert_eq!(data2, m);
    }

    /// Chain the decoder directly on top of the encoder.
    #[test]
    fn smoke2() {
        let m: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8];
        let c = XzEncoder::new(m, 6);
        let mut d = XzDecoder::new(c);
        let mut data = vec![];
        d.read_to_end(&mut data).unwrap();
        assert_eq!(data, [1, 2, 3, 4, 5, 6, 7, 8]);
    }

    /// Same as `smoke2`, but with a larger input (128 KiB + 1 byte).
    #[test]
    fn smoke3() {
        let m = vec![3u8; 128 * 1024 + 1];
        let c = XzEncoder::new(&m[..], 6);
        let mut d = XzDecoder::new(c);
        let mut data = vec![];
        d.read_to_end(&mut data).unwrap();
        assert_eq!(data, &m[..]);
    }

    /// The decoder must stop at the end of the xz stream even when random
    /// trailing bytes follow it.
    #[test]
    fn self_terminating() {
        let m = vec![3u8; 128 * 1024 + 1];
        let mut c = XzEncoder::new(&m[..], 6);

        let mut result = Vec::new();
        c.read_to_end(&mut result).unwrap();

        // Append 200 copies of a 1 KiB random block after the compressed stream.
        let mut rng = thread_rng();
        let v = iter::repeat_with(|| rng.gen::<u8>())
            .take(1024)
            .collect::<Vec<_>>();
        for _ in 0..200 {
            result.extend_from_slice(&v);
        }

        let mut d = XzDecoder::new(&result[..]);
        // Zero-initialised output buffer: avoids exposing uninitialised memory
        // as `&mut [u8]` (no `unsafe` needed).
        let mut data = vec![0u8; m.len()];
        assert_eq!(d.read(&mut data).unwrap(), m.len());
        assert_eq!(data, &m[..]);
    }

    /// Reading into an empty buffer from a decoder over an empty payload
    /// returns 0.
    #[test]
    fn zero_length_read_at_eof() {
        let m = Vec::new();
        let mut c = XzEncoder::new(&m[..], 6);

        let mut result = Vec::new();
        c.read_to_end(&mut result).unwrap();

        let mut d = XzDecoder::new(&result[..]);
        let mut data = Vec::new();
        assert_eq!(d.read(&mut data).unwrap(), 0);
    }

    /// Reading into an empty buffer returns 0 even when data is available.
    #[test]
    fn zero_length_read_with_data() {
        let m = vec![3u8; 128 * 1024 + 1];
        let mut c = XzEncoder::new(&m[..], 6);

        let mut result = Vec::new();
        c.read_to_end(&mut result).unwrap();

        let mut d = XzDecoder::new(&result[..]);
        let mut data = Vec::new();
        assert_eq!(d.read(&mut data).unwrap(), 0);
    }

    /// Property test: round trip through custom LZMA1 encoder/decoder streams.
    #[test]
    fn qc_lzma1() {
        quickcheck(test as fn(_) -> _);
        fn test(v: Vec<u8>) -> bool {
            let options = LzmaOptions::new_preset(6).unwrap();
            let stream = Stream::new_lzma_encoder(&options).unwrap();
            let r = XzEncoder::new_stream(&v[..], stream);
            let stream = Stream::new_lzma_decoder(u64::MAX).unwrap();
            let mut r = XzDecoder::new_stream(r, stream);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Property test: round trip through the default encoder/decoder.
    #[test]
    fn qc() {
        quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = XzEncoder::new(&v[..], 6);
            let mut r = XzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Property test: parallel encoder output decodes with the default decoder.
    #[cfg(feature = "parallel")]
    #[test]
    fn qc_parallel_encode() {
        quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = XzEncoder::new_parallel(&v[..], 6);
            let mut r = XzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Property test: default encoder output decodes with the parallel decoder.
    #[cfg(feature = "parallel")]
    #[test]
    fn qc_parallel_decode() {
        quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = XzEncoder::new(&v[..], 6);
            let mut r = XzDecoder::new_parallel(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Two concatenated xz streams must decode to the concatenated inputs
    /// when using the multi-stream decoder.
    #[test]
    fn two_streams() {
        // Generate input data. The `as u8` casts intentionally wrap so the
        // payloads are repeating byte ramps.
        const STREAM1_SIZE: usize = 1024;
        const STREAM2_SIZE: usize = 532;
        let input_stream1: Vec<u8> = (0..STREAM1_SIZE).map(|num| num as u8).collect();
        let input_stream2: Vec<u8> = (0..STREAM2_SIZE).map(|num| (num + 32) as u8).collect();

        let mut all_input: Vec<u8> = Vec::with_capacity(STREAM1_SIZE + STREAM2_SIZE);
        all_input.extend_from_slice(&input_stream1);
        all_input.extend_from_slice(&input_stream2);

        // Make a vector with compressed data
        let mut decoder_input = Vec::new();
        {
            let mut encoder = XzEncoder::new(&input_stream1[..], 6);
            encoder.read_to_end(&mut decoder_input).unwrap();
        }
        {
            let mut encoder = XzEncoder::new(&input_stream2[..], 6);
            encoder.read_to_end(&mut decoder_input).unwrap();
        }

        // Decoder must be able to read the 2 concatenated xz streams and get the same data as input.
        let mut decoder_reader = &decoder_input[..];
        {
            // using `XzDecoder::new` here would fail because only 1 xz stream would be processed.
            let mut decoder = XzDecoder::new_multi_decoder(&mut decoder_reader);
            let mut decompressed_data = vec![0u8; all_input.len()];

            assert_eq!(
                decoder.read(&mut decompressed_data).unwrap(),
                all_input.len()
            );
            assert_eq!(decompressed_data, &all_input[..]);
        }
    }
}