bzip2/
read.rs

1//! Reader-based compression/decompression streams
2
3use std::io::prelude::*;
4use std::io::{self, BufReader};
5
6use crate::bufread;
7use crate::Compression;
8
9/// A compression stream which wraps an uncompressed stream of data. Compressed
10/// data will be read from the stream.
11pub struct BzEncoder<R> {
12    inner: bufread::BzEncoder<BufReader<R>>,
13}
14
15/// A decompression stream which wraps a compressed stream of data. Decompressed
16/// data will be read from the stream.
17pub struct BzDecoder<R> {
18    inner: bufread::BzDecoder<BufReader<R>>,
19}
20
21impl<R: Read> BzEncoder<R> {
22    /// Create a new compression stream which will compress at the given level
23    /// to read compress output to the give output stream.
24    pub fn new(r: R, level: Compression) -> BzEncoder<R> {
25        BzEncoder {
26            inner: bufread::BzEncoder::new(BufReader::new(r), level),
27        }
28    }
29
30    /// Acquires a reference to the underlying stream
31    pub fn get_ref(&self) -> &R {
32        self.inner.get_ref().get_ref()
33    }
34
35    /// Acquires a mutable reference to the underlying stream
36    ///
37    /// Note that mutation of the stream may result in surprising results if
38    /// this encoder is continued to be used.
39    pub fn get_mut(&mut self) -> &mut R {
40        self.inner.get_mut().get_mut()
41    }
42
43    /// Unwrap the underlying writer, finishing the compression stream.
44    pub fn into_inner(self) -> R {
45        self.inner.into_inner().into_inner()
46    }
47
48    /// Returns the number of bytes produced by the compressor
49    /// (e.g. the number of bytes read from this stream)
50    ///
51    /// Note that, due to buffering, this only bears any relation to
52    /// total_in() when the compressor chooses to flush its data
53    /// (unfortunately, this won't happen in general
54    /// at the end of the stream, because the compressor doesn't know
55    /// if there's more data to come).  At that point,
56    /// `total_out() / total_in()` would be the compression ratio.
57    pub fn total_out(&self) -> u64 {
58        self.inner.total_out()
59    }
60
61    /// Returns the number of bytes consumed by the compressor
62    /// (e.g. the number of bytes read from the underlying stream)
63    pub fn total_in(&self) -> u64 {
64        self.inner.total_in()
65    }
66}
67
68impl<R: Read> Read for BzEncoder<R> {
69    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
70        self.inner.read(buf)
71    }
72}
73
74impl<W: Write + Read> Write for BzEncoder<W> {
75    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
76        self.get_mut().write(buf)
77    }
78
79    fn flush(&mut self) -> io::Result<()> {
80        self.get_mut().flush()
81    }
82}
83
84impl<R: Read> BzDecoder<R> {
85    /// Create a new decompression stream, which will read compressed
86    /// data from the given input stream and decompress it.
87    pub fn new(r: R) -> BzDecoder<R> {
88        BzDecoder {
89            inner: bufread::BzDecoder::new(BufReader::new(r)),
90        }
91    }
92
93    /// Acquires a reference to the underlying stream
94    pub fn get_ref(&self) -> &R {
95        self.inner.get_ref().get_ref()
96    }
97
98    /// Acquires a mutable reference to the underlying stream
99    ///
100    /// Note that mutation of the stream may result in surprising results if
101    /// this encoder is continued to be used.
102    pub fn get_mut(&mut self) -> &mut R {
103        self.inner.get_mut().get_mut()
104    }
105
106    /// Unwrap the underlying writer, finishing the compression stream.
107    pub fn into_inner(self) -> R {
108        self.inner.into_inner().into_inner()
109    }
110
111    /// Returns the number of bytes produced by the decompressor
112    /// (e.g. the number of bytes read from this stream)
113    ///
114    /// Note that, due to buffering, this only bears any relation to
115    /// total_in() when the decompressor reaches a sync point
116    /// (e.g. where the original compressed stream was flushed).
117    /// At that point, `total_in() / total_out()` is the compression ratio.
118    pub fn total_out(&self) -> u64 {
119        self.inner.total_out()
120    }
121
122    /// Returns the number of bytes consumed by the decompressor
123    /// (e.g. the number of bytes read from the underlying stream)
124    pub fn total_in(&self) -> u64 {
125        self.inner.total_in()
126    }
127}
128
129impl<R: Read> Read for BzDecoder<R> {
130    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
131        self.inner.read(into)
132    }
133}
134
135impl<W: Write + Read> Write for BzDecoder<W> {
136    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
137        self.get_mut().write(buf)
138    }
139
140    fn flush(&mut self) -> io::Result<()> {
141        self.get_mut().flush()
142    }
143}
144
145/// A bzip2 streaming decoder that decodes all members of a multistream
146///
147/// Wikipedia, particularly, uses bzip2 multistream for their dumps.
148pub struct MultiBzDecoder<R> {
149    inner: bufread::MultiBzDecoder<BufReader<R>>,
150}
151
152impl<R: Read> MultiBzDecoder<R> {
153    /// Creates a new decoder from the given reader, immediately parsing the
154    /// (first) gzip header. If the gzip stream contains multiple members all will
155    /// be decoded.
156    pub fn new(r: R) -> MultiBzDecoder<R> {
157        MultiBzDecoder {
158            inner: bufread::MultiBzDecoder::new(BufReader::new(r)),
159        }
160    }
161}
162
163impl<R> MultiBzDecoder<R> {
164    /// Acquires a reference to the underlying reader.
165    pub fn get_ref(&self) -> &R {
166        self.inner.get_ref().get_ref()
167    }
168
169    /// Acquires a mutable reference to the underlying stream.
170    ///
171    /// Note that mutation of the stream may result in surprising results if
172    /// this encoder is continued to be used.
173    pub fn get_mut(&mut self) -> &mut R {
174        self.inner.get_mut().get_mut()
175    }
176
177    /// Consumes this decoder, returning the underlying reader.
178    pub fn into_inner(self) -> R {
179        self.inner.into_inner().into_inner()
180    }
181}
182
183impl<R: Read> Read for MultiBzDecoder<R> {
184    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
185        self.inner.read(into)
186    }
187}
188
#[cfg(test)]
mod tests {
    use crate::read::{BzDecoder, BzEncoder, MultiBzDecoder};
    use crate::Compression;
    use partial_io::quickcheck_types::{GenInterrupted, PartialWithErrors};
    use partial_io::PartialRead;
    use rand::distr::StandardUniform;
    use rand::{rng, Rng};
    use std::io::Read;

    /// Length of the large, highly repetitive payload used by several tests.
    const BIG_LEN: usize = 128 * 1024 + 1;

    /// Builds the large payload: `BIG_LEN` copies of the byte `3`.
    fn big_payload() -> Vec<u8> {
        vec![3u8; BIG_LEN]
    }

    /// Compresses `input` at the default level and returns the complete
    /// compressed stream.
    fn compress(input: &[u8]) -> Vec<u8> {
        let mut encoder = BzEncoder::new(input, Compression::default());
        let mut compressed = Vec::new();
        encoder.read_to_end(&mut compressed).unwrap();
        compressed
    }

    /// Feeds `input` through an encoder chained directly into a decoder and
    /// returns everything the decoder yields.
    fn roundtrip(input: &[u8]) -> Vec<u8> {
        let encoder = BzEncoder::new(input, Compression::default());
        let mut decoder = BzDecoder::new(encoder);
        let mut output = Vec::new();
        decoder.read_to_end(&mut output).unwrap();
        output
    }

    // Compress fully into a buffer, then decompress that buffer.
    #[test]
    fn smoke() {
        let original: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8];
        let compressed = compress(original);
        let mut decoder = BzDecoder::new(&compressed[..]);
        let mut restored = Vec::new();
        decoder.read_to_end(&mut restored).unwrap();
        assert_eq!(restored, original);
    }

    // Encoder chained straight into a decoder, small input.
    #[test]
    fn smoke2() {
        let original: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8];
        let restored = roundtrip(original);
        assert_eq!(restored, [1, 2, 3, 4, 5, 6, 7, 8]);
    }

    // Encoder chained straight into a decoder, large input.
    #[test]
    fn smoke3() {
        let original = big_payload();
        let restored = roundtrip(&original[..]);
        assert!(restored == original[..]);
    }

    // Random bytes appended after a complete stream must not affect decoding
    // of the stream itself.
    #[test]
    fn self_terminating() {
        let original = big_payload();
        let mut stream = compress(&original[..]);

        let noise: Vec<u8> = rng().sample_iter(&StandardUniform).take(1024).collect();
        for _ in 0..200 {
            stream.extend_from_slice(&noise);
        }

        let mut decoder = BzDecoder::new(&stream[..]);
        let mut restored = vec![0; original.len()];
        let read = decoder.read(&mut restored).unwrap();
        assert!(read == original.len());
        assert!(restored == original[..]);
    }

    // Reading into an empty buffer from a stream that encodes no data.
    #[test]
    fn zero_length_read_at_eof() {
        let stream = compress(&[]);

        let mut decoder = BzDecoder::new(&stream[..]);
        let mut empty_buf: Vec<u8> = Vec::new();
        let read = decoder.read(&mut empty_buf).unwrap();
        assert!(read == 0);
    }

    // Reading into an empty buffer from a stream that does encode data.
    #[test]
    fn zero_length_read_with_data() {
        let original = big_payload();
        let stream = compress(&original[..]);

        let mut decoder = BzDecoder::new(&stream[..]);
        let mut empty_buf: Vec<u8> = Vec::new();
        let read = decoder.read(&mut empty_buf).unwrap();
        assert!(read == 0);
    }

    // Several complete streams back to back must all be decoded.
    #[test]
    fn multistream_read_till_eof() {
        let original = big_payload();
        let repeat = 3;

        let mut stream = Vec::new();
        for _ in 0..repeat {
            stream.extend_from_slice(&compress(&original[..]));
        }

        let mut decoder = MultiBzDecoder::new(&stream[..]);
        let mut restored = Vec::new();

        let a = decoder.read_to_end(&mut restored).unwrap();
        let b = original.len() * repeat;
        assert!(a == b, "{} {}", a, b);
    }

    // Empty input round-trips to empty output.
    #[test]
    fn empty() {
        let input: &[u8] = &[];
        let restored = roundtrip(input);
        assert!(restored.is_empty());
    }

    // Property: arbitrary byte vectors survive an encode/decode round trip.
    #[test]
    fn qc() {
        fn prop(v: Vec<u8>) -> bool {
            let restored = roundtrip(&v[..]);
            v == restored
        }

        ::quickcheck::quickcheck(prop as fn(_) -> _);
    }

    // Property: the round trip also holds when both underlying readers are
    // driven through `PartialRead` with the generated operation sequences.
    #[test]
    fn qc_partial() {
        fn prop(
            v: Vec<u8>,
            encode_ops: PartialWithErrors<GenInterrupted>,
            decode_ops: PartialWithErrors<GenInterrupted>,
        ) -> bool {
            let flaky_source = PartialRead::new(&v[..], encode_ops);
            let encoder = BzEncoder::new(flaky_source, Compression::default());
            let flaky_compressed = PartialRead::new(encoder, decode_ops);
            let mut decoder = BzDecoder::new(flaky_compressed);
            let mut restored = Vec::new();
            decoder.read_to_end(&mut restored).unwrap();
            v == restored
        }

        ::quickcheck::quickcheck(prop as fn(_, _, _) -> _);
    }
}