// bzip2/mem.rs

//! Raw low-level manipulations of bz streams.

use std::error;
use std::fmt;
use std::marker;
use std::mem;
use std::slice;

use core::ffi::{c_int, c_uint};

use crate::{ffi, Compression};

13/// Representation of an in-memory compression stream.
14///
15/// An instance of [`Compress`] can be used to compress a stream of bz2 data.
16pub struct Compress {
17    inner: Stream<DirCompress>,
18}
19
20/// Representation of an in-memory decompression stream.
21///
22/// An instance of [`Decompress`] can be used to decompress a stream of bz2-encoded
23/// data.
24pub struct Decompress {
25    inner: Stream<DirDecompress>,
26}
27
28struct Stream<D: Direction> {
29    // libbz2 requires a stable address for this stream.
30    raw: Box<ffi::bz_stream>,
31    _marker: marker::PhantomData<D>,
32}
33
34unsafe impl<D: Direction> Send for Stream<D> {}
35unsafe impl<D: Direction> Sync for Stream<D> {}
36
37trait Direction {
38    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int;
39}
40
41enum DirCompress {}
42enum DirDecompress {}
43
44/// Possible actions to take on compression.
45#[derive(PartialEq, Eq, Copy, Debug, Clone)]
46pub enum Action {
47    /// Normal compression.
48    Run = ffi::BZ_RUN as isize,
49    /// Flush any existing output, but do not read any more input
50    Flush = ffi::BZ_FLUSH as isize,
51    /// Request that the compression stream be finalized.
52    Finish = ffi::BZ_FINISH as isize,
53}
54
/// Result of compression or decompression
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Status {
    /// Decompression went fine, nothing much to report.
    Ok,

    /// The Flush action on a compression went ok.
    FlushOk,

    /// The Run action on compression went ok.
    RunOk,

    /// The Finish action on compression went ok.
    FinishOk,

    /// The stream's end has been met, meaning that no more data can be input.
    StreamEnd,

    /// There was insufficient memory in the input or output buffer to complete
    /// the request, but otherwise everything went normally.
    MemNeeded,
}

/// Fatal errors encountered when compressing/decompressing bytes.
///
/// These errors indicate that progress could not be made in any form due to
/// input or output parameters.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Error {
    /// The sequence of operations called on a decompression/compression stream
    /// were invalid. See methods for details.
    Sequence,

    /// The data being decompressed was invalid, or it was not a valid bz2
    /// stream.
    Data,

    /// The magic bz2 header wasn't present when decompressing.
    DataMagic,

    /// The parameters to this function were invalid.
    Param,
}

99impl Compress {
100    /// Creates a new stream prepared for compression.
101    ///
102    /// The `work_factor` parameter controls how the compression phase behaves
103    /// when presented with worst case, highly repetitive, input data. If
104    /// compression runs into difficulties caused by repetitive data, the
105    /// library switches from the standard sorting algorithm to a fallback
106    /// algorithm. The fallback is slower than the standard algorithm by perhaps
107    /// a factor of three, but always behaves reasonably, no matter how bad the
108    /// input.
109    ///
110    /// Lower values of `work_factor` reduce the amount of effort the standard
111    /// algorithm will expend before resorting to the fallback. You should set
112    /// this parameter carefully; too low, and many inputs will be handled by
113    /// the fallback algorithm and so compress rather slowly, too high, and your
114    /// average-to-worst case compression times can become very large. The
115    /// default value of 30 gives reasonable behaviour over a wide range of
116    /// circumstances.
117    ///
118    /// Allowable values range from 0 to 250 inclusive. 0 is a special case,
119    /// equivalent to using the default value of 30.
120    pub fn new(lvl: Compression, work_factor: u32) -> Compress {
121        unsafe {
122            let mut raw = Box::new(mem::zeroed());
123            assert_eq!(
124                ffi::BZ2_bzCompressInit(&mut *raw, lvl.level() as c_int, 0, work_factor as c_int),
125                0
126            );
127            Compress {
128                inner: Stream {
129                    raw,
130                    _marker: marker::PhantomData,
131                },
132            }
133        }
134    }
135
136    /// Compress a block of input into a block of output.
137    ///
138    /// If anything other than [`BZ_OK`] is seen, `Err` is returned.
139    ///
140    /// The action given must be one of [`Action::Run`], [`Action::Flush`] or [`Action::Finish`].
141    ///
142    /// [`BZ_OK`]: ffi::BZ_OK
143    pub fn compress(
144        &mut self,
145        input: &[u8],
146        output: &mut [u8],
147        action: Action,
148    ) -> Result<Status, Error> {
149        // apparently 0-length compression requests which don't actually make
150        // any progress are returned as BZ_PARAM_ERROR, which we don't want, to
151        // just translate to a success here.
152        if input.is_empty() && action == Action::Run {
153            return Ok(Status::RunOk);
154        }
155        self.inner.raw.next_in = input.as_ptr() as *mut _;
156        self.inner.raw.avail_in = input.len().min(c_uint::MAX as usize) as c_uint;
157        self.inner.raw.next_out = output.as_mut_ptr() as *mut _;
158        self.inner.raw.avail_out = output.len().min(c_uint::MAX as usize) as c_uint;
159        unsafe {
160            match ffi::BZ2_bzCompress(&mut *self.inner.raw, action as c_int) {
161                ffi::BZ_RUN_OK => Ok(Status::RunOk),
162                ffi::BZ_FLUSH_OK => Ok(Status::FlushOk),
163                ffi::BZ_FINISH_OK => Ok(Status::FinishOk),
164                ffi::BZ_STREAM_END => Ok(Status::StreamEnd),
165                ffi::BZ_SEQUENCE_ERROR => Err(Error::Sequence),
166                c => panic!("unknown return status: {}", c),
167            }
168        }
169    }
170
171    /// Compress a block of input into an output vector.
172    ///
173    /// This function will not grow `output`, but it will fill the space after
174    /// its current length up to its capacity. The length of the vector will be
175    /// adjusted appropriately.
176    pub fn compress_vec(
177        &mut self,
178        input: &[u8],
179        output: &mut Vec<u8>,
180        action: Action,
181    ) -> Result<Status, Error> {
182        let cap = output.capacity();
183        let len = output.len();
184
185        unsafe {
186            let before = self.total_out();
187            let ret = {
188                let ptr = output.as_mut_ptr().add(len);
189                let out = slice::from_raw_parts_mut(ptr, cap - len);
190                self.compress(input, out, action)
191            };
192            output.set_len((self.total_out() - before) as usize + len);
193
194            ret
195        }
196    }
197
198    /// Total number of bytes processed as input
199    pub fn total_in(&self) -> u64 {
200        self.inner.total_in()
201    }
202
203    /// Total number of bytes processed as output
204    pub fn total_out(&self) -> u64 {
205        self.inner.total_out()
206    }
207}
208
209impl Decompress {
210    /// Creates a new stream prepared for decompression.
211    ///
212    /// If `small` is true, then the library will use an alternative
213    /// decompression algorithm which uses less memory but at the cost of
214    /// decompressing more slowly (roughly speaking, half the speed, but the
215    /// maximum memory requirement drops to around 2300k).
216    pub fn new(small: bool) -> Decompress {
217        unsafe {
218            let mut raw = Box::new(mem::zeroed());
219            assert_eq!(ffi::BZ2_bzDecompressInit(&mut *raw, 0, small as c_int), 0);
220            Decompress {
221                inner: Stream {
222                    raw,
223                    _marker: marker::PhantomData,
224                },
225            }
226        }
227    }
228
229    /// Decompress a block of input into a block of output.
230    pub fn decompress(&mut self, input: &[u8], output: &mut [u8]) -> Result<Status, Error> {
231        self.inner.raw.next_in = input.as_ptr() as *mut _;
232        self.inner.raw.avail_in = input.len().min(c_uint::MAX as usize) as c_uint;
233        self.inner.raw.next_out = output.as_mut_ptr() as *mut _;
234        self.inner.raw.avail_out = output.len().min(c_uint::MAX as usize) as c_uint;
235        unsafe {
236            match ffi::BZ2_bzDecompress(&mut *self.inner.raw) {
237                ffi::BZ_OK => Ok(Status::Ok),
238                ffi::BZ_MEM_ERROR => Ok(Status::MemNeeded),
239                ffi::BZ_STREAM_END => Ok(Status::StreamEnd),
240                ffi::BZ_PARAM_ERROR => Err(Error::Param),
241                ffi::BZ_DATA_ERROR => Err(Error::Data),
242                ffi::BZ_DATA_ERROR_MAGIC => Err(Error::DataMagic),
243                ffi::BZ_SEQUENCE_ERROR => Err(Error::Sequence),
244                c => panic!("wut: {}", c),
245            }
246        }
247    }
248
249    /// Decompress a block of input into an output vector.
250    ///
251    /// This function will not grow `output`, but it will fill the space after
252    /// its current length up to its capacity. The length of the vector will be
253    /// adjusted appropriately.
254    pub fn decompress_vec(&mut self, input: &[u8], output: &mut Vec<u8>) -> Result<Status, Error> {
255        let cap = output.capacity();
256        let len = output.len();
257
258        unsafe {
259            let before = self.total_out();
260            let ret = {
261                let ptr = output.as_mut_ptr().add(len);
262                let out = slice::from_raw_parts_mut(ptr, cap - len);
263                self.decompress(input, out)
264            };
265            output.set_len((self.total_out() - before) as usize + len);
266
267            ret
268        }
269    }
270
271    /// Total number of bytes processed as input
272    pub fn total_in(&self) -> u64 {
273        self.inner.total_in()
274    }
275
276    /// Total number of bytes processed as output
277    pub fn total_out(&self) -> u64 {
278        self.inner.total_out()
279    }
280}
281
282impl<D: Direction> Stream<D> {
283    fn total_in(&self) -> u64 {
284        (self.raw.total_in_lo32 as u64) | ((self.raw.total_in_hi32 as u64) << 32)
285    }
286
287    fn total_out(&self) -> u64 {
288        (self.raw.total_out_lo32 as u64) | ((self.raw.total_out_hi32 as u64) << 32)
289    }
290}
291
292impl error::Error for Error {}
293
294impl fmt::Display for Error {
295    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
296        let description = match self {
297            Error::Sequence => "bzip2: sequence of operations invalid",
298            Error::Data => "bzip2: invalid data",
299            Error::DataMagic => "bzip2: bz2 header missing",
300            Error::Param => "bzip2: invalid parameters",
301        };
302        f.write_str(description)
303    }
304}
305
306impl From<Error> for std::io::Error {
307    fn from(data: Error) -> std::io::Error {
308        std::io::Error::new(std::io::ErrorKind::Other, data)
309    }
310}
311
312impl Direction for DirCompress {
313    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int {
314        ffi::BZ2_bzCompressEnd(stream)
315    }
316}
317impl Direction for DirDecompress {
318    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int {
319        ffi::BZ2_bzDecompressEnd(stream)
320    }
321}
322
323impl<D: Direction> Drop for Stream<D> {
324    fn drop(&mut self) {
325        unsafe {
326            let _ = D::destroy(&mut *self.raw);
327        }
328    }
329}