bzip2/
mem.rs

1//! Raw low-level manipulations of bz streams.
2
3use std::error;
4use std::fmt;
5use std::marker;
6use std::mem;
7
8use core::ffi::{c_int, c_uint};
9
10use crate::{ffi, Compression};
11
/// Representation of an in-memory compression stream.
///
/// An instance of [`Compress`] can be used to compress a stream of bz2 data.
/// The underlying library state is released with `BZ2_bzCompressEnd` when the
/// value is dropped.
pub struct Compress {
    // Raw bz stream state, tagged with the compression direction so that the
    // matching teardown function is used on drop.
    inner: Stream<DirCompress>,
}
18
/// Representation of an in-memory decompression stream.
///
/// An instance of [`Decompress`] can be used to decompress a stream of bz2-encoded
/// data. The underlying library state is released with `BZ2_bzDecompressEnd`
/// when the value is dropped.
pub struct Decompress {
    // Raw bz stream state, tagged with the decompression direction so that
    // the matching teardown function is used on drop.
    inner: Stream<DirDecompress>,
}
26
/// Owning wrapper around a raw `bz_stream`, parameterised by direction.
///
/// The `D` parameter selects, through [`Direction::destroy`], which libbz2
/// teardown function the `Drop` implementation of this type calls.
struct Stream<D: Direction> {
    // libbz2 requires a stable address for this stream.
    raw: Box<ffi::bz_stream>,
    // Ties the stream to its direction without storing a value of `D`.
    _marker: marker::PhantomData<D>,
}
32
// SAFETY: NOTE(review): rationale inferred from this file, confirm against
// libbz2. The raw stream is only reachable through the owning `Box`, so
// moving the wrapper to another thread moves exclusive access with it.
unsafe impl<D: Direction> Send for Stream<D> {}
// SAFETY: NOTE(review): rationale inferred from this file, confirm against
// libbz2. The only `&self` methods defined here (`total_in`, `total_out`)
// just read counter fields; every call into libbz2 goes through `&mut self`.
unsafe impl<D: Direction> Sync for Stream<D> {}
35
/// Selects the libbz2 teardown routine for a [`Stream`].
///
/// Implemented by the marker types `DirCompress` and `DirDecompress`.
trait Direction {
    /// Releases the library state attached to `stream` and returns the status
    /// code reported by libbz2.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `stream` must point to a live `bz_stream`
    /// that was initialised for this direction and has not been destroyed
    /// yet; confirm against the libbz2 manual.
    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int;
}
39
/// Marker for compression streams; it has no variants, so it only exists at
/// the type level. Its `Direction` impl calls `BZ2_bzCompressEnd`.
enum DirCompress {}
/// Marker for decompression streams; it has no variants, so it only exists at
/// the type level. Its `Direction` impl calls `BZ2_bzDecompressEnd`.
enum DirDecompress {}
42
/// Possible actions to take on compression.
///
/// Each variant's discriminant is the value of the corresponding libbz2
/// constant, so an `Action` can be cast to a C integer and handed directly to
/// `BZ2_bzCompress`.
#[derive(PartialEq, Eq, Copy, Debug, Clone)]
pub enum Action {
    /// Normal compression.
    Run = ffi::BZ_RUN as isize,
    /// Flush any existing output, but do not read any more input.
    Flush = ffi::BZ_FLUSH as isize,
    /// Request that the compression stream be finalized.
    Finish = ffi::BZ_FINISH as isize,
}
53
/// Non-fatal outcome of a compression or decompression call.
#[derive(PartialEq, Eq, Copy, Debug, Clone)]
pub enum Status {
    /// Decompression went fine, nothing much to report.
    ///
    /// Returned when libbz2 reports `BZ_OK`.
    Ok,

    /// The Flush action on a compression went ok.
    ///
    /// Returned when libbz2 reports `BZ_FLUSH_OK`.
    FlushOk,

    /// The Run action on compression went ok.
    ///
    /// Returned when libbz2 reports `BZ_RUN_OK`, and also for an empty input
    /// passed with [`Action::Run`], which never reaches the library.
    RunOk,

    /// The Finish action on compression went ok.
    ///
    /// Returned when libbz2 reports `BZ_FINISH_OK`.
    FinishOk,

    /// The stream's end has been met, meaning that no more data can be input.
    ///
    /// Returned by both directions when libbz2 reports `BZ_STREAM_END`.
    StreamEnd,

    /// There was insufficient memory in the input or output buffer to complete
    /// the request, but otherwise everything went normally.
    ///
    /// Returned by decompression when libbz2 reports `BZ_MEM_ERROR`.
    MemNeeded,
}
76
/// Fatal errors encountered when compressing/decompressing bytes.
///
/// These errors indicate that progress could not be made in any form due to
/// input or output parameters. Each variant corresponds to one libbz2 error
/// code.
#[derive(PartialEq, Eq, Copy, Debug, Clone)]
pub enum Error {
    /// The sequence of operations called on a decompression/compression stream
    /// was invalid. See methods for details.
    ///
    /// Corresponds to `BZ_SEQUENCE_ERROR`.
    Sequence,

    /// The data being decompressed was invalid, or it was not a valid bz2
    /// stream.
    ///
    /// Corresponds to `BZ_DATA_ERROR`.
    Data,

    /// The magic bz2 header wasn't present when decompressing.
    ///
    /// Corresponds to `BZ_DATA_ERROR_MAGIC`.
    DataMagic,

    /// The parameters to this function were invalid.
    ///
    /// Corresponds to `BZ_PARAM_ERROR`.
    Param,
}
97
98impl Compress {
99    /// Creates a new stream prepared for compression.
100    ///
101    /// The `work_factor` parameter controls how the compression phase behaves
102    /// when presented with worst case, highly repetitive, input data. If
103    /// compression runs into difficulties caused by repetitive data, the
104    /// library switches from the standard sorting algorithm to a fallback
105    /// algorithm. The fallback is slower than the standard algorithm by perhaps
106    /// a factor of three, but always behaves reasonably, no matter how bad the
107    /// input.
108    ///
109    /// Lower values of `work_factor` reduce the amount of effort the standard
110    /// algorithm will expend before resorting to the fallback. You should set
111    /// this parameter carefully; too low, and many inputs will be handled by
112    /// the fallback algorithm and so compress rather slowly, too high, and your
113    /// average-to-worst case compression times can become very large. The
114    /// default value of 30 gives reasonable behaviour over a wide range of
115    /// circumstances.
116    ///
117    /// Allowable values range from 0 to 250 inclusive. 0 is a special case,
118    /// equivalent to using the default value of 30.
119    pub fn new(lvl: Compression, work_factor: u32) -> Self {
120        unsafe {
121            let mut raw = Box::new(mem::zeroed());
122            assert_eq!(
123                ffi::BZ2_bzCompressInit(&mut *raw, lvl.level() as c_int, 0, work_factor as c_int),
124                0
125            );
126            Self {
127                inner: Stream {
128                    raw,
129                    _marker: marker::PhantomData,
130                },
131            }
132        }
133    }
134
135    unsafe fn compress_inner(
136        &mut self,
137        input: &[u8],
138        output_ptr: *mut u8,
139        output_len: usize,
140        action: Action,
141    ) -> Result<Status, Error> {
142        // apparently 0-length compression requests which don't actually make
143        // any progress are returned as BZ_PARAM_ERROR, which we don't want, to
144        // just translate to a success here.
145        if input.is_empty() && action == Action::Run {
146            return Ok(Status::RunOk);
147        }
148        self.inner.raw.next_in = input.as_ptr() as *mut _;
149        self.inner.raw.avail_in = input.len().min(c_uint::MAX as usize) as c_uint;
150        self.inner.raw.next_out = output_ptr as *mut _;
151        self.inner.raw.avail_out = output_len.min(c_uint::MAX as usize) as c_uint;
152        unsafe {
153            match ffi::BZ2_bzCompress(&mut *self.inner.raw, action as c_int) {
154                ffi::BZ_RUN_OK => Ok(Status::RunOk),
155                ffi::BZ_FLUSH_OK => Ok(Status::FlushOk),
156                ffi::BZ_FINISH_OK => Ok(Status::FinishOk),
157                ffi::BZ_STREAM_END => Ok(Status::StreamEnd),
158                ffi::BZ_SEQUENCE_ERROR => Err(Error::Sequence),
159                c => panic!("unknown return status: {c}"),
160            }
161        }
162    }
163
164    /// Compress a block of input into a block of output.
165    ///
166    /// If anything other than [`BZ_OK`] is seen, `Err` is returned.
167    ///
168    /// The action given must be one of [`Action::Run`], [`Action::Flush`] or [`Action::Finish`].
169    ///
170    /// [`BZ_OK`]: ffi::BZ_OK
171    pub fn compress(
172        &mut self,
173        input: &[u8],
174        output: &mut [u8],
175        action: Action,
176    ) -> Result<Status, Error> {
177        unsafe { self.compress_inner(input, output.as_mut_ptr(), output.len(), action) }
178    }
179
180    /// Same as [`Self::compress`] but accepts an uninitialised `output` buffer.
181    pub fn compress_uninit(
182        &mut self,
183        input: &[u8],
184        output: &mut [mem::MaybeUninit<u8>],
185        action: Action,
186    ) -> Result<Status, Error> {
187        unsafe { self.compress_inner(input, output.as_mut_ptr() as *mut _, output.len(), action) }
188    }
189
190    /// Compress a block of input into an output vector.
191    ///
192    /// This function will not grow `output`, but it will fill the space after
193    /// its current length up to its capacity. The length of the vector will be
194    /// adjusted appropriately.
195    pub fn compress_vec(
196        &mut self,
197        input: &[u8],
198        output: &mut Vec<u8>,
199        action: Action,
200    ) -> Result<Status, Error> {
201        let len = output.len();
202
203        unsafe {
204            let before = self.total_out();
205            let ret = self.compress_uninit(input, output.spare_capacity_mut(), action);
206            output.set_len((self.total_out() - before) as usize + len);
207
208            ret
209        }
210    }
211
212    /// Total number of bytes processed as input
213    pub fn total_in(&self) -> u64 {
214        self.inner.total_in()
215    }
216
217    /// Total number of bytes processed as output
218    pub fn total_out(&self) -> u64 {
219        self.inner.total_out()
220    }
221}
222
223impl Decompress {
224    /// Creates a new stream prepared for decompression.
225    ///
226    /// If `small` is true, then the library will use an alternative
227    /// decompression algorithm which uses less memory but at the cost of
228    /// decompressing more slowly (roughly speaking, half the speed, but the
229    /// maximum memory requirement drops to around 2300k).
230    pub fn new(small: bool) -> Self {
231        unsafe {
232            let mut raw = Box::new(mem::zeroed());
233            assert_eq!(ffi::BZ2_bzDecompressInit(&mut *raw, 0, small as c_int), 0);
234            Self {
235                inner: Stream {
236                    raw,
237                    _marker: marker::PhantomData,
238                },
239            }
240        }
241    }
242
243    unsafe fn decompress_inner(
244        &mut self,
245        input: &[u8],
246        output_ptr: *mut u8,
247        output_len: usize,
248    ) -> Result<Status, Error> {
249        self.inner.raw.next_in = input.as_ptr() as *mut _;
250        self.inner.raw.avail_in = input.len().min(c_uint::MAX as usize) as c_uint;
251        self.inner.raw.next_out = output_ptr as *mut _;
252        self.inner.raw.avail_out = output_len.min(c_uint::MAX as usize) as c_uint;
253        unsafe {
254            match ffi::BZ2_bzDecompress(&mut *self.inner.raw) {
255                ffi::BZ_OK => Ok(Status::Ok),
256                ffi::BZ_MEM_ERROR => Ok(Status::MemNeeded),
257                ffi::BZ_STREAM_END => Ok(Status::StreamEnd),
258                ffi::BZ_PARAM_ERROR => Err(Error::Param),
259                ffi::BZ_DATA_ERROR => Err(Error::Data),
260                ffi::BZ_DATA_ERROR_MAGIC => Err(Error::DataMagic),
261                ffi::BZ_SEQUENCE_ERROR => Err(Error::Sequence),
262                c => panic!("wut: {c}"),
263            }
264        }
265    }
266
267    /// Decompress a block of input into a block of output.
268    pub fn decompress(&mut self, input: &[u8], output: &mut [u8]) -> Result<Status, Error> {
269        unsafe { self.decompress_inner(input, output.as_mut_ptr(), output.len()) }
270    }
271
272    /// Same as [`Self::decompress`] but accepts an uninitialized buffer.
273    pub fn decompress_uninit(
274        &mut self,
275        input: &[u8],
276        output: &mut [mem::MaybeUninit<u8>],
277    ) -> Result<Status, Error> {
278        unsafe { self.decompress_inner(input, output.as_mut_ptr() as *mut _, output.len()) }
279    }
280
281    /// Decompress a block of input into an output vector.
282    ///
283    /// This function will not grow `output`, but it will fill the space after
284    /// its current length up to its capacity. The length of the vector will be
285    /// adjusted appropriately.
286    pub fn decompress_vec(&mut self, input: &[u8], output: &mut Vec<u8>) -> Result<Status, Error> {
287        let len = output.len();
288
289        unsafe {
290            let before = self.total_out();
291            let ret = self.decompress_uninit(input, output.spare_capacity_mut());
292            output.set_len((self.total_out() - before) as usize + len);
293
294            ret
295        }
296    }
297
298    /// Total number of bytes processed as input
299    pub fn total_in(&self) -> u64 {
300        self.inner.total_in()
301    }
302
303    /// Total number of bytes processed as output
304    pub fn total_out(&self) -> u64 {
305        self.inner.total_out()
306    }
307}
308
309impl<D: Direction> Stream<D> {
310    fn total_in(&self) -> u64 {
311        (self.raw.total_in_lo32 as u64) | ((self.raw.total_in_hi32 as u64) << 32)
312    }
313
314    fn total_out(&self) -> u64 {
315        (self.raw.total_out_lo32 as u64) | ((self.raw.total_out_hi32 as u64) << 32)
316    }
317}
318
// No methods are overridden: the trait's defaults are used, and the message
// text comes from the `Display` implementation.
impl error::Error for Error {}
320
321impl fmt::Display for Error {
322    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323        let description = match self {
324            Self::Sequence => "bzip2: sequence of operations invalid",
325            Self::Data => "bzip2: invalid data",
326            Self::DataMagic => "bzip2: bz2 header missing",
327            Self::Param => "bzip2: invalid parameters",
328        };
329        f.write_str(description)
330    }
331}
332
333impl From<Error> for std::io::Error {
334    fn from(data: Error) -> Self {
335        Self::other(data)
336    }
337}
338
339impl Direction for DirCompress {
340    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int {
341        ffi::BZ2_bzCompressEnd(stream)
342    }
343}
344impl Direction for DirDecompress {
345    unsafe fn destroy(stream: *mut ffi::bz_stream) -> c_int {
346        ffi::BZ2_bzDecompressEnd(stream)
347    }
348}
349
350impl<D: Direction> Drop for Stream<D> {
351    fn drop(&mut self) {
352        unsafe {
353            let _ = D::destroy(&mut *self.raw);
354        }
355    }
356}