miniz_oxide/deflate/
stored.rs

1use crate::deflate::core::{
2    flush_block, CallbackOxide, CompressorOxide, TDEFLFlush, TDEFLStatus, LZ_DICT_SIZE,
3    LZ_DICT_SIZE_MASK, MAX_MATCH_LEN, MIN_MATCH_LEN,
4};
5use core::cmp;
6
7/// Compression function for stored blocks, split out from the main compression function.
8pub(crate) fn compress_stored(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool {
9    let in_buf = match callback.buf() {
10        None => return true,
11        Some(in_buf) => in_buf,
12    };
13
14    // Right now there isn't any code for re-adding previous data to the hash chain if compression is switched
15    // from stored mode to level 1 or higher after first starting compression at level 0
16    // which causes a slight deviation from oroginal miniz behaviour but much faster
17    // stored compression since original miniz does this by being super inefficient
18    // and adds data to hash table when doing stored and rle compression.
19    // Ideally we would handle it like zlib which keeps track of previous data and
20    // only adds to hash table when switching over to actual compression but that
21    // would need a bunch of more work.
22
23    // Make sure this is cleared in case compression level is switched later.
24    // TODO: It's possible we don't need this or could do this elsewhere later
25    // but just do this here to avoid causing issues for now.
26    d.params.saved_match_len = 0;
27    let mut bytes_written = d.lz.total_bytes;
28    let mut src_pos = d.params.src_pos;
29    let mut lookahead_size = d.dict.lookahead_size;
30    let mut lookahead_pos = d.dict.lookahead_pos;
31
32    // TODO: This mostly copied from the existing miniz code that was part of the main compression function
33    // but could be much simplified and optimized further to a simple copy.
34    while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) {
35        let src_buf_left = in_buf.len() - src_pos;
36        let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size);
37
38        if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1
39            && num_bytes_to_process > 0
40        {
41            let dictb = &mut d.dict.b;
42
43            let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
44
45            lookahead_size += num_bytes_to_process;
46
47            for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
48                // Add byte to input buffer.
49                dictb.dict[dst_pos] = c;
50                if dst_pos < MAX_MATCH_LEN - 1 {
51                    dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
52                }
53
54                dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK;
55            }
56        } else {
57            let dictb = &mut d.dict.b;
58            for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
59                let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
60                dictb.dict[dst_pos] = c;
61                if dst_pos < MAX_MATCH_LEN - 1 {
62                    dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
63                }
64
65                lookahead_size += 1;
66            }
67        }
68
69        src_pos += num_bytes_to_process;
70
71        d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size);
72        if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN {
73            break;
74        }
75
76        let len_to_move = 1;
77
78        bytes_written += 1;
79
80        lookahead_pos += len_to_move;
81        lookahead_size -= len_to_move;
82        d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE);
83
84        if bytes_written > 31 * 1024 {
85            d.lz.total_bytes = bytes_written;
86
87            d.params.src_pos = src_pos;
88            // These values are used in flush_block, so we need to write them back here.
89            d.dict.lookahead_size = lookahead_size;
90            d.dict.lookahead_pos = lookahead_pos;
91
92            let n = flush_block(d, callback, TDEFLFlush::None)
93                .unwrap_or(TDEFLStatus::PutBufFailed as i32);
94            if n != 0 {
95                return n > 0;
96            }
97            bytes_written = d.lz.total_bytes;
98        }
99    }
100
101    d.lz.total_bytes = bytes_written;
102    d.params.src_pos = src_pos;
103    d.dict.lookahead_size = lookahead_size;
104    d.dict.lookahead_pos = lookahead_pos;
105    true
106}
107
108/*
109fn compress_rle(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool {
110    let mut src_pos = d.params.src_pos;
111    let in_buf = match callback.in_buf {
112        None => return true,
113        Some(in_buf) => in_buf,
114    };
115
116    let mut lookahead_size = d.dict.lookahead_size;
117    let mut lookahead_pos = d.dict.lookahead_pos;
118    let mut saved_lit = d.params.saved_lit;
119    let mut saved_match_dist = d.params.saved_match_dist;
120    let mut saved_match_len = d.params.saved_match_len;
121
122    while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) {
123        let src_buf_left = in_buf.len() - src_pos;
124        let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size);
125
126        if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1
127            && num_bytes_to_process > 0
128        {
129            let dictb = &mut d.dict.b;
130
131            let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
132            let mut ins_pos = lookahead_pos + lookahead_size - 2;
133            // Start the hash value from the first two bytes
134            let mut hash = update_hash(
135                u16::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]),
136                dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK],
137            );
138
139            lookahead_size += num_bytes_to_process;
140
141            for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
142                // Add byte to input buffer.
143                dictb.dict[dst_pos] = c;
144                if dst_pos < MAX_MATCH_LEN - 1 {
145                    dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
146                }
147
148                // Generate hash from the current byte,
149                hash = update_hash(hash, c);
150                dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize];
151                // and insert it into the hash chain.
152                dictb.hash[hash as usize] = ins_pos as u16;
153                dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK;
154                ins_pos += 1;
155            }
156            src_pos += num_bytes_to_process;
157        } else {
158            let dictb = &mut d.dict.b;
159            for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
160                let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
161                dictb.dict[dst_pos] = c;
162                if dst_pos < MAX_MATCH_LEN - 1 {
163                    dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
164                }
165
166                lookahead_size += 1;
167                if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() {
168                    let ins_pos = lookahead_pos + lookahead_size - 3;
169                    let hash = ((u32::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK])
170                        << (LZ_HASH_SHIFT * 2))
171                        ^ ((u32::from(dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK])
172                            << LZ_HASH_SHIFT)
173                            ^ u32::from(c)))
174                        & (LZ_HASH_SIZE as u32 - 1);
175
176                    dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize];
177                    dictb.hash[hash as usize] = ins_pos as u16;
178                }
179            }
180
181            src_pos += num_bytes_to_process;
182        }
183
184        d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size);
185        if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN {
186            break;
187        }
188
189        let mut len_to_move = 1;
190        let mut cur_match_dist = 0;
191        let mut cur_match_len = if saved_match_len != 0 {
192            saved_match_len
193        } else {
194            u32::from(MIN_MATCH_LEN) - 1
195        };
196        let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK;
197                // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte.
198        if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 {
199            let c = d.dict.b.dict[(cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK];
200                    cur_match_len = d.dict.b.dict[cur_pos..(cur_pos + lookahead_size)]
201                        .iter()
202                        .take_while(|&x| *x == c)
203                        .count() as u32;
204                    if cur_match_len < MIN_MATCH_LEN.into() {
205                        cur_match_len = 0
206                    } else {
207                        cur_match_dist = 1
208                    }
209                }
210
211
212        let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024;
213        let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5;
214        if far_and_small || filter_small || cur_pos == cur_match_dist as usize {
215            cur_match_dist = 0;
216            cur_match_len = 0;
217        }
218
219        if saved_match_len != 0 {
220            if cur_match_len > saved_match_len {
221                record_literal(&mut d.huff, &mut d.lz, saved_lit);
222                if cur_match_len >= 128 {
223                    record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist);
224                    saved_match_len = 0;
225                    len_to_move = cur_match_len as usize;
226                } else {
227                    saved_lit = d.dict.b.dict[cur_pos];
228                    saved_match_dist = cur_match_dist;
229                    saved_match_len = cur_match_len;
230                }
231            } else {
232                record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist);
233                len_to_move = (saved_match_len - 1) as usize;
234                saved_match_len = 0;
235            }
236        } else if cur_match_dist == 0 {
237            record_literal(
238                &mut d.huff,
239                &mut d.lz,
240                d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)],
241            );
242        } else if d.params.greedy_parsing
243            || (d.params.flags & TDEFL_RLE_MATCHES != 0)
244            || cur_match_len >= 128
245        {
246            // If we are using lazy matching, check for matches at the next byte if the current
247            // match was shorter than 128 bytes.
248            record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist);
249            len_to_move = cur_match_len as usize;
250        } else {
251            saved_lit = d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)];
252            saved_match_dist = cur_match_dist;
253            saved_match_len = cur_match_len;
254        }
255
256        lookahead_pos += len_to_move;
257        assert!(lookahead_size >= len_to_move);
258        lookahead_size -= len_to_move;
259        d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE);
260
261        let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8;
262        let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0;
263        let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize;
264        let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw);
265
266        if lz_buf_tight || fat_or_raw {
267            d.params.src_pos = src_pos;
268            // These values are used in flush_block, so we need to write them back here.
269            d.dict.lookahead_size = lookahead_size;
270            d.dict.lookahead_pos = lookahead_pos;
271
272            let n = flush_block(d, callback, TDEFLFlush::None)
273                .unwrap_or(TDEFLStatus::PutBufFailed as i32);
274            if n != 0 {
275                d.params.saved_lit = saved_lit;
276                d.params.saved_match_dist = saved_match_dist;
277                d.params.saved_match_len = saved_match_len;
278                return n > 0;
279            }
280        }
281    }
282
283    d.params.src_pos = src_pos;
284    d.dict.lookahead_size = lookahead_size;
285    d.dict.lookahead_pos = lookahead_pos;
286    d.params.saved_lit = saved_lit;
287    d.params.saved_match_dist = saved_match_dist;
288    d.params.saved_match_len = saved_match_len;
289    true
290}*/