miniz_oxide/deflate/stored.rs
1use crate::deflate::core::{
2 flush_block, CallbackOxide, CompressorOxide, TDEFLFlush, TDEFLStatus, LZ_DICT_SIZE,
3 LZ_DICT_SIZE_MASK, MAX_MATCH_LEN, MIN_MATCH_LEN,
4};
5use core::cmp;
6
7/// Compression function for stored blocks, split out from the main compression function.
8pub(crate) fn compress_stored(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool {
9 let in_buf = match callback.buf() {
10 None => return true,
11 Some(in_buf) => in_buf,
12 };
13
14 // Right now there isn't any code for re-adding previous data to the hash chain if compression is switched
15 // from stored mode to level 1 or higher after first starting compression at level 0
16 // which causes a slight deviation from oroginal miniz behaviour but much faster
17 // stored compression since original miniz does this by being super inefficient
18 // and adds data to hash table when doing stored and rle compression.
19 // Ideally we would handle it like zlib which keeps track of previous data and
20 // only adds to hash table when switching over to actual compression but that
21 // would need a bunch of more work.
22
23 // Make sure this is cleared in case compression level is switched later.
24 // TODO: It's possible we don't need this or could do this elsewhere later
25 // but just do this here to avoid causing issues for now.
26 d.params.saved_match_len = 0;
27 let mut bytes_written = d.lz.total_bytes;
28 let mut src_pos = d.params.src_pos;
29 let mut lookahead_size = d.dict.lookahead_size;
30 let mut lookahead_pos = d.dict.lookahead_pos;
31
32 // TODO: This mostly copied from the existing miniz code that was part of the main compression function
33 // but could be much simplified and optimized further to a simple copy.
34 while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) {
35 let src_buf_left = in_buf.len() - src_pos;
36 let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size);
37
38 if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1
39 && num_bytes_to_process > 0
40 {
41 let dictb = &mut d.dict.b;
42
43 let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
44
45 lookahead_size += num_bytes_to_process;
46
47 for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
48 // Add byte to input buffer.
49 dictb.dict[dst_pos] = c;
50 if dst_pos < MAX_MATCH_LEN - 1 {
51 dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
52 }
53
54 dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK;
55 }
56 } else {
57 let dictb = &mut d.dict.b;
58 for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
59 let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
60 dictb.dict[dst_pos] = c;
61 if dst_pos < MAX_MATCH_LEN - 1 {
62 dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
63 }
64
65 lookahead_size += 1;
66 }
67 }
68
69 src_pos += num_bytes_to_process;
70
71 d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size);
72 if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN {
73 break;
74 }
75
76 let len_to_move = 1;
77
78 bytes_written += 1;
79
80 lookahead_pos += len_to_move;
81 lookahead_size -= len_to_move;
82 d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE);
83
84 if bytes_written > 31 * 1024 {
85 d.lz.total_bytes = bytes_written;
86
87 d.params.src_pos = src_pos;
88 // These values are used in flush_block, so we need to write them back here.
89 d.dict.lookahead_size = lookahead_size;
90 d.dict.lookahead_pos = lookahead_pos;
91
92 let n = flush_block(d, callback, TDEFLFlush::None)
93 .unwrap_or(TDEFLStatus::PutBufFailed as i32);
94 if n != 0 {
95 return n > 0;
96 }
97 bytes_written = d.lz.total_bytes;
98 }
99 }
100
101 d.lz.total_bytes = bytes_written;
102 d.params.src_pos = src_pos;
103 d.dict.lookahead_size = lookahead_size;
104 d.dict.lookahead_pos = lookahead_pos;
105 true
106}
107
108/*
109fn compress_rle(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool {
110 let mut src_pos = d.params.src_pos;
111 let in_buf = match callback.in_buf {
112 None => return true,
113 Some(in_buf) => in_buf,
114 };
115
116 let mut lookahead_size = d.dict.lookahead_size;
117 let mut lookahead_pos = d.dict.lookahead_pos;
118 let mut saved_lit = d.params.saved_lit;
119 let mut saved_match_dist = d.params.saved_match_dist;
120 let mut saved_match_len = d.params.saved_match_len;
121
122 while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) {
123 let src_buf_left = in_buf.len() - src_pos;
124 let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size);
125
126 if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1
127 && num_bytes_to_process > 0
128 {
129 let dictb = &mut d.dict.b;
130
131 let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
132 let mut ins_pos = lookahead_pos + lookahead_size - 2;
133 // Start the hash value from the first two bytes
134 let mut hash = update_hash(
135 u16::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]),
136 dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK],
137 );
138
139 lookahead_size += num_bytes_to_process;
140
141 for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
142 // Add byte to input buffer.
143 dictb.dict[dst_pos] = c;
144 if dst_pos < MAX_MATCH_LEN - 1 {
145 dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
146 }
147
148 // Generate hash from the current byte,
149 hash = update_hash(hash, c);
150 dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize];
151 // and insert it into the hash chain.
152 dictb.hash[hash as usize] = ins_pos as u16;
153 dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK;
154 ins_pos += 1;
155 }
156 src_pos += num_bytes_to_process;
157 } else {
158 let dictb = &mut d.dict.b;
159 for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] {
160 let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK;
161 dictb.dict[dst_pos] = c;
162 if dst_pos < MAX_MATCH_LEN - 1 {
163 dictb.dict[LZ_DICT_SIZE + dst_pos] = c;
164 }
165
166 lookahead_size += 1;
167 if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() {
168 let ins_pos = lookahead_pos + lookahead_size - 3;
169 let hash = ((u32::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK])
170 << (LZ_HASH_SHIFT * 2))
171 ^ ((u32::from(dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK])
172 << LZ_HASH_SHIFT)
173 ^ u32::from(c)))
174 & (LZ_HASH_SIZE as u32 - 1);
175
176 dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize];
177 dictb.hash[hash as usize] = ins_pos as u16;
178 }
179 }
180
181 src_pos += num_bytes_to_process;
182 }
183
184 d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size);
185 if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN {
186 break;
187 }
188
189 let mut len_to_move = 1;
190 let mut cur_match_dist = 0;
191 let mut cur_match_len = if saved_match_len != 0 {
192 saved_match_len
193 } else {
194 u32::from(MIN_MATCH_LEN) - 1
195 };
196 let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK;
197 // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte.
198 if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 {
199 let c = d.dict.b.dict[(cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK];
200 cur_match_len = d.dict.b.dict[cur_pos..(cur_pos + lookahead_size)]
201 .iter()
202 .take_while(|&x| *x == c)
203 .count() as u32;
204 if cur_match_len < MIN_MATCH_LEN.into() {
205 cur_match_len = 0
206 } else {
207 cur_match_dist = 1
208 }
209 }
210
211
212 let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024;
213 let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5;
214 if far_and_small || filter_small || cur_pos == cur_match_dist as usize {
215 cur_match_dist = 0;
216 cur_match_len = 0;
217 }
218
219 if saved_match_len != 0 {
220 if cur_match_len > saved_match_len {
221 record_literal(&mut d.huff, &mut d.lz, saved_lit);
222 if cur_match_len >= 128 {
223 record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist);
224 saved_match_len = 0;
225 len_to_move = cur_match_len as usize;
226 } else {
227 saved_lit = d.dict.b.dict[cur_pos];
228 saved_match_dist = cur_match_dist;
229 saved_match_len = cur_match_len;
230 }
231 } else {
232 record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist);
233 len_to_move = (saved_match_len - 1) as usize;
234 saved_match_len = 0;
235 }
236 } else if cur_match_dist == 0 {
237 record_literal(
238 &mut d.huff,
239 &mut d.lz,
240 d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)],
241 );
242 } else if d.params.greedy_parsing
243 || (d.params.flags & TDEFL_RLE_MATCHES != 0)
244 || cur_match_len >= 128
245 {
246 // If we are using lazy matching, check for matches at the next byte if the current
247 // match was shorter than 128 bytes.
248 record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist);
249 len_to_move = cur_match_len as usize;
250 } else {
251 saved_lit = d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)];
252 saved_match_dist = cur_match_dist;
253 saved_match_len = cur_match_len;
254 }
255
256 lookahead_pos += len_to_move;
257 assert!(lookahead_size >= len_to_move);
258 lookahead_size -= len_to_move;
259 d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE);
260
261 let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8;
262 let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0;
263 let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize;
264 let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw);
265
266 if lz_buf_tight || fat_or_raw {
267 d.params.src_pos = src_pos;
268 // These values are used in flush_block, so we need to write them back here.
269 d.dict.lookahead_size = lookahead_size;
270 d.dict.lookahead_pos = lookahead_pos;
271
272 let n = flush_block(d, callback, TDEFLFlush::None)
273 .unwrap_or(TDEFLStatus::PutBufFailed as i32);
274 if n != 0 {
275 d.params.saved_lit = saved_lit;
276 d.params.saved_match_dist = saved_match_dist;
277 d.params.saved_match_len = saved_match_len;
278 return n > 0;
279 }
280 }
281 }
282
283 d.params.src_pos = src_pos;
284 d.dict.lookahead_size = lookahead_size;
285 d.dict.lookahead_pos = lookahead_pos;
286 d.params.saved_lit = saved_lit;
287 d.params.saved_match_dist = saved_match_dist;
288 d.params.saved_match_len = saved_match_len;
289 true
290}*/