libbz2_rs_sys/
huffman.rs

1#![forbid(unsafe_code)]
2
3use crate::{
4    assert_h,
5    bzlib::{BZ_MAX_ALPHA_SIZE, BZ_MAX_CODE_LEN},
6};
7
8#[inline]
9const fn weight_of(zz0: i32) -> i32 {
10    zz0 & 0xffffff00u32 as i32
11}
12
13#[inline]
14const fn depth_of(zz1: i32) -> i32 {
15    zz1 & 0xff
16}
17
18#[inline]
19fn add_weights(zw1: i32, zw2: i32) -> i32 {
20    (weight_of(zw1)).wrapping_add(weight_of(zw2)) | (1 + Ord::max(depth_of(zw1), depth_of(zw2)))
21}
22
23#[inline]
24fn upheap(
25    heap: &mut [i32; BZ_MAX_ALPHA_SIZE + 2],
26    weight: &mut [i32; BZ_MAX_ALPHA_SIZE * 2],
27    mut z: usize,
28) {
29    let tmp = heap[z];
30    while weight[tmp as usize] < weight[heap[z >> 1] as usize] {
31        heap[z] = heap[z >> 1];
32        z >>= 1;
33    }
34    heap[z] = tmp;
35}
36
37#[inline]
38fn downheap(
39    heap: &mut [i32; BZ_MAX_ALPHA_SIZE + 2],
40    weight: &mut [i32; BZ_MAX_ALPHA_SIZE * 2],
41    nHeap: usize,
42    mut z: usize,
43) {
44    let tmp = heap[z];
45    loop {
46        let mut yy = z << 1;
47        if yy > nHeap {
48            break;
49        }
50        if yy < nHeap && weight[heap[yy + 1] as usize] < weight[heap[yy] as usize] {
51            yy += 1;
52        }
53        if weight[tmp as usize] < weight[heap[yy] as usize] {
54            break;
55        }
56        heap[z] = heap[yy];
57        z = yy;
58    }
59    heap[z] = tmp;
60}
61
62pub(crate) fn make_code_lengths(len: &mut [u8], freq: &[i32], alphaSize: usize, maxLen: i32) {
63    /*--
64       Nodes and heap entries run from 1.  Entry 0
65       for both the heap and nodes is a sentinel.
66    --*/
67    let mut nNodes: usize;
68    let mut nHeap: usize;
69    let mut j: i32;
70    let mut heap = [0i32; BZ_MAX_ALPHA_SIZE + 2];
71    let mut weight = [0i32; BZ_MAX_ALPHA_SIZE * 2];
72    let mut parent = [0i32; BZ_MAX_ALPHA_SIZE * 2];
73
74    for i in 0..alphaSize {
75        weight[i + 1] = (if freq[i] == 0 { 1 } else { freq[i] }) << 8;
76    }
77
78    loop {
79        nNodes = alphaSize;
80        nHeap = 0;
81
82        heap[0] = 0;
83        weight[0] = 0;
84        parent[0] = -2;
85
86        parent[1..=alphaSize].fill(-1);
87
88        for i in 1..=alphaSize {
89            nHeap += 1;
90            heap[nHeap] = i as i32;
91            upheap(&mut heap, &mut weight, nHeap);
92        }
93
94        assert_h!(nHeap < (BZ_MAX_ALPHA_SIZE + 2), 2001);
95
96        while nHeap > 1 {
97            let n1 = heap[1] as usize;
98            heap[1] = heap[nHeap];
99            nHeap -= 1;
100            downheap(&mut heap, &mut weight, nHeap, 1);
101            let n2 = heap[1] as usize;
102            heap[1] = heap[nHeap];
103            nHeap -= 1;
104            downheap(&mut heap, &mut weight, nHeap, 1);
105            nNodes += 1;
106            parent[n1] = nNodes as i32;
107            parent[n2] = nNodes as i32;
108            weight[nNodes] = add_weights(weight[n1], weight[n2]);
109            parent[nNodes] = -1;
110            nHeap += 1;
111            heap[nHeap] = nNodes as i32;
112            upheap(&mut heap, &mut weight, nHeap);
113        }
114
115        assert_h!(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
116
117        let mut tooLong = false;
118        for i in 1..=alphaSize {
119            j = 0;
120            let mut k = i;
121            while parent[k] >= 0 {
122                k = parent[k] as usize;
123                j += 1;
124            }
125            len[i - 1] = j as u8;
126            if j > maxLen {
127                tooLong = true;
128            }
129        }
130
131        if !tooLong {
132            break;
133        }
134
135        /* 17 Oct 04: keep-going condition for the following loop used
136        to be 'i < alphaSize', which missed the last element,
137        theoretically leading to the possibility of the compressor
138        looping.  However, this count-scaling step is only needed if
139        one of the generated Huffman code words is longer than
140        maxLen, which up to and including version 1.0.2 was 20 bits,
141        which is extremely unlikely.  In version 1.0.3 maxLen was
142        changed to 17 bits, which has minimal effect on compression
143        ratio, but does mean this scaling step is used from time to
144        time, enough to verify that it works.
145
146        This means that bzip2-1.0.3 and later will only produce
147        Huffman codes with a maximum length of 17 bits.  However, in
148        order to preserve backwards compatibility with bitstreams
149        produced by versions pre-1.0.3, the decompressor must still
150        handle lengths of up to 20. */
151
152        for weight in weight[1..=alphaSize].iter_mut() {
153            *weight = (1 + (*weight >> 8) / 2) << 8;
154        }
155    }
156}
157
158#[inline]
159pub(crate) fn assign_codes(code: &mut [u32], length: &[u8], minLen: u8, maxLen: u8) {
160    let mut vec: u32 = 0;
161    for n in minLen..=maxLen {
162        for (i, &l) in length.iter().enumerate() {
163            if l == n {
164                code[i] = vec;
165                vec += 1;
166            }
167        }
168        vec <<= 1;
169    }
170}
171
172#[inline(always)]
173pub(crate) fn create_decode_tables(
174    limit: &mut [i32; 258],
175    base: &mut [i32; 258],
176    perm: &mut [u16; 258],
177    length: &[u8],
178    minLen: u8,
179    maxLen: u8,
180) {
181    assert!(length.len() <= 258);
182
183    let mut pp = 0;
184    for i in minLen..=maxLen {
185        for (j, e) in length.iter().enumerate() {
186            if *e == i {
187                perm[pp] = j as u16;
188                pp += 1;
189            }
190        }
191    }
192
193    base[0..BZ_MAX_CODE_LEN].fill(0);
194
195    for l in length {
196        base[usize::from(*l) + 1] += 1;
197    }
198
199    for i in 1..BZ_MAX_CODE_LEN {
200        base[i] += base[i - 1];
201    }
202
203    limit[0..BZ_MAX_CODE_LEN].fill(0);
204
205    let mut vec = 0;
206    for i in usize::from(minLen)..=usize::from(maxLen) {
207        vec += base[i + 1] - base[i];
208        limit[i] = vec - 1;
209        vec <<= 1;
210    }
211
212    for i in usize::from(minLen)..usize::from(maxLen) {
213        base[i + 1] = (2 * (limit[i] + 1)) - base[i + 1];
214    }
215}