001    /*
002     * @(#)UncompressInputStream.java           0.3-3 06/05/2001
003     *
004     *  This file is part of the HTTPClient package
005     *  Copyright (C) 1996-2001 Ronald Tschalar
006     *
007     *  This library is free software; you can redistribute it and/or
008     *  modify it under the terms of the GNU Lesser General Public
009     *  License as published by the Free Software Foundation; either
010     *  version 2 of the License, or (at your option) any later version.
011     *
012     *  This library is distributed in the hope that it will be useful,
013     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
014     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015     *  Lesser General Public License for more details.
016     *
017     *  You should have received a copy of the GNU Lesser General Public
018     *  License along with this library; if not, write to the Free
019     *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
020     *  MA 02111-1307, USA
021     *
022     *  For questions, suggestions, bug-reports, enhancement-requests etc.
023     *  I may be contacted at:
024     *
025     *  ronald@xxxxxxxxxxxxx
026     *
027     *  The HTTPClient's home page is located at:
028     *
029     *  http://www.innovation.ch/java/HTTPClient/
030     */
031    
032    package org.biojava3.core.util;
033    
034    import java.io.EOFException;
035    import java.io.FileInputStream;
036    import java.io.FileOutputStream;
037    import java.io.FilterInputStream;
038    import java.io.IOException;
039    import java.io.InputStream;
040    
041    
042    /**
043     * This class decompresses an input stream containing data compressed with
044     * the unix "compress" utility (LZC, a LZW variant). This code is based
045     * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written
046     * by Peter Jannesen) and the original compress code.
047     *
048     *
049     *  This version has been modified from the original 0.3-3 version by the
050     *  Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor
051     *  public and to fix a couple of bugs.
052     *  Also:
053     *   - markSupported() returns false
054     *   - add uncompress() static method
055     *
056     * @version 0.3-3 06/05/2001
057     * @author  Ronald Tschalar
058     * @author  Unidata Program Center
059     * @author  Richard Holland - making LZW_MAGIC package-visible.
060     */
061    public class UncompressInputStream extends FilterInputStream {
062      /**
063       * @param is the input stream to decompress
064       * @throws IOException if the header is malformed
065       */
066      public UncompressInputStream(InputStream is) throws IOException {
067        super(is);
068        parse_header();
069      }
070    
071    
072      byte[] one = new byte[1];
073    
074      public synchronized int read() throws IOException {
075        int b = read(one, 0, 1);
076        if (b == 1)
077          return (one[0] & 0xff);
078        else
079          return -1;
080      }
081    
082    
083      // string table stuff
084      private static final int TBL_CLEAR = 0x100;
085      private static final int TBL_FIRST = TBL_CLEAR + 1;
086    
087      private int[] tab_prefix;
088      private byte[] tab_suffix;
089      private int[] zeros = new int[256];
090      private byte[] stack;
091    
092      // various state
093      private boolean block_mode;
094      private int n_bits;
095      private int maxbits;
096      private int maxmaxcode;
097      private int maxcode;
098      private int bitmask;
099      private int oldcode;
100      private byte finchar;
101      private int stackp;
102      private int free_ent;
103    
104      // input buffer
105      private byte[] data = new byte[10000];
106      private int bit_pos = 0, end = 0, got = 0;
107      private boolean eof = false;
108      private static final int EXTRA = 64;
109    
110    
111      public synchronized int read(byte[] buf, int off, int len)
112          throws IOException {
113        if (eof) return -1;
114        int start = off;
115    
116    /* Using local copies of various variables speeds things up by as
117         * much as 30% !
118         */
119        int[] l_tab_prefix = tab_prefix;
120        byte[] l_tab_suffix = tab_suffix;
121        byte[] l_stack = stack;
122        int l_n_bits = n_bits;
123        int l_maxcode = maxcode;
124        int l_maxmaxcode = maxmaxcode;
125        int l_bitmask = bitmask;
126        int l_oldcode = oldcode;
127        byte l_finchar = finchar;
128        int l_stackp = stackp;
129        int l_free_ent = free_ent;
130        byte[] l_data = data;
131        int l_bit_pos = bit_pos;
132    
133    
134    // empty stack if stuff still left
135    
136        int s_size = l_stack.length - l_stackp;
137        if (s_size > 0) {
138          int num = (s_size >= len) ? len : s_size;
139          System.arraycopy(l_stack, l_stackp, buf, off, num);
140          off += num;
141          len -= num;
142          l_stackp += num;
143        }
144    
145        if (len == 0) {
146          stackp = l_stackp;
147          return off - start;
148        }
149    
150    
151    // loop, filling local buffer until enough data has been decompressed
152    
153        main_loop: do {
154          if (end < EXTRA) fill();
155    
156          int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
157              (end << 3) - (l_n_bits - 1);
158    
159          while (l_bit_pos < bit_in) {
160            // handle 1-byte reads correctly
161            if (len == 0) {
162              n_bits = l_n_bits;
163              maxcode = l_maxcode;
164              maxmaxcode = l_maxmaxcode;
165              bitmask = l_bitmask;
166              oldcode = l_oldcode;
167              finchar = l_finchar;
168              stackp = l_stackp;
169              free_ent = l_free_ent;
170              bit_pos = l_bit_pos;
171    
172              return off - start;
173            }
174    
175            // check for code-width expansion
176    
177            if (l_free_ent > l_maxcode) {
178              int n_bytes = l_n_bits << 3;
179              l_bit_pos = (l_bit_pos - 1) +
180                  n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
181    
182              l_n_bits++;
183              l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode :
184                  (1 << l_n_bits) - 1;
185    
186              if (debug)
187                System.err.println("Code-width expanded to " + l_n_bits);
188    
189              l_bitmask = (1 << l_n_bits) - 1;
190              l_bit_pos = resetbuf(l_bit_pos);
191              continue main_loop;
192            }
193    
194    
195            // read next code
196    
197            int pos = l_bit_pos >> 3;
198            int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) |
199                ((l_data[pos + 2] & 0xFF) << 16))
200                >> (l_bit_pos & 0x7)) & l_bitmask;
201            l_bit_pos += l_n_bits;
202    
203    
204            // handle first iteration
205    
206            if (l_oldcode == -1) {
207              if (code >= 256)
208                throw new IOException("corrupt input: " + code +
209                    " > 255");
210              l_finchar = (byte) (l_oldcode = code);
211              buf[off++] = l_finchar;
212              len--;
213              continue;
214            }
215    
216    
217            // handle CLEAR code
218    
219            if (code == TBL_CLEAR && block_mode) {
220              System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length);
221              l_free_ent = TBL_FIRST - 1;
222    
223              int n_bytes = l_n_bits << 3;
224              l_bit_pos = (l_bit_pos - 1) +
225                  n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
226              l_n_bits = INIT_BITS;
227              l_maxcode = (1 << l_n_bits) - 1;
228              l_bitmask = l_maxcode;
229    
230              if (debug) System.err.println("Code tables reset");
231    
232              l_bit_pos = resetbuf(l_bit_pos);
233              continue main_loop;
234            }
235    
236    
237            // setup
238    
239            int incode = code;
240            l_stackp = l_stack.length;
241    
242    
243            // Handle KwK case
244    
245            if (code >= l_free_ent) {
246              if (code > l_free_ent)
247                throw new IOException("corrupt input: code=" + code +
248                    ", free_ent=" + l_free_ent);
249    
250              l_stack[--l_stackp] = l_finchar;
251              code = l_oldcode;
252            }
253    
254    
255            // Generate output characters in reverse order
256    
257            while (code >= 256) {
258              l_stack[--l_stackp] = l_tab_suffix[code];
259              code = l_tab_prefix[code];
260            }
261            l_finchar = l_tab_suffix[code];
262            buf[off++] = l_finchar;
263            len--;
264    
265    
266            // And put them out in forward order
267    
268            s_size = l_stack.length - l_stackp;
269            int num = (s_size >= len) ? len : s_size;
270            System.arraycopy(l_stack, l_stackp, buf, off, num);
271            off += num;
272            len -= num;
273            l_stackp += num;
274    
275    
276            // generate new entry in table
277    
278            if (l_free_ent < l_maxmaxcode) {
279              l_tab_prefix[l_free_ent] = l_oldcode;
280              l_tab_suffix[l_free_ent] = l_finchar;
281              l_free_ent++;
282            }
283    
284    
285            // Remember previous code
286    
287            l_oldcode = incode;
288    
289    
290            // if output buffer full, then return
291    
292            if (len == 0) {
293              n_bits = l_n_bits;
294              maxcode = l_maxcode;
295              bitmask = l_bitmask;
296              oldcode = l_oldcode;
297              finchar = l_finchar;
298              stackp = l_stackp;
299              free_ent = l_free_ent;
300              bit_pos = l_bit_pos;
301    
302              return off - start;
303            }
304          }
305    
306          l_bit_pos = resetbuf(l_bit_pos);
307        } while (got > 0);
308    
309        n_bits = l_n_bits;
310        maxcode = l_maxcode;
311        bitmask = l_bitmask;
312        oldcode = l_oldcode;
313        finchar = l_finchar;
314        stackp = l_stackp;
315        free_ent = l_free_ent;
316        bit_pos = l_bit_pos;
317    
318        eof = true;
319        return off - start;
320      }
321    
322    
323      /**
324       * Moves the unread data in the buffer to the beginning and resets
325       * the pointers.
326       */
327      private final int resetbuf(int bit_pos) {
328        int pos = bit_pos >> 3;
329        System.arraycopy(data, pos, data, 0, end - pos);
330        end -= pos;
331        return 0;
332      }
333    
334    
335      private final void fill() throws IOException {
336        got = in.read(data, end, data.length - 1 - end);
337        if (got > 0) end += got;
338      }
339    
340    
341      public synchronized long skip(long num) throws IOException {
342        byte[] tmp = new byte[(int) num];
343        int got = read(tmp, 0, (int) num);
344    
345        if (got > 0)
346          return (long) got;
347        else
348          return 0L;
349      }
350    
351    
352      public synchronized int available() throws IOException {
353        if (eof) return 0;
354    
355        return in.available();
356      }
357    
358    
359      static final int LZW_MAGIC = 0x1f9d;
360      private static final int MAX_BITS = 16;
361      private static final int INIT_BITS = 9;
362      private static final int HDR_MAXBITS = 0x1f;
363      private static final int HDR_EXTENDED = 0x20;
364      private static final int HDR_FREE = 0x40;
365      private static final int HDR_BLOCK_MODE = 0x80;
366    
367      private void parse_header() throws IOException {
368    // read in and check magic number
369    
370        int t = in.read();
371        if (t < 0) throw new EOFException("Failed to read magic number");
372        int magic = (t & 0xff) << 8;
373        t = in.read();
374        if (t < 0) throw new EOFException("Failed to read magic number");
375        magic += t & 0xff;
376        if (magic != LZW_MAGIC)
377          throw new IOException("Input not in compress format (read " +
378              "magic number 0x" +
379              Integer.toHexString(magic) + ")");
380    
381    
382    // read in header byte
383    
384        int header = in.read();
385        if (header < 0) throw new EOFException("Failed to read header");
386    
387        block_mode = (header & HDR_BLOCK_MODE) > 0;
388        maxbits = header & HDR_MAXBITS;
389    
390        if (maxbits > MAX_BITS)
391          throw new IOException("Stream compressed with " + maxbits +
392              " bits, but can only handle " + MAX_BITS +
393              " bits");
394    
395        if ((header & HDR_EXTENDED) > 0)
396          throw new IOException("Header extension bit set");
397    
398        if ((header & HDR_FREE) > 0)
399          throw new IOException("Header bit 6 set");
400    
401        if (debug) {
402          System.err.println("block mode: " + block_mode);
403          System.err.println("max bits:   " + maxbits);
404        }
405    
406    
407    // initialize stuff
408    
409        maxmaxcode = 1 << maxbits;
410        n_bits = INIT_BITS;
411        maxcode = (1 << n_bits) - 1;
412        bitmask = maxcode;
413        oldcode = -1;
414        finchar = 0;
415        free_ent = block_mode ? TBL_FIRST : 256;
416    
417        tab_prefix = new int[1 << maxbits];
418        tab_suffix = new byte[1 << maxbits];
419        stack = new byte[1 << maxbits];
420        stackp = stack.length;
421    
422        for (int idx = 255; idx >= 0; idx--)
423          tab_suffix[idx] = (byte) idx;
424      }
425    
426      /**
427       * This stream does not support mark/reset on the stream.
428       *
429       * @return false
430       */
431      public boolean markSupported() {
432        return false;
433      }
434    
435      static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
436        long start = System.currentTimeMillis();
437    
438        InputStream in = new UncompressInputStream(  new FileInputStream(fileInName));
439    
440        int total = 0;
441        byte[] buffer = new byte[100000];
442        while (true) {
443          int bytesRead = in.read(buffer);
444          if (bytesRead == -1) break;
445          out.write(buffer, 0, bytesRead);
446          total += bytesRead;
447        }
448        in.close();
449        out.close();
450    
451        if (debugTiming) {
452          long end = System.currentTimeMillis();
453          System.err.println("Decompressed " + total + " bytes");
454          System.err.println("Time: " + (end - start) / 1000. + " seconds");
455        }
456      }
457    
458    
459      private static final boolean debug = false, debugTiming = false;
460    
461      public static void main(String args[]) throws Exception {
462        if (args.length != 1) {
463          System.err.println("Usage: UncompressInputStream <file>");
464          System.exit(1);
465        }
466    
467        InputStream in =
468            new UncompressInputStream(new FileInputStream(args[0]));
469    
470        byte[] buf = new byte[100000];
471        int tot = 0;
472        long beg = System.currentTimeMillis();
473    
474        while (true) {
475          int got = in.read(buf);
476          if (got < 0) break;
477          System.out.write(buf, 0, got);
478          tot += got;
479        }
480    
481        long end = System.currentTimeMillis();
482        System.err.println("Decompressed " + tot + " bytes");
483        System.err.println("Time: " + (end - beg) / 1000. + " seconds");
484      }
485    }