001 /*
002 * @(#)UncompressInputStream.java 0.3-3 06/05/2001
003 *
004 * This file is part of the HTTPClient package
005 * Copyright (C) 1996-2001 Ronald Tschalar
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this library; if not, write to the Free
019 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
020 * MA 02111-1307, USA
021 *
022 * For questions, suggestions, bug-reports, enhancement-requests etc.
023 * I may be contacted at:
024 *
025 * ronald@xxxxxxxxxxxxx
026 *
027 * The HTTPClient's home page is located at:
028 *
029 * http://www.innovation.ch/java/HTTPClient/
030 */
031
032 package org.biojava3.core.util;
033
034 import java.io.EOFException;
035 import java.io.FileInputStream;
036 import java.io.FileOutputStream;
037 import java.io.FilterInputStream;
038 import java.io.IOException;
039 import java.io.InputStream;
040
041
042 /**
043 * This class decompresses an input stream containing data compressed with
044 * the unix "compress" utility (LZC, a LZW variant). This code is based
045 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written
046 * by Peter Jannesen) and the original compress code.
047 *
048 *
049 * This version has been modified from the original 0.3-3 version by the
050 * Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor
051 * public and to fix a couple of bugs.
052 * Also:
053 * - markSupported() returns false
054 * - add uncompress() static method
055 *
056 * @version 0.3-3 06/05/2001
057 * @author Ronald Tschalar
058 * @author Unidata Program Center
059 * @author Richard Holland - making LZW_MAGIC package-visible.
060 */
061 public class UncompressInputStream extends FilterInputStream {
062 /**
063 * @param is the input stream to decompress
064 * @throws IOException if the header is malformed
065 */
066 public UncompressInputStream(InputStream is) throws IOException {
067 super(is);
068 parse_header();
069 }
070
071
072 byte[] one = new byte[1];
073
074 public synchronized int read() throws IOException {
075 int b = read(one, 0, 1);
076 if (b == 1)
077 return (one[0] & 0xff);
078 else
079 return -1;
080 }
081
082
083 // string table stuff
084 private static final int TBL_CLEAR = 0x100;
085 private static final int TBL_FIRST = TBL_CLEAR + 1;
086
087 private int[] tab_prefix;
088 private byte[] tab_suffix;
089 private int[] zeros = new int[256];
090 private byte[] stack;
091
092 // various state
093 private boolean block_mode;
094 private int n_bits;
095 private int maxbits;
096 private int maxmaxcode;
097 private int maxcode;
098 private int bitmask;
099 private int oldcode;
100 private byte finchar;
101 private int stackp;
102 private int free_ent;
103
104 // input buffer
105 private byte[] data = new byte[10000];
106 private int bit_pos = 0, end = 0, got = 0;
107 private boolean eof = false;
108 private static final int EXTRA = 64;
109
110
111 public synchronized int read(byte[] buf, int off, int len)
112 throws IOException {
113 if (eof) return -1;
114 int start = off;
115
116 /* Using local copies of various variables speeds things up by as
117 * much as 30% !
118 */
119 int[] l_tab_prefix = tab_prefix;
120 byte[] l_tab_suffix = tab_suffix;
121 byte[] l_stack = stack;
122 int l_n_bits = n_bits;
123 int l_maxcode = maxcode;
124 int l_maxmaxcode = maxmaxcode;
125 int l_bitmask = bitmask;
126 int l_oldcode = oldcode;
127 byte l_finchar = finchar;
128 int l_stackp = stackp;
129 int l_free_ent = free_ent;
130 byte[] l_data = data;
131 int l_bit_pos = bit_pos;
132
133
134 // empty stack if stuff still left
135
136 int s_size = l_stack.length - l_stackp;
137 if (s_size > 0) {
138 int num = (s_size >= len) ? len : s_size;
139 System.arraycopy(l_stack, l_stackp, buf, off, num);
140 off += num;
141 len -= num;
142 l_stackp += num;
143 }
144
145 if (len == 0) {
146 stackp = l_stackp;
147 return off - start;
148 }
149
150
151 // loop, filling local buffer until enough data has been decompressed
152
153 main_loop: do {
154 if (end < EXTRA) fill();
155
156 int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
157 (end << 3) - (l_n_bits - 1);
158
159 while (l_bit_pos < bit_in) {
160 // handle 1-byte reads correctly
161 if (len == 0) {
162 n_bits = l_n_bits;
163 maxcode = l_maxcode;
164 maxmaxcode = l_maxmaxcode;
165 bitmask = l_bitmask;
166 oldcode = l_oldcode;
167 finchar = l_finchar;
168 stackp = l_stackp;
169 free_ent = l_free_ent;
170 bit_pos = l_bit_pos;
171
172 return off - start;
173 }
174
175 // check for code-width expansion
176
177 if (l_free_ent > l_maxcode) {
178 int n_bytes = l_n_bits << 3;
179 l_bit_pos = (l_bit_pos - 1) +
180 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
181
182 l_n_bits++;
183 l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode :
184 (1 << l_n_bits) - 1;
185
186 if (debug)
187 System.err.println("Code-width expanded to " + l_n_bits);
188
189 l_bitmask = (1 << l_n_bits) - 1;
190 l_bit_pos = resetbuf(l_bit_pos);
191 continue main_loop;
192 }
193
194
195 // read next code
196
197 int pos = l_bit_pos >> 3;
198 int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) |
199 ((l_data[pos + 2] & 0xFF) << 16))
200 >> (l_bit_pos & 0x7)) & l_bitmask;
201 l_bit_pos += l_n_bits;
202
203
204 // handle first iteration
205
206 if (l_oldcode == -1) {
207 if (code >= 256)
208 throw new IOException("corrupt input: " + code +
209 " > 255");
210 l_finchar = (byte) (l_oldcode = code);
211 buf[off++] = l_finchar;
212 len--;
213 continue;
214 }
215
216
217 // handle CLEAR code
218
219 if (code == TBL_CLEAR && block_mode) {
220 System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length);
221 l_free_ent = TBL_FIRST - 1;
222
223 int n_bytes = l_n_bits << 3;
224 l_bit_pos = (l_bit_pos - 1) +
225 n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
226 l_n_bits = INIT_BITS;
227 l_maxcode = (1 << l_n_bits) - 1;
228 l_bitmask = l_maxcode;
229
230 if (debug) System.err.println("Code tables reset");
231
232 l_bit_pos = resetbuf(l_bit_pos);
233 continue main_loop;
234 }
235
236
237 // setup
238
239 int incode = code;
240 l_stackp = l_stack.length;
241
242
243 // Handle KwK case
244
245 if (code >= l_free_ent) {
246 if (code > l_free_ent)
247 throw new IOException("corrupt input: code=" + code +
248 ", free_ent=" + l_free_ent);
249
250 l_stack[--l_stackp] = l_finchar;
251 code = l_oldcode;
252 }
253
254
255 // Generate output characters in reverse order
256
257 while (code >= 256) {
258 l_stack[--l_stackp] = l_tab_suffix[code];
259 code = l_tab_prefix[code];
260 }
261 l_finchar = l_tab_suffix[code];
262 buf[off++] = l_finchar;
263 len--;
264
265
266 // And put them out in forward order
267
268 s_size = l_stack.length - l_stackp;
269 int num = (s_size >= len) ? len : s_size;
270 System.arraycopy(l_stack, l_stackp, buf, off, num);
271 off += num;
272 len -= num;
273 l_stackp += num;
274
275
276 // generate new entry in table
277
278 if (l_free_ent < l_maxmaxcode) {
279 l_tab_prefix[l_free_ent] = l_oldcode;
280 l_tab_suffix[l_free_ent] = l_finchar;
281 l_free_ent++;
282 }
283
284
285 // Remember previous code
286
287 l_oldcode = incode;
288
289
290 // if output buffer full, then return
291
292 if (len == 0) {
293 n_bits = l_n_bits;
294 maxcode = l_maxcode;
295 bitmask = l_bitmask;
296 oldcode = l_oldcode;
297 finchar = l_finchar;
298 stackp = l_stackp;
299 free_ent = l_free_ent;
300 bit_pos = l_bit_pos;
301
302 return off - start;
303 }
304 }
305
306 l_bit_pos = resetbuf(l_bit_pos);
307 } while (got > 0);
308
309 n_bits = l_n_bits;
310 maxcode = l_maxcode;
311 bitmask = l_bitmask;
312 oldcode = l_oldcode;
313 finchar = l_finchar;
314 stackp = l_stackp;
315 free_ent = l_free_ent;
316 bit_pos = l_bit_pos;
317
318 eof = true;
319 return off - start;
320 }
321
322
323 /**
324 * Moves the unread data in the buffer to the beginning and resets
325 * the pointers.
326 */
327 private final int resetbuf(int bit_pos) {
328 int pos = bit_pos >> 3;
329 System.arraycopy(data, pos, data, 0, end - pos);
330 end -= pos;
331 return 0;
332 }
333
334
335 private final void fill() throws IOException {
336 got = in.read(data, end, data.length - 1 - end);
337 if (got > 0) end += got;
338 }
339
340
341 public synchronized long skip(long num) throws IOException {
342 byte[] tmp = new byte[(int) num];
343 int got = read(tmp, 0, (int) num);
344
345 if (got > 0)
346 return (long) got;
347 else
348 return 0L;
349 }
350
351
352 public synchronized int available() throws IOException {
353 if (eof) return 0;
354
355 return in.available();
356 }
357
358
359 static final int LZW_MAGIC = 0x1f9d;
360 private static final int MAX_BITS = 16;
361 private static final int INIT_BITS = 9;
362 private static final int HDR_MAXBITS = 0x1f;
363 private static final int HDR_EXTENDED = 0x20;
364 private static final int HDR_FREE = 0x40;
365 private static final int HDR_BLOCK_MODE = 0x80;
366
367 private void parse_header() throws IOException {
368 // read in and check magic number
369
370 int t = in.read();
371 if (t < 0) throw new EOFException("Failed to read magic number");
372 int magic = (t & 0xff) << 8;
373 t = in.read();
374 if (t < 0) throw new EOFException("Failed to read magic number");
375 magic += t & 0xff;
376 if (magic != LZW_MAGIC)
377 throw new IOException("Input not in compress format (read " +
378 "magic number 0x" +
379 Integer.toHexString(magic) + ")");
380
381
382 // read in header byte
383
384 int header = in.read();
385 if (header < 0) throw new EOFException("Failed to read header");
386
387 block_mode = (header & HDR_BLOCK_MODE) > 0;
388 maxbits = header & HDR_MAXBITS;
389
390 if (maxbits > MAX_BITS)
391 throw new IOException("Stream compressed with " + maxbits +
392 " bits, but can only handle " + MAX_BITS +
393 " bits");
394
395 if ((header & HDR_EXTENDED) > 0)
396 throw new IOException("Header extension bit set");
397
398 if ((header & HDR_FREE) > 0)
399 throw new IOException("Header bit 6 set");
400
401 if (debug) {
402 System.err.println("block mode: " + block_mode);
403 System.err.println("max bits: " + maxbits);
404 }
405
406
407 // initialize stuff
408
409 maxmaxcode = 1 << maxbits;
410 n_bits = INIT_BITS;
411 maxcode = (1 << n_bits) - 1;
412 bitmask = maxcode;
413 oldcode = -1;
414 finchar = 0;
415 free_ent = block_mode ? TBL_FIRST : 256;
416
417 tab_prefix = new int[1 << maxbits];
418 tab_suffix = new byte[1 << maxbits];
419 stack = new byte[1 << maxbits];
420 stackp = stack.length;
421
422 for (int idx = 255; idx >= 0; idx--)
423 tab_suffix[idx] = (byte) idx;
424 }
425
426 /**
427 * This stream does not support mark/reset on the stream.
428 *
429 * @return false
430 */
431 public boolean markSupported() {
432 return false;
433 }
434
435 static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
436 long start = System.currentTimeMillis();
437
438 InputStream in = new UncompressInputStream( new FileInputStream(fileInName));
439
440 int total = 0;
441 byte[] buffer = new byte[100000];
442 while (true) {
443 int bytesRead = in.read(buffer);
444 if (bytesRead == -1) break;
445 out.write(buffer, 0, bytesRead);
446 total += bytesRead;
447 }
448 in.close();
449 out.close();
450
451 if (debugTiming) {
452 long end = System.currentTimeMillis();
453 System.err.println("Decompressed " + total + " bytes");
454 System.err.println("Time: " + (end - start) / 1000. + " seconds");
455 }
456 }
457
458
459 private static final boolean debug = false, debugTiming = false;
460
461 public static void main(String args[]) throws Exception {
462 if (args.length != 1) {
463 System.err.println("Usage: UncompressInputStream <file>");
464 System.exit(1);
465 }
466
467 InputStream in =
468 new UncompressInputStream(new FileInputStream(args[0]));
469
470 byte[] buf = new byte[100000];
471 int tot = 0;
472 long beg = System.currentTimeMillis();
473
474 while (true) {
475 int got = in.read(buf);
476 if (got < 0) break;
477 System.out.write(buf, 0, got);
478 tot += got;
479 }
480
481 long end = System.currentTimeMillis();
482 System.err.println("Decompressed " + tot + " bytes");
483 System.err.println("Time: " + (end - beg) / 1000. + " seconds");
484 }
485 }