001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    package org.biojava3.core.sequence.io;
006    
007    import java.io.FileReader;
008    import java.io.IOException;
009    import java.io.InputStreamReader;
010    import java.io.Reader;
011    
/**
 * A buffered character reader that additionally keeps a running count of the
 * input consumed so far, so that callers can record offsets into the stream
 * while still benefiting from buffered reads. The implementation is derived
 * from the JDK source of {@code java.io.BufferedReader} with the
 * {@link #getBytesRead()} bookkeeping added.
 *
 * <p> Reads text from a character-input stream, buffering characters so as to
 * provide for the efficient reading of characters, arrays, and lines.
 *
 * <p> The buffer size may be specified, or the default size may be used.  The
 * default is large enough for most purposes.
 *
 * <p> In general, each read request made of a Reader causes a corresponding
 * read request to be made of the underlying character or byte stream.  It is
 * therefore advisable to wrap a BufferedReaderBytesRead around any Reader whose
 * read() operations may be costly, such as FileReaders and InputStreamReaders.
 * For example,
 *
 * <pre>
 * BufferedReaderBytesRead in
 *   = new BufferedReaderBytesRead(new FileReader("foo.in"));
 * </pre>
 *
 * will buffer the input from the specified file.  Without buffering, each
 * invocation of read() or readLine() could cause bytes to be read from the
 * file, converted into characters, and then returned, which can be very
 * inefficient.
 *
 * <p> <b>Counting rules.</b> The counter is advanced once per character
 * consumed, so it only equals a byte offset when the underlying encoding uses
 * one byte per character. When {@link #readLine()} ends a line on a carriage
 * return it counts the carriage return <em>and</em> an anticipated line feed
 * immediately, so that the counter already points at the start of the next
 * line; the line feed is then discarded by the next read operation without
 * being counted a second time. A carriage return that is not followed by a
 * line feed is therefore over-counted by one.
 *
 * @see FileReader
 * @see InputStreamReader
 *
 * @author Scooter Willis &lt;willishf at gmail dot com&gt;
 * @author Mark Reinhold (original BufferedReader)
 */
public class BufferedReaderBytesRead extends Reader {

    /** Value of {@link #markedChar} once the read-ahead limit was exceeded. */
    private static final int INVALIDATED = -2;
    /** Value of {@link #markedChar} while no mark has been set. */
    private static final int UNMARKED = -1;
    /** Buffer size used by the single-argument constructor. */
    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    /** Initial capacity of the builder used for lines that span buffer refills. */
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    /** The wrapped reader; {@code null} once this reader has been closed. */
    private Reader in;
    /** The character buffer; valid data lives in {@code [nextChar, nChars)}. */
    private char[] cb;
    private int nChars, nextChar;
    /** Buffer index of the mark, or {@link #UNMARKED} / {@link #INVALIDATED}. */
    private int markedChar = UNMARKED;
    /** Read-ahead limit passed to {@link #mark(int)}; meaningful only while a mark is set. */
    private int readAheadLimit = 0;

    /** If the next character is a line feed, skip it (it was already counted by readLine). */
    private boolean skipLF = false;
    /** The skipLF flag when the mark was set. */
    private boolean markedSkipLF = false;
    /** Number of characters consumed so far (see the class comment for the CR-LF rule). */
    long bytesRead = 0;
    /** The value of {@link #bytesRead} when the mark was set. */
    private long markedBytesRead = 0;

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size.
     *
     * @param  in   A Reader
     * @param  sz   Input-buffer size
     *
     * @exception  IllegalArgumentException  If sz is &lt;= 0
     */
    public BufferedReaderBytesRead(Reader in, int sz) {
        super(in);
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        cb = new char[sz];
        nextChar = nChars = 0;
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer.
     *
     * @param  in   A Reader
     */
    public BufferedReaderBytesRead(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /**
     * Returns the number of characters consumed from this reader so far.
     * After a call to {@link #readLine()} the value includes the line
     * terminator, with a carriage return always counted as a CR-LF pair.
     * {@link #reset()} rewinds the counter to its value at {@link #mark(int)}.
     *
     * @return the running count of consumed characters
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /** Checks to make sure that the stream has not been closed. */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Fills the input buffer, taking the mark into account if it is valid.
     */
    private void fill() throws IOException {
        int dst;
        if (markedChar <= UNMARKED) {
            /* No mark */
            dst = 0;
        } else {
            /* Marked */
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
                /* Gone past read-ahead limit: Invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
                    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                } else {
                    /* Reallocate buffer to accommodate read-ahead limit */
                    char[] ncb = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                }
                markedChar = 0;
                dst = delta;
                nextChar = nChars = delta;
            }
        }

        int n;
        do {
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range
     *         0 to 65535 ({@code 0x00-0xffff}), or -1 if the
     *         end of the stream has been reached
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars) {
                        return -1;
                    }
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        /* This LF was already counted when readLine saw the CR. */
                        nextChar++;
                        continue;
                    }
                }
                bytesRead++;
                return cb[nextChar++];
            }
        }
    }

    /**
     * Reads characters into a portion of an array, reading from the underlying
     * stream if necessary. Does not touch the consumed-character counter; the
     * caller accounts for the returned count.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
            /* If the requested length is at least as large as the buffer, and
            if there is no mark/reset activity, and if line feeds are not
            being skipped, do not bother to copy the characters into the
            local buffer.  In this way buffered streams will cascade
            harmlessly. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) {
            return -1;
        }
        if (skipLF) {
            skipLF = false;
            if (cb[nextChar] == '\n') {
                /* Pre-counted by readLine, so it is dropped without counting. */
                nextChar++;
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) {
                    return -1;
                }
            }
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p> This method implements the general contract of the corresponding
     * <code>{@link Reader#read(char[], int, int) read}</code> method of the
     * <code>{@link Reader}</code> class.  As an additional convenience, it
     * attempts to read as many characters as possible by repeatedly invoking
     * the <code>read</code> method of the underlying stream.  This iterated
     * <code>read</code> continues until one of the following conditions becomes
     * true: <ul>
     *
     *   <li> The specified number of characters have been read,
     *
     *   <li> The <code>read</code> method of the underlying stream returns
     *   <code>-1</code>, indicating end-of-file, or
     *
     *   <li> The <code>ready</code> method of the underlying stream
     *   returns <code>false</code>, indicating that further input requests
     *   would block.
     *
     * </ul> If the first <code>read</code> on the underlying stream returns
     * <code>-1</code> to indicate end-of-file then this method returns
     * <code>-1</code>.  Otherwise this method returns the number of characters
     * actually read, and the consumed-character counter is advanced by that
     * amount.
     *
     * <p> Ordinarily this method takes characters from this stream's character
     * buffer, filling it from the underlying stream as necessary.  If,
     * however, the buffer is empty, the mark is not valid, and the requested
     * length is at least as large as the buffer, then this method will read
     * characters directly from the underlying stream into the given array.
     * Thus redundant <code>BufferedReaderBytesRead</code>s will not copy data
     * unnecessarily.
     *
     * @param      cbuf  Destination buffer
     * @param      off   Offset at which to start storing characters
     * @param      len   Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        synchronized (lock) {
            ensureOpen();
            if ((off < 0) || (off > cbuf.length) || (len < 0)
                    || ((off + len) > cbuf.length) || ((off + len) < 0)) {
                throw new IndexOutOfBoundsException();
            } else if (len == 0) {
                return 0;
            }

            int n = read1(cbuf, off, len);
            if (n <= 0) {
                return n;
            }
            while ((n < len) && in.ready()) {
                int n1 = read1(cbuf, off + n, len - n);
                if (n1 <= 0) {
                    break;
                }
                n += n1;
            }
            bytesRead += n;
            return n;
        }
    }

    /**
     * Reads a line of text.  A line is considered to be terminated by any one
     * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
     * followed immediately by a linefeed.
     *
     * <p> All counting for the line (content, terminator, and the anticipated
     * LF after a CR) happens here, inside the lock.
     *
     * @param      ignoreLF  If true, the next '\n' will be skipped
     *
     * @return     A String containing the contents of the line, not including
     *             any line-termination characters, or null if the end of the
     *             stream has been reached
     *
     * @see        java.io.LineNumberReader#readLine()
     *
     * @exception  IOException  If an I/O error occurs
     */
    private String readLine(boolean ignoreLF) throws IOException {
        StringBuilder s = null;

        synchronized (lock) {
            ensureOpen();
            /* A pending skipLF means the LF was counted together with its CR. */
            boolean lfAlreadyCounted = skipLF;
            boolean omitLF = ignoreLF || skipLF;

            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    return (s != null && s.length() > 0) ? s.toString() : null;
                }

                /* Skip a leftover '\n', if necessary */
                if (omitLF && (cb[nextChar] == '\n')) {
                    nextChar++;
                    if (!lfAlreadyCounted) {
                        bytesRead++;
                    }
                }
                skipLF = false;
                omitLF = false;

                /* Scan the buffered characters for a line terminator. */
                boolean eol = false;
                char c = 0;
                int i;
                for (i = nextChar; i < nChars; i++) {
                    c = cb[i];
                    if ((c == '\n') || (c == '\r')) {
                        eol = true;
                        break;
                    }
                }

                int startChar = nextChar;
                int segmentLength = i - startChar;
                nextChar = i;
                bytesRead += segmentLength; /* line content consumed from this buffer */

                if (eol) {
                    String str;
                    if (s == null) {
                        str = new String(cb, startChar, segmentLength);
                    } else {
                        s.append(cb, startChar, segmentLength);
                        str = s.toString();
                    }
                    nextChar++;
                    bytesRead++; /* the terminator itself */
                    if (c == '\r') {
                        /* Assume CR-LF: count the LF now so the counter points
                         * at the start of the next line; it is skipped later
                         * without being counted again. */
                        bytesRead++;
                        skipLF = true;
                    }
                    return str;
                }

                if (s == null) {
                    s = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
                }
                s.append(cb, startChar, segmentLength);
            }
        }
    }

    /**
     * Reads a line of text.  A line is considered to be terminated by any one
     * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
     * followed immediately by a linefeed.
     *
     * @return     A String containing the contents of the line, not including
     *             any line-termination characters, or null if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Skips characters. The skipped characters are added to the
     * consumed-character counter.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IllegalArgumentException  If <code>n</code> is negative.
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        synchronized (lock) {
            ensureOpen();
            long r = n;
            while (r > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    break;
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                }
                long d = nChars - nextChar;
                if (r <= d) {
                    nextChar += r;
                    r = 0;
                    break;
                } else {
                    r -= d;
                    nextChar = nChars;
                }
            }
            long skipped = n - r;
            bytesRead += skipped;
            return skipped;
        }
    }

    /**
     * Tells whether this stream is ready to be read.  A buffered character
     * stream is ready if the buffer is not empty, or if the underlying
     * character stream is ready.
     *
     * @return true if the next read will not block for input
     *
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public boolean ready() throws IOException {
        synchronized (lock) {
            ensureOpen();

            /*
             * If newline needs to be skipped and the next char to be read
             * is a newline character, then just skip it right away.
             */
            if (skipLF) {
                /* Note that in.ready() will return true if and only if the next
                 * read on the stream will not block.
                 */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                if (nextChar < nChars) {
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                    skipLF = false;
                }
            }
            return (nextChar < nChars) || in.ready();
        }
    }

    /**
     * Tells whether this stream supports the mark() operation, which it does.
     *
     * @return always {@code true}
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point. The current value
     * of the consumed-character counter is remembered as well.
     *
     * @param readAheadLimit   Limit on the number of characters that may be
     *                         read while still preserving the mark. An attempt
     *                         to reset the stream after reading characters
     *                         up to this limit or beyond may fail.
     *                         A limit value larger than the size of the input
     *                         buffer will cause a new buffer to be allocated
     *                         whose size is no smaller than limit.
     *                         Therefore large values should be used with care.
     *
     * @exception  IllegalArgumentException  If readAheadLimit is &lt; 0
     * @exception  IOException  If an I/O error occurs
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        synchronized (lock) {
            ensureOpen();
            this.readAheadLimit = readAheadLimit;
            markedChar = nextChar;
            markedSkipLF = skipLF;
            markedBytesRead = bytesRead;
        }
    }

    /**
     * Resets the stream to the most recent mark and rewinds the
     * consumed-character counter to the value it had at that mark, so that
     * re-read characters are not counted twice.
     *
     * @exception  IOException  If the stream has never been marked,
     *                          or if the mark has been invalidated
     */
    @Override
    public void reset() throws IOException {
        synchronized (lock) {
            ensureOpen();
            if (markedChar < 0) {
                throw new IOException((markedChar == INVALIDATED)
                        ? "Mark invalid"
                        : "Stream not marked");
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
            bytesRead = markedBytesRead;
        }
    }

    /**
     * Closes the underlying reader and releases the buffer. Closing an
     * already-closed reader has no effect.
     *
     * @exception  IOException  If closing the underlying reader fails; this
     *                          reader is still left in the closed state
     */
    @Override
    public void close() throws IOException {
        synchronized (lock) {
            if (in == null) {
                return;
            }
            try {
                in.close();
            } finally {
                /* Release state even when the underlying close fails. */
                in = null;
                cb = null;
            }
        }
    }
}
548