001 /*
002 * To change this template, choose Tools | Templates
003 * and open the template in the editor.
004 */
005 package org.biojava3.core.sequence.io;
006
007 import java.io.FileReader;
008 import java.io.IOException;
009 import java.io.InputStreamReader;
010 import java.io.Reader;
011
/**
 * A buffered character reader that additionally keeps a running count of how
 * much of the stream has been consumed, so callers can record offsets while
 * still getting buffered-read performance. The implementation is the JDK
 * {@code BufferedReader} source with the counting added.
 *
 * <p> Reads text from a character-input stream, buffering characters so as to
 * provide for the efficient reading of characters, arrays, and lines.
 *
 * <p> The buffer size may be specified, or the default size may be used. The
 * default is large enough for most purposes.
 *
 * <p> In general, each read request made of a Reader causes a corresponding
 * read request to be made of the underlying character or byte stream. It is
 * therefore advisable to wrap a BufferedReaderBytesRead around any Reader whose
 * read() operations may be costly, such as FileReaders and InputStreamReaders.
 * For example,
 *
 * <pre>
 * BufferedReaderBytesRead in
 *   = new BufferedReaderBytesRead(new FileReader("foo.in"));
 * </pre>
 *
 * will buffer the input from the specified file. Without buffering, each
 * invocation of read() or readLine() could cause bytes to be read from the
 * file, converted into characters, and then returned, which can be very
 * inefficient.
 *
 * <p> Programs that use DataInputStreams for textual input can be localized by
 * replacing each DataInputStream with an appropriate BufferedReaderBytesRead.
 *
 * <h3>Counting rules</h3>
 * <ul>
 * <li>The counter is incremented once per <em>character</em> consumed, line
 *     terminators included. It therefore equals a byte offset only when the
 *     underlying encoding uses one byte per character.
 * <li>When {@link #readLine()} stops at a {@code '\r'} it counts one extra
 *     character on the assumption that a {@code '\n'} follows, so that the
 *     value reported right after the call is the offset of the next line in a
 *     CR-LF file. The {@code '\n'} is not counted again when it is later
 *     skipped; if the next character turns out not to be {@code '\n'} the
 *     extra count is taken back at that point. A stream that ends on a bare
 *     {@code '\r'} is never corrected and stays one too high.
 * <li>{@link #reset()} restores the count that was current at {@link #mark(int)}.
 * </ul>
 *
 * @see FileReader
 * @see InputStreamReader
 *
 * @version 1.37, 06/03/15
 * @author Mark Reinhold
 * @author Scooter Willis &lt;willishf at gmail dot com&gt;
 * @since JDK1.1
 */
public class BufferedReaderBytesRead extends Reader {

    /** Value of {@link #markedChar} after a mark has been set and then invalidated. */
    private static final int INVALIDATED = -2;
    /** Value of {@link #markedChar} when no mark has been set. */
    private static final int UNMARKED = -1;
    /** Buffer size used by the single-argument constructor. */
    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    /** Initial capacity of the builder used when a line spans several buffer fills. */
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    /** Underlying reader; {@code null} once this reader has been closed. */
    private Reader in;
    /** Character buffer; valid data occupies indices {@code [0, nChars)}. */
    private char[] cb;
    /** Number of valid characters in {@link #cb}. */
    private int nChars;
    /** Index in {@link #cb} of the next character to hand out. */
    private int nextChar;
    /** Buffer index of the mark, or {@link #UNMARKED} / {@link #INVALIDATED}. */
    private int markedChar = UNMARKED;
    /** Read-ahead limit passed to {@link #mark(int)}; valid only when markedChar >= 0. */
    private int readAheadLimit = 0;

    /**
     * Set when readLine stopped at a '\r': if the next character is a line
     * feed it must be skipped. While this flag is set, {@link #bytesRead}
     * already includes that anticipated line feed.
     */
    private boolean skipLF = false;
    /** The skipLF flag when the mark was set. */
    private boolean markedSkipLF = false;

    /**
     * Running count of characters consumed (see the class comment for the
     * exact rules). Left package-private, as in the original, in case
     * same-package code reads it directly; prefer {@link #getBytesRead()}.
     */
    long bytesRead = 0;
    /** Value of {@link #bytesRead} when the mark was set. */
    private long markedBytesRead = 0;

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size.
     *
     * @param in A Reader
     * @param sz Input-buffer size
     *
     * @throws IllegalArgumentException If sz is &lt;= 0
     */
    public BufferedReaderBytesRead(Reader in, int sz) {
        super(in);
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        cb = new char[sz];
        nextChar = nChars = 0;
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer.
     *
     * @param in A Reader
     */
    public BufferedReaderBytesRead(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /**
     * Returns the number of characters consumed from this reader so far,
     * including line terminators. After {@link #readLine()} returns a line
     * that ended in {@code '\r'}, the value already includes the
     * {@code '\n'} that is expected to follow.
     *
     * @return the running character count
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /** Checks to make sure that the stream has not been closed. */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Resolves a pending "skip the line feed after a carriage return" request.
     * Callers must hold the lock and guarantee {@code skipLF} is set and
     * {@code nextChar < nChars}.
     *
     * @return {@code true} if a {@code '\n'} was consumed
     */
    private boolean consumePendingLineFeed() {
        skipLF = false;
        if (cb[nextChar] == '\n') {
            /* Already counted when the '\r' was seen; just step over it. */
            nextChar++;
            return true;
        }
        /* The '\r' stood alone: take back the line feed counted in advance. */
        bytesRead--;
        return false;
    }

    /**
     * Fills the input buffer, taking the mark into account if it is valid.
     */
    private void fill() throws IOException {
        int dst;
        if (markedChar <= UNMARKED) {
            /* No mark */
            dst = 0;
        } else {
            /* Marked */
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
                /* Gone past read-ahead limit: Invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
                    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                } else {
                    /* Reallocate buffer to accommodate read-ahead limit */
                    char[] ncb = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                }
                markedChar = 0;
                dst = delta;
                nextChar = nChars = delta;
            }
        }

        int n;
        do {
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range
     *         0 to 65535 ({@code 0x00-0xffff}), or -1 if the
     *         end of the stream has been reached
     * @throws IOException If an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars) {
                        return -1;
                    }
                }
                if (skipLF && consumePendingLineFeed()) {
                    /* The buffer may now be exhausted; re-check before reading. */
                    continue;
                }
                bytesRead++;
                return cb[nextChar++];
            }
        }
    }

    /**
     * Reads characters into a portion of an array, reading from the underlying
     * stream if necessary. Does not update the character count for the
     * characters returned; the public {@code read} does that.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
            /* If the requested length is at least as large as the buffer, and
               if there is no mark/reset activity, and if line feeds are not
               being skipped, do not bother to copy the characters into the
               local buffer.  In this way buffered streams will cascade
               harmlessly. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) {
            return -1;
        }
        if (skipLF && consumePendingLineFeed()) {
            if (nextChar >= nChars) {
                fill();
            }
            if (nextChar >= nChars) {
                return -1;
            }
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * <p> This method implements the general contract of the corresponding
     * <code>{@link Reader#read(char[], int, int) read}</code> method of the
     * <code>{@link Reader}</code> class.  As an additional convenience, it
     * attempts to read as many characters as possible by repeatedly invoking
     * the <code>read</code> method of the underlying stream.  This iterated
     * <code>read</code> continues until one of the following conditions becomes
     * true: <ul>
     *
     *   <li> The specified number of characters have been read,
     *
     *   <li> The <code>read</code> method of the underlying stream returns
     *   <code>-1</code>, indicating end-of-file, or
     *
     *   <li> The <code>ready</code> method of the underlying stream
     *   returns <code>false</code>, indicating that further input requests
     *   would block.
     *
     * </ul> If the first <code>read</code> on the underlying stream returns
     * <code>-1</code> to indicate end-of-file then this method returns
     * <code>-1</code>.  Otherwise this method returns the number of characters
     * actually read.
     *
     * <p> Ordinarily this method takes characters from this stream's character
     * buffer, filling it from the underlying stream as necessary.  If,
     * however, the buffer is empty, the mark is not valid, and the requested
     * length is at least as large as the buffer, then this method will read
     * characters directly from the underlying stream into the given array.
     * Thus redundant <code>BufferedReaderBytesRead</code>s will not copy data
     * unnecessarily.
     *
     * <p> The number of characters returned is added to the running count.
     *
     * @param cbuf Destination buffer
     * @param off  Offset at which to start storing characters
     * @param len  Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @throws IOException If an I/O error occurs
     * @throws IndexOutOfBoundsException If off/len do not describe a range inside cbuf
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        synchronized (lock) {
            ensureOpen();
            if ((off < 0) || (off > cbuf.length) || (len < 0)
                    || ((off + len) > cbuf.length) || ((off + len) < 0)) {
                throw new IndexOutOfBoundsException();
            } else if (len == 0) {
                return 0;
            }

            int n = read1(cbuf, off, len);
            if (n <= 0) {
                return n;
            }
            while ((n < len) && in.ready()) {
                int n1 = read1(cbuf, off + n, len - n);
                if (n1 <= 0) {
                    break;
                }
                n += n1;
            }
            bytesRead += n;
            return n;
        }
    }

    /**
     * Reads a line of text and updates the character count for the line and
     * its terminator. A line is considered to be terminated by any one
     * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
     * followed immediately by a linefeed.
     *
     * @param ignoreLF If true, the next '\n' will be skipped
     *
     * @return A String containing the contents of the line, not including
     *         any line-termination characters, or null if the end of the
     *         stream has been reached
     *
     * @see java.io.LineNumberReader#readLine()
     *
     * @throws IOException If an I/O error occurs
     */
    private String readLine(boolean ignoreLF) throws IOException {
        StringBuilder s = null;

        synchronized (lock) {
            ensureOpen();
            boolean omitLF = ignoreLF;

            for (;;) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    if (s != null && s.length() > 0) {
                        bytesRead += s.length();
                        return s.toString();
                    }
                    return null;
                }

                /* Skip a leftover '\n', if necessary */
                if (skipLF) {
                    /* Line feed after a '\r' seen earlier: counted back then. */
                    consumePendingLineFeed();
                } else if (omitLF && cb[nextChar] == '\n') {
                    /* Caller-requested skip: this line feed was not counted yet. */
                    nextChar++;
                    bytesRead++;
                }
                omitLF = false;

                /* Scan the buffered characters for a line terminator. */
                boolean eol = false;
                char c = 0;
                int i;
                for (i = nextChar; i < nChars; i++) {
                    c = cb[i];
                    if ((c == '\n') || (c == '\r')) {
                        eol = true;
                        break;
                    }
                }

                int startChar = nextChar;
                nextChar = i;

                if (eol) {
                    String str;
                    if (s == null) {
                        str = new String(cb, startChar, i - startChar);
                    } else {
                        s.append(cb, startChar, i - startChar);
                        str = s.toString();
                    }
                    nextChar++;
                    /* Line content plus the terminator character itself. */
                    bytesRead += str.length() + 1;
                    if (c == '\r') {
                        /* Count the expected '\n' now so the reported position
                           is the start of the next line in a CR-LF file; it is
                           skipped (or un-counted) on the next access. */
                        bytesRead++;
                        skipLF = true;
                    }
                    return str;
                }

                /* No terminator in this buffer: stash what we have and refill. */
                if (s == null) {
                    s = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
                }
                s.append(cb, startChar, i - startChar);
            }
        }
    }

    /**
     * Reads a line of text.  A line is considered to be terminated by any one
     * of a line feed ('\n'), a carriage return ('\r'), or a carriage return
     * followed immediately by a linefeed. The running count advances by the
     * line length plus its terminator.
     *
     * @return A String containing the contents of the line, not including
     *         any line-termination characters, or null if the end of the
     *         stream has been reached
     *
     * @throws IOException If an I/O error occurs
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Skips characters. The number of characters skipped is added to the
     * running count.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @throws IllegalArgumentException If <code>n</code> is negative.
     * @throws IOException If an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        synchronized (lock) {
            ensureOpen();
            long r = n;
            while (r > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) { /* EOF */
                    break;
                }
                if (skipLF) {
                    consumePendingLineFeed();
                }
                long d = nChars - nextChar;
                if (r <= d) {
                    nextChar += r;
                    r = 0;
                    break;
                }
                r -= d;
                nextChar = nChars;
            }
            long skipped = n - r;
            bytesRead += skipped;
            return skipped;
        }
    }

    /**
     * Tells whether this stream is ready to be read.  A buffered character
     * stream is ready if the buffer is not empty, or if the underlying
     * character stream is ready.
     *
     * @return true if the next read will not block for input
     * @throws IOException If an I/O error occurs
     */
    @Override
    public boolean ready() throws IOException {
        synchronized (lock) {
            ensureOpen();

            /*
             * If newline needs to be skipped and the next char to be read
             * is a newline character, then just skip it right away.
             */
            if (skipLF) {
                /* Note that in.ready() will return true if and only if the next
                 * read on the stream will not block.
                 */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                if (nextChar < nChars) {
                    consumePendingLineFeed();
                }
            }
            return (nextChar < nChars) || in.ready();
        }
    }

    /**
     * Tells whether this stream supports the mark() operation, which it does.
     *
     * @return always true
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point, and will restore
     * the running character count to its current value.
     *
     * @param readAheadLimit   Limit on the number of characters that may be
     *                         read while still preserving the mark. An attempt
     *                         to reset the stream after reading characters
     *                         up to this limit or beyond may fail.
     *                         A limit value larger than the size of the input
     *                         buffer will cause a new buffer to be allocated
     *                         whose size is no smaller than limit.
     *                         Therefore large values should be used with care.
     *
     * @throws IllegalArgumentException If readAheadLimit is &lt; 0
     * @throws IOException If an I/O error occurs
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        synchronized (lock) {
            ensureOpen();
            this.readAheadLimit = readAheadLimit;
            markedChar = nextChar;
            markedSkipLF = skipLF;
            markedBytesRead = bytesRead;
        }
    }

    /**
     * Resets the stream to the most recent mark and rewinds the running
     * character count to the value it had when the mark was set.
     *
     * @throws IOException If the stream has never been marked,
     *                     or if the mark has been invalidated
     */
    @Override
    public void reset() throws IOException {
        synchronized (lock) {
            ensureOpen();
            if (markedChar < 0) {
                throw new IOException((markedChar == INVALIDATED)
                        ? "Mark invalid"
                        : "Stream not marked");
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
            bytesRead = markedBytesRead;
        }
    }

    /**
     * Closes the underlying reader and releases the buffer. Closing an
     * already-closed reader has no effect.
     *
     * @throws IOException If the underlying reader fails to close
     */
    @Override
    public void close() throws IOException {
        synchronized (lock) {
            if (in == null) {
                return;
            }
            try {
                in.close();
            } finally {
                /* Release references even if the underlying close fails. */
                in = null;
                cb = null;
            }
        }
    }
}
548