001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.io.util;
023
024 import java.io.BufferedReader;
025 import java.io.Closeable;
026 import java.io.File;
027 import java.io.FileInputStream;
028 import java.io.IOException;
029 import java.io.InputStream;
030 import java.io.InputStreamReader;
031 import java.io.OutputStream;
032 import java.util.ArrayList;
033 import java.util.List;
034 import java.util.logging.Level;
035 import java.util.logging.Logger;
036 import java.util.zip.GZIPInputStream;
037
038 import org.biojava3.core.exceptions.ParserException;
039 import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
040 import org.biojava3.core.sequence.compound.AmbiguityRNACompoundSet;
041 import org.biojava3.core.sequence.compound.DNACompoundSet;
042 import org.biojava3.core.sequence.compound.RNACompoundSet;
043 import org.biojava3.core.sequence.template.Compound;
044 import org.biojava3.core.sequence.template.CompoundSet;
045 import org.biojava3.core.sequence.template.Sequence;
046
047 public class IOUtils {
048
049 private static final int BUFFER = 4096;
050
051 /**
052 * Closes any Object which implements the interface {@link Closeable} and
053 * sending any error to the logger but not forcing any explicit catching of
054 * stream errors.
055 *
056 * @param c The stream to close
057 */
058 public static void close(Closeable c) {
059 try {
060 if (c != null) {
061 c.close();
062 }
063 } catch (IOException e) {
064 Logger log = Logger.getLogger(IOUtils.class.getName());
065 log.log(Level.WARNING, "Cannot close down the given Closeable object", e);
066 }
067 }
068
069 /**
070 * Moves the bytes from input to output using a 4KB byte array.
071 *
072 * @param input Input stream of bytes
073 * @param output Output stream of bytes
074 * @throws IOException If anything occurs in the case of the reads and writes
075 */
076 public static void copy(InputStream input, OutputStream output)
077 throws IOException {
078 byte[] buffer = new byte[BUFFER];
079 int n = 0;
080 while (-1 != (n = input.read(buffer))) {
081 output.write(buffer, 0, n);
082 }
083 }
084
085 /**
086 * Takes in a reader and a processor, reads every line from the given
087 * file and then invokes the processor. What you do with the lines is
088 * dependent on your processor.
089 *
090 * The code will automatically close the given BufferedReader.
091 *
092 * @param br The reader to process
093 * @param processor The processor to invoke on all lines
094 * @throws ParserException Can throw this if we cannot parse the given reader
095 */
096 public static void processReader(BufferedReader br, ReaderProcessor processor) throws ParserException {
097 String line;
098 try {
099 while( (line = br.readLine()) != null ) {
100 processor.process(line);
101 }
102 }
103 catch(IOException e) {
104 throw new ParserException("Could not read from the given BufferedReader");
105 }
106 finally {
107 close(br);
108 }
109 }
110
111 /**
112 * Returns the contents of a buffered reader as a list of strings
113 *
114 * @param br BufferedReader to read from; <strong>will be closed</strong>
115 * @return List of Strings
116 * @throws ParserException Can throw this if we cannot parse the given reader
117 */
118 public static List<String> getList(BufferedReader br) throws ParserException {
119 final List<String> list = new ArrayList<String>();
120 processReader(br, new ReaderProcessor() {
121 public void process(String line) {
122 list.add(line);
123 }
124 });
125 return list;
126 }
127
128 /**
129 * Delegates to {@link #getList(BufferedReader)} by wrapping the InputStream
130 * in a valid reader. No encoding is mentioned so if you need anything
131 * more advanced then use the other version of this method.
132 *
133 * @param is InputStream which is a text file
134 * @return List of Strings representing the lines of the files
135 * @throws ParserException Can throw this if the file is not a file or we
136 * cannot parse it
137 */
138 public static List<String> getList(InputStream is) throws ParserException {
139 return getList(new BufferedReader(new InputStreamReader(is)));
140 }
141
142 /**
143 * Delegates to {@link #getList(InputStream)} by wrapping the File
144 * in a valid stream. No encoding is mentioned so if you need anything
145 * more advanced then use the other version of this method. Since this
146 * uses {@link #openFile(File)} this code can support GZipped and plain
147 * files.
148 *
149 * @param file File which is a text file
150 * @return List of Strings representing the lines of the files
151 * @throws ParserException Can throw this if the file is not a file or we
152 * cannot parse it
153 */
154 public static List<String> getList(File file) throws ParserException {
155 return getList(openFile(file));
156 }
157
158 /**
159 * For a filename this code will check the extension of the file for a
160 * .gz extension. If it finds one then the InputStream given back
161 * is a {@link GZIPInputStream}. Otherwise we return a normal
162 * {@link FileInputStream}.
163 *
164 * @param file File which may or may not be GZipped
165 * @return The final stream
166 * @throws ParserException Can throw this if the file is not a file or we
167 * cannot open it for processing
168 */
169 public static InputStream openFile(File file) throws ParserException {
170 final InputStream is;
171 if(!file.isFile()) {
172 throw new ParserException("The file "+file+" is not a file.");
173 }
174 String name = file.getName();
175 try {
176 if(name.endsWith(".gz")) {
177 is = new GZIPInputStream(new FileInputStream(file));
178 }
179 else {
180 is = new FileInputStream(file);
181 }
182 }
183 catch(IOException e) {
184 throw new ParserException("Cannot open "+file+" for processing", e);
185 }
186 return is;
187 }
188
189 /**
190 * Closure interface used when working with
191 * {@link IOUtils#processReader(String)}. Each time a line is encountered
192 * the object that implements this interface will be invoked.
193 *
194 * @author ayates
195 */
196 public static interface ReaderProcessor {
197 void process(String line) throws IOException;
198 }
199
200 /**
201 * Calculates GCG checksum for entire list of sequences
202 *
203 * @param sequences list of sequences
204 * @return GCG checksum
205 */
206 public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(List<S> sequences) {
207 int check = 0;
208 for (S as : sequences) {
209 check += getGCGChecksum(as);
210 }
211 return check % 10000;
212 }
213
214 /**
215 * Calculates GCG checksum for a given sequence
216 *
217 * @param sequence given sequence
218 * @return GCG checksum
219 */
220 public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(S sequence) {
221 String s = sequence.toString().toUpperCase();
222 int count = 0, check = 0;
223 for (int i = 0; i < s.length(); i++) {
224 count++;
225 check += count * s.charAt(i);
226 if (count == 57) {
227 count = 0;
228 }
229 }
230 return check % 10000;
231 }
232
233 /**
234 * Assembles a GCG file header
235 *
236 * @param sequences list of sequences
237 * @return GCG header
238 */
239 public static <S extends Sequence<C>, C extends Compound> String getGCGHeader(List<S> sequences) {
240 StringBuilder header = new StringBuilder();
241 S s1 = sequences.get(0);
242 header.append(String.format("MSA from BioJava%n%n MSF: %d Type: %s Check: %d ..%n%n",
243 s1.getLength(), getGCGType(s1.getCompoundSet()), getGCGChecksum(sequences)));
244 String format = " Name: " + getIDFormat(sequences) + " Len: " + s1.getLength() + " Check: %4d Weight: 1.0%n";
245 for (S as : sequences) {
246 header.append(String.format(format, as.getAccession(), getGCGChecksum(as)));
247 // TODO show weights in MSF header
248 }
249 header.append(String.format("%n//%n%n"));
250 // TODO? convert gap characters to '.'
251 return header.toString();
252 }
253
254 /**
255 * Determines GCG type
256 * @param cs compound set of sequences
257 * @return GCG type
258 */
259 public static <C extends Compound> String getGCGType(CompoundSet<C> cs) {
260 return (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) ? "D" :
261 (cs == RNACompoundSet.getRNACompoundSet() || cs == AmbiguityRNACompoundSet.getRNACompoundSet()) ? "R" : "P";
262 }
263
264 /**
265 * Creates format String for accession IDs
266 *
267 * @param sequences list of sequences
268 * @return format String for accession IDs
269 */
270 public static <S extends Sequence<C>, C extends Compound> String getIDFormat(List<S> sequences) {
271 int length = 0;
272 for (S as : sequences) {
273 length = Math.max(length, (as.getAccession() == null) ? 0 : as.getAccession().toString().length());
274 }
275 return (length == 0) ? null : "%-" + (length + 1) + "s";
276 }
277
278 /**
279 * Creates formatted String for a single character of PDB output
280 *
281 * @param web true for HTML display
282 * @param c1 character in first sequence
283 * @param c2 character in second sequence
284 * @param similar true if c1 and c2 are considered similar compounds
285 * @param c character to display
286 * @return formatted String
287 */
288 public static String getPDBCharacter(boolean web, char c1, char c2, boolean similar, char c) {
289 String s = c + "";
290 return getPDBString(web, c1, c2, similar, s, s, s, s);
291 }
292
293 /**
294 * Creates formatted String for displaying conservation in PDB output
295 *
296 * @param web true for HTML display
297 * @param c1 character in first sequence
298 * @param c2 character in second sequence
299 * @param similar true if c1 and c2 are considered similar compounds
300 * @return formatted String
301 */
302 public static String getPDBConservation(boolean web, char c1, char c2, boolean similar) {
303 return getPDBString(web, c1, c2, similar, "|", ".", " ", web ? " " : " ");
304 }
305
306 // helper method for getPDBCharacter and getPDBConservation
307 private static String getPDBString(boolean web, char c1, char c2, boolean similar, String m, String sm, String dm,
308 String qg) {
309 if (c1 == c2)
310 return web ? "<span class=\"m\">" + m + "</span>" : m;
311 else if (similar)
312 return web ? "<span class=\"sm\">" + sm + "</span>" : sm;
313 else if (c1 == '-' || c2 == '-')
314 return web ? "<span class=\"dm\">" + dm + "</span>" : dm;
315 else
316 return web ? "<span class=\"qg\">" + qg + "</span>" : qg;
317 }
318
319 /**
320 * Creates formatted String for displaying conservation legend in PDB output
321 *
322 * @return legend String
323 */
324 public static String getPDBLegend() {
325 StringBuilder s = new StringBuilder();
326 s.append("</pre></div>");
327 s.append(" <div class=\"subText\">");
328 s.append(" <b>Legend:</b>");
329 s.append(" <span class=\"m\">Black</span> - identical residues |");
330 s.append(" <span class=\"sm\">Pink</span> - similar residues | ");
331 s.append(" <span class=\"qg\">Blue</span> - sequence mismatch |");
332 s.append(" <span class=\"dm\">Brown</span> - insertion/deletion |");
333 s.append(" </div>");
334 s.append(String.format("%n"));
335 return s.toString();
336 }
337
338 }