001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.io.util;
023    
024    import java.io.BufferedReader;
025    import java.io.Closeable;
026    import java.io.File;
027    import java.io.FileInputStream;
028    import java.io.IOException;
029    import java.io.InputStream;
030    import java.io.InputStreamReader;
031    import java.io.OutputStream;
032    import java.util.ArrayList;
033    import java.util.List;
034    import java.util.logging.Level;
035    import java.util.logging.Logger;
036    import java.util.zip.GZIPInputStream;
037    
038    import org.biojava3.core.exceptions.ParserException;
039    import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
040    import org.biojava3.core.sequence.compound.AmbiguityRNACompoundSet;
041    import org.biojava3.core.sequence.compound.DNACompoundSet;
042    import org.biojava3.core.sequence.compound.RNACompoundSet;
043    import org.biojava3.core.sequence.template.Compound;
044    import org.biojava3.core.sequence.template.CompoundSet;
045    import org.biojava3.core.sequence.template.Sequence;
046    
047    public class IOUtils {
048    
049      private static final int BUFFER = 4096;
050    
051      /**
052       * Closes any Object which implements the interface {@link Closeable} and
053       * sending any error to the logger but not forcing any explicit catching of
054       * stream errors.
055       *
056       * @param c The stream to close
057       */
058      public static void close(Closeable c) {
059        try {
060          if (c != null) {
061            c.close();
062          }
063        } catch (IOException e) {
064          Logger log = Logger.getLogger(IOUtils.class.getName());
065          log.log(Level.WARNING, "Cannot close down the given Closeable object", e);
066        }
067      }
068    
069      /**
070       * Moves the bytes from input to output using a 4KB byte array.
071       *
072       * @param input Input stream of bytes
073       * @param output Output stream of bytes
074       * @throws IOException If anything occurs in the case of the reads and writes
075       */
076      public static void copy(InputStream input, OutputStream output)
077          throws IOException {
078        byte[] buffer = new byte[BUFFER];
079        int n = 0;
080        while (-1 != (n = input.read(buffer))) {
081          output.write(buffer, 0, n);
082        }
083      }
084    
085      /**
086       * Takes in a reader and a processor, reads every line from the given
087       * file and then invokes the processor. What you do with the lines is
088       * dependent on your processor.
089       *
090       * The code will automatically close the given BufferedReader.
091       *
092       * @param br The reader to process
093       * @param processor The processor to invoke on all lines
094       * @throws ParserException Can throw this if we cannot parse the given reader
095       */
096      public static void processReader(BufferedReader br, ReaderProcessor processor) throws ParserException {
097        String line;
098        try {
099          while( (line = br.readLine()) != null ) {
100            processor.process(line);
101          }
102        }
103        catch(IOException e) {
104          throw new ParserException("Could not read from the given BufferedReader");
105        }
106        finally {
107          close(br);
108        }
109      }
110    
111      /**
112       * Returns the contents of a buffered reader as a list of strings
113       *
114       * @param br BufferedReader to read from; <strong>will be closed</strong>
115       * @return List of Strings
116       * @throws ParserException Can throw this if we cannot parse the given reader
117       */
118      public static List<String> getList(BufferedReader br) throws ParserException {
119        final List<String> list = new ArrayList<String>();
120        processReader(br, new ReaderProcessor() {
121          public void process(String line) {
122            list.add(line);
123          }
124        });
125        return list;
126      }
127    
128      /**
129       * Delegates to {@link #getList(BufferedReader)} by wrapping the InputStream
130       * in a valid reader. No encoding is mentioned so if you need anything
131       * more advanced then use the other version of this method.
132       *
133       * @param is InputStream which is a text file
134       * @return List of Strings representing the lines of the files
135       * @throws ParserException Can throw this if the file is not a file or we
136       * cannot parse it
137       */
138      public static List<String> getList(InputStream is) throws ParserException {
139        return getList(new BufferedReader(new InputStreamReader(is)));
140      }
141    
142      /**
143       * Delegates to {@link #getList(InputStream)} by wrapping the File
144       * in a valid stream. No encoding is mentioned so if you need anything
145       * more advanced then use the other version of this method. Since this
146       * uses {@link #openFile(File)} this code can support GZipped and plain
147       * files.
148       *
149       * @param file File which is a text file
150       * @return List of Strings representing the lines of the files
151       * @throws ParserException Can throw this if the file is not a file or we
152       * cannot parse it
153       */
154      public static List<String> getList(File file) throws ParserException {
155        return getList(openFile(file));
156      }
157    
158      /**
159       * For a filename this code will check the extension of the file for a
160       * .gz extension. If it finds one then the InputStream given back
161       * is a {@link GZIPInputStream}. Otherwise we return a normal
162       * {@link FileInputStream}.
163       *
164       * @param file File which may or may not be GZipped
165       * @return The final stream
166       * @throws ParserException Can throw this if the file is not a file or we
167       * cannot open it for processing
168       */
169      public static InputStream openFile(File file) throws ParserException {
170        final InputStream is;
171        if(!file.isFile()) {
172          throw new ParserException("The file "+file+" is not a file.");
173        }
174        String name = file.getName();
175        try {
176          if(name.endsWith(".gz")) {
177            is = new GZIPInputStream(new FileInputStream(file));
178          }
179          else {
180            is = new FileInputStream(file);
181          }
182        }
183        catch(IOException e) {
184          throw new ParserException("Cannot open "+file+" for processing", e);
185        }
186        return is;
187      }
188    
189      /**
190       * Closure interface used when working with
191       * {@link IOUtils#processReader(String)}. Each time a line is encountered
192       * the object that implements this interface will be invoked.
193       *
194       * @author ayates
195       */
196      public static interface ReaderProcessor {
197        void process(String line) throws IOException;
198      }
199    
200      /**
201       * Calculates GCG checksum for entire list of sequences
202       *
203       * @param sequences list of sequences
204       * @return GCG checksum
205       */
206      public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(List<S> sequences) {
207          int check = 0;
208          for (S as : sequences) {
209              check += getGCGChecksum(as);
210          }
211          return check % 10000;
212      }
213    
214      /**
215       * Calculates GCG checksum for a given sequence
216       *
217       * @param sequence given sequence
218       * @return GCG checksum
219       */
220      public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(S sequence) {
221          String s = sequence.toString().toUpperCase();
222          int count = 0, check = 0;
223          for (int i = 0; i < s.length(); i++) {
224              count++;
225              check += count * s.charAt(i);
226              if (count == 57) {
227                  count = 0;
228              }
229          }
230          return check % 10000;
231      }
232    
233      /**
234       * Assembles a GCG file header
235       *
236       * @param sequences list of sequences
237       * @return GCG header
238       */
239      public static <S extends Sequence<C>, C extends Compound> String getGCGHeader(List<S> sequences) {
240          StringBuilder header = new StringBuilder();
241          S s1 = sequences.get(0);
242          header.append(String.format("MSA from BioJava%n%n MSF: %d  Type: %s  Check: %d ..%n%n",
243                  s1.getLength(), getGCGType(s1.getCompoundSet()), getGCGChecksum(sequences)));
244          String format = " Name: " + getIDFormat(sequences) + " Len: " + s1.getLength() + "  Check: %4d  Weight: 1.0%n";
245          for (S as : sequences) {
246              header.append(String.format(format, as.getAccession(), getGCGChecksum(as)));
247              // TODO show weights in MSF header
248          }
249          header.append(String.format("%n//%n%n"));
250          // TODO? convert gap characters to '.'
251          return header.toString();
252      }
253    
254      /**
255       * Determines GCG type
256       * @param cs compound set of sequences
257       * @return GCG type
258       */
259      public static <C extends Compound> String getGCGType(CompoundSet<C> cs) {
260          return (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) ? "D" :
261              (cs == RNACompoundSet.getRNACompoundSet() || cs == AmbiguityRNACompoundSet.getRNACompoundSet()) ? "R" : "P";
262      }
263    
264      /**
265       * Creates format String for accession IDs
266       *
267       * @param sequences list of sequences
268       * @return format String for accession IDs
269       */
270      public static <S extends Sequence<C>, C extends Compound> String getIDFormat(List<S> sequences) {
271          int length = 0;
272          for (S as : sequences) {
273              length = Math.max(length, (as.getAccession() == null) ? 0 : as.getAccession().toString().length());
274          }
275          return (length == 0) ? null : "%-" + (length + 1) + "s";
276      }
277    
278      /**
279       * Creates formatted String for a single character of PDB output
280       *
281       * @param web true for HTML display
282       * @param c1 character in first sequence
283       * @param c2 character in second sequence
284       * @param similar true if c1 and c2 are considered similar compounds
285       * @param c character to display
286       * @return formatted String
287       */
288      public static String getPDBCharacter(boolean web, char c1, char c2, boolean similar, char c) {
289          String s = c + "";
290          return getPDBString(web, c1, c2, similar, s, s, s, s);
291      }
292    
293      /**
294       * Creates formatted String for displaying conservation in PDB output
295       *
296       * @param web true for HTML display
297       * @param c1 character in first sequence
298       * @param c2 character in second sequence
299       * @param similar true if c1 and c2 are considered similar compounds
300       * @return formatted String
301       */
302      public static String getPDBConservation(boolean web, char c1, char c2, boolean similar) {
303          return getPDBString(web, c1, c2, similar, "|", ".", " ", web ? "&nbsp;" : " ");
304      }
305    
306      // helper method for getPDBCharacter and getPDBConservation
307      private static String getPDBString(boolean web, char c1, char c2, boolean similar, String m, String sm, String dm,
308              String qg) {
309          if (c1 == c2)
310              return web ? "<span class=\"m\">" + m + "</span>" : m;                             
311          else if (similar)
312              return web ? "<span class=\"sm\">" + sm + "</span>" : sm;
313          else if (c1 == '-' || c2 == '-')
314              return web ? "<span class=\"dm\">" + dm + "</span>" : dm;
315          else
316              return web ? "<span class=\"qg\">" + qg + "</span>" : qg;
317      }
318    
319      /**
320       * Creates formatted String for displaying conservation legend in PDB output
321       *
322       * @return legend String
323       */
324      public static String getPDBLegend() {
325          StringBuilder s = new StringBuilder();
326          s.append("</pre></div>");
327          s.append("          <div class=\"subText\">");
328          s.append("          <b>Legend:</b>");
329          s.append("          <span class=\"m\">Black</span> - identical residues |"); 
330          s.append("          <span class=\"sm\">Pink</span> - similar residues | ");
331          s.append("          <span class=\"qg\">Blue</span> - sequence mismatch |");
332          s.append("          <span class=\"dm\">Brown</span> - insertion/deletion |");                  
333          s.append("      </div>");
334          s.append(String.format("%n"));
335          return s.toString();
336      }
337    
338    }