001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.io;
023
024 import java.io.FileInputStream;
025 import java.io.FileOutputStream;
026 import java.io.OutputStream;
027 import java.util.Collection;
028 import java.util.LinkedHashMap;
029
030 import org.biojava3.core.sequence.ProteinSequence;
031 import org.biojava3.core.sequence.compound.AminoAcidCompound;
032 import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
033 import org.biojava3.core.sequence.io.template.FastaHeaderFormatInterface;
034 import org.biojava3.core.sequence.template.Compound;
035 import org.biojava3.core.sequence.template.Sequence;
036
037 /**
038 * The FastaWriter writes a collection of sequences to an outputStream. FastaWriterHelper should be
039 * used to write out sequences. Each sequence loaded from a fasta file retains the original Fasta header
040 * and that is used when writing to the stream. This behavior can be overwritten by implementing
041 * a custom FastaHeaderFormatInterface.
042 *
043 * @author Scooter Willis <willishf at gmail dot com>
044 */
045 public class FastaWriter<S extends Sequence<?>, C extends Compound> {
046
047 OutputStream os;
048 Collection<S> sequences;
049 FastaHeaderFormatInterface<S, C> headerFormat;
050 private int lineLength = 60;
051 /**
052 * Use default line length of 60
053 * @param os
054 * @param sequences
055 * @param headerFormat
056 */
057 public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat) {
058 this.os = os;
059 this.sequences = sequences;
060 this.headerFormat = headerFormat;
061 }
062
063 /**
064 * Set custom lineLength
065 * @param os
066 * @param sequences
067 * @param headerFormat
068 * @param lineLength
069 */
070
071 public FastaWriter(OutputStream os, Collection<S> sequences, FastaHeaderFormatInterface<S, C> headerFormat, int lineLength) {
072 this.os = os;
073 this.sequences = sequences;
074 this.headerFormat = headerFormat;
075 this.lineLength = lineLength;
076 }
077
078 public void process() throws Exception {
079
080 byte[] lineSep = System.getProperty("line.separator").getBytes();
081
082 for (S sequence : sequences) {
083 String header = headerFormat.getHeader(sequence);
084 os.write('>');
085 os.write(header.getBytes());
086 os.write(lineSep);
087
088 int compoundCount = 0;
089 String seq = "";
090
091 seq = sequence.getSequenceAsString();
092
093 for (int i = 0; i < seq.length(); i++) {
094 os.write(seq.charAt(i));
095 compoundCount++;
096 if (compoundCount == lineLength) {
097 os.write(lineSep);
098 compoundCount = 0;
099 }
100
101 }
102 // for(Compound c: sequence) {
103 // os.write(c.getShortName().getBytes());
104 // compoundCount++;
105 // if(compoundCount == lineLength) {
106 // os.write(lineSep);
107 // compoundCount = 0;
108 // }
109
110 // }
111
112 //If we had sequence which was a reciprocal of line length
113 //then don't write the line terminator as this has already written
114 //it
115 if ((sequence.getLength() % getLineLength()) != 0) {
116 os.write(lineSep);
117 }
118 }
119 }
120
121 public static void main(String[] args) {
122 try {
123 FileInputStream is = new FileInputStream("test.fasta");
124
125
126 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(is, new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
127 LinkedHashMap<String, ProteinSequence> proteinSequences = fastaReader.process();
128 is.close();
129
130
131 System.out.println(proteinSequences);
132
133 FileOutputStream fileOutputStream = new FileOutputStream("test_out.fasta");
134
135 FastaWriter<ProteinSequence, AminoAcidCompound> fastaWriter = new FastaWriter<ProteinSequence, AminoAcidCompound>(fileOutputStream, proteinSequences.values(), new GenericFastaHeaderFormat<ProteinSequence, AminoAcidCompound>());
136 fastaWriter.process();
137 fileOutputStream.close();
138
139
140 } catch (Exception e) {
141 e.printStackTrace();
142 }
143 }
144
145 /**
146 * @return the lineLength
147 */
148 public int getLineLength() {
149 return lineLength;
150 }
151
152 /**
153 * @param lineLength the lineLength to set
154 */
155 public void setLineLength(int lineLength) {
156 this.lineLength = lineLength;
157 }
158 }