001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    
023    package org.biojava3.core.sequence.io;
024    
025    import java.io.OutputStream;
026    import java.util.ArrayList;
027    import java.util.Collection;
028    
029    import org.biojava3.core.sequence.AccessionID;
030    import org.biojava3.core.sequence.ChromosomeSequence;
031    import org.biojava3.core.sequence.ExonSequence;
032    import org.biojava3.core.sequence.GeneSequence;
033    import org.biojava3.core.sequence.Strand;
034    import org.biojava3.core.sequence.compound.NucleotideCompound;
035    import org.biojava3.core.sequence.io.template.FastaHeaderFormatInterface;
036    
037    /**
038     * A Gene sequence has a Positive or Negative Strand where we want to write out to a stream the 5 to 3 prime version.
039     * It is also an option to write out the gene sequence where the exon regions are upper case
040     * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence which is negative Strand with the proper sequence
041     * @author Scooter Willis <willishf at gmail dot com>
042     */
043    public class FastaGeneWriter {
044    
045        boolean showExonUppercase = false;
046        OutputStream os;
047        Collection<GeneSequence> sequences;
048        FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat;
049        private int lineLength = 60;
050    /**
051     *
052     * @param os
053     * @param sequences
054     * @param headerFormat
055     * @param showExonUppercase
056     */
057        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase) {
058            this(os, sequences, headerFormat, showExonUppercase, 60);
059        }
060    /**
061     *
062     * @param os
063     * @param sequences
064     * @param headerFormat
065     * @param showExonUppercase
066     * @param lineLength
067     */
068        public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase, int lineLength) {
069            this.os = os;
070            this.sequences = sequences;
071            this.headerFormat = headerFormat;
072            this.lineLength = lineLength;
073            this.showExonUppercase = showExonUppercase;
074        }
075    /**
076     *
077     * @throws Exception
078     */
079        public void process() throws Exception {
080            byte[] lineSep = System.getProperty("line.separator").getBytes();
081    
082            for (GeneSequence sequence : sequences) {
083                String header = headerFormat.getHeader(sequence);
084                os.write('>');
085                os.write(header.getBytes());
086                os.write(lineSep);
087    
088                int compoundCount = 0;
089                String seq = "";
090                //GeneSequence currently has a strand attribute to indicate direction
091    
092                seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString();
093                if (showExonUppercase) {
094                    StringBuilder sb = new StringBuilder(seq.toLowerCase());
095                    int geneBioBegin = sequence.getBioBegin();
096                    int geneBioEnd = sequence.getBioEnd();
097                    for (ExonSequence exonSequence : sequence.getExonSequences()) {
098                        int featureBioBegin = 0;
099                        int featureBioEnd = 0;
100                        if (sequence.getStrand() != Strand.NEGATIVE) {
101                            featureBioBegin = exonSequence.getBioBegin() - geneBioBegin;
102                            featureBioEnd = exonSequence.getBioEnd() - geneBioBegin;
103                        } else {
104                            featureBioBegin = geneBioEnd - exonSequence.getBioEnd();
105                            featureBioEnd = geneBioEnd - exonSequence.getBioBegin();
106                        }
107                        if (featureBioBegin < 0 || featureBioEnd < 0 || featureBioEnd > sb.length() || featureBioBegin > sb.length()) {
108                            System.out.println("Bad Feature " + sequence.getAccession().toString() + " " + sequence.getStrand() + " " + geneBioBegin + " " + geneBioEnd + " " + exonSequence.getBioBegin() + " " + exonSequence.getBioEnd());
109                        } else {
110                            for (int i = featureBioBegin; i <= featureBioEnd; i++) {
111                                char ch = sb.charAt(i);
112                                //probably not the fastest but the safest way if language is not standard ASCII
113                                String temp = ch + "";
114                                ch = temp.toUpperCase().charAt(0);
115                                sb.setCharAt(i, ch);
116                            }
117                        }
118                    }
119                    seq = sb.toString();
120                }
121    
122                for (int i = 0; i < seq.length(); i++) {
123                    os.write(seq.charAt(i));
124                    compoundCount++;
125                    if (compoundCount == lineLength) {
126                        os.write(lineSep);
127                        compoundCount = 0;
128                    }
129    
130                }
131    
132    
133                //If we had sequence which was a reciprocal of line length
134                //then don't write the line terminator as this has already written
135                //it
136                if ((sequence.getLength() % getLineLength()) != 0) {
137                    os.write(lineSep);
138                }
139            }
140        }
141    
142        /**
143         * @return the lineLength
144         */
145        public int getLineLength() {
146            return lineLength;
147        }
148    
149        /**
150         * @param lineLength the lineLength to set
151         */
152        public void setLineLength(int lineLength) {
153            this.lineLength = lineLength;
154        }
155    
156        public static void main(String[] args) {
157    
158            try {
159                ArrayList<GeneSequence> sequences = new ArrayList<GeneSequence>();
160                ChromosomeSequence seq1 = new ChromosomeSequence("ATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGCATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGC");
161                GeneSequence gene1 = seq1.addGene(new AccessionID("gene1"), 1, 20, Strand.POSITIVE);
162    
163                gene1.addExon(new AccessionID("t1_1_10"), 1, 10);
164                gene1.addExon(new AccessionID("t1_12_15"), 12, 15);
165                GeneSequence gene2 = seq1.addGene(new AccessionID("gene2"), 1, 20, Strand.NEGATIVE);
166    
167                gene2.addExon(new AccessionID("t2_1_10"), 1, 10);
168                gene2.addExon(new AccessionID("t2_12_15"), 12, 15);
169                sequences.add(gene1);
170                sequences.add(gene2);
171    
172    
173                FastaGeneWriter fastaWriter = new FastaGeneWriter(System.out, sequences, new GenericFastaHeaderFormat<GeneSequence, NucleotideCompound>(), true);
174                fastaWriter.process();
175    
176    
177            } catch (Exception e) {
178                e.printStackTrace();
179            }
180        }
181    }