001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022
023 package org.biojava3.core.sequence.io;
024
025 import java.io.OutputStream;
026 import java.util.ArrayList;
027 import java.util.Collection;
028
029 import org.biojava3.core.sequence.AccessionID;
030 import org.biojava3.core.sequence.ChromosomeSequence;
031 import org.biojava3.core.sequence.ExonSequence;
032 import org.biojava3.core.sequence.GeneSequence;
033 import org.biojava3.core.sequence.Strand;
034 import org.biojava3.core.sequence.compound.NucleotideCompound;
035 import org.biojava3.core.sequence.io.template.FastaHeaderFormatInterface;
036
037 /**
038 * A Gene sequence has a Positive or Negative Strand where we want to write out to a stream the 5 to 3 prime version.
039 * It is also an option to write out the gene sequence where the exon regions are upper case
040 * 6/22/2010 FastaWriter needs to be sequence aware to handle writing out a GeneSequence which is negative Strand with the proper sequence
041 * @author Scooter Willis <willishf at gmail dot com>
042 */
043 public class FastaGeneWriter {
044
045 boolean showExonUppercase = false;
046 OutputStream os;
047 Collection<GeneSequence> sequences;
048 FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat;
049 private int lineLength = 60;
050 /**
051 *
052 * @param os
053 * @param sequences
054 * @param headerFormat
055 * @param showExonUppercase
056 */
057 public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase) {
058 this(os, sequences, headerFormat, showExonUppercase, 60);
059 }
060 /**
061 *
062 * @param os
063 * @param sequences
064 * @param headerFormat
065 * @param showExonUppercase
066 * @param lineLength
067 */
068 public FastaGeneWriter(OutputStream os, Collection<GeneSequence> sequences, FastaHeaderFormatInterface<GeneSequence, NucleotideCompound> headerFormat, boolean showExonUppercase, int lineLength) {
069 this.os = os;
070 this.sequences = sequences;
071 this.headerFormat = headerFormat;
072 this.lineLength = lineLength;
073 this.showExonUppercase = showExonUppercase;
074 }
075 /**
076 *
077 * @throws Exception
078 */
079 public void process() throws Exception {
080 byte[] lineSep = System.getProperty("line.separator").getBytes();
081
082 for (GeneSequence sequence : sequences) {
083 String header = headerFormat.getHeader(sequence);
084 os.write('>');
085 os.write(header.getBytes());
086 os.write(lineSep);
087
088 int compoundCount = 0;
089 String seq = "";
090 //GeneSequence currently has a strand attribute to indicate direction
091
092 seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString();
093 if (showExonUppercase) {
094 StringBuilder sb = new StringBuilder(seq.toLowerCase());
095 int geneBioBegin = sequence.getBioBegin();
096 int geneBioEnd = sequence.getBioEnd();
097 for (ExonSequence exonSequence : sequence.getExonSequences()) {
098 int featureBioBegin = 0;
099 int featureBioEnd = 0;
100 if (sequence.getStrand() != Strand.NEGATIVE) {
101 featureBioBegin = exonSequence.getBioBegin() - geneBioBegin;
102 featureBioEnd = exonSequence.getBioEnd() - geneBioBegin;
103 } else {
104 featureBioBegin = geneBioEnd - exonSequence.getBioEnd();
105 featureBioEnd = geneBioEnd - exonSequence.getBioBegin();
106 }
107 if (featureBioBegin < 0 || featureBioEnd < 0 || featureBioEnd > sb.length() || featureBioBegin > sb.length()) {
108 System.out.println("Bad Feature " + sequence.getAccession().toString() + " " + sequence.getStrand() + " " + geneBioBegin + " " + geneBioEnd + " " + exonSequence.getBioBegin() + " " + exonSequence.getBioEnd());
109 } else {
110 for (int i = featureBioBegin; i <= featureBioEnd; i++) {
111 char ch = sb.charAt(i);
112 //probably not the fastest but the safest way if language is not standard ASCII
113 String temp = ch + "";
114 ch = temp.toUpperCase().charAt(0);
115 sb.setCharAt(i, ch);
116 }
117 }
118 }
119 seq = sb.toString();
120 }
121
122 for (int i = 0; i < seq.length(); i++) {
123 os.write(seq.charAt(i));
124 compoundCount++;
125 if (compoundCount == lineLength) {
126 os.write(lineSep);
127 compoundCount = 0;
128 }
129
130 }
131
132
133 //If we had sequence which was a reciprocal of line length
134 //then don't write the line terminator as this has already written
135 //it
136 if ((sequence.getLength() % getLineLength()) != 0) {
137 os.write(lineSep);
138 }
139 }
140 }
141
142 /**
143 * @return the lineLength
144 */
145 public int getLineLength() {
146 return lineLength;
147 }
148
149 /**
150 * @param lineLength the lineLength to set
151 */
152 public void setLineLength(int lineLength) {
153 this.lineLength = lineLength;
154 }
155
156 public static void main(String[] args) {
157
158 try {
159 ArrayList<GeneSequence> sequences = new ArrayList<GeneSequence>();
160 ChromosomeSequence seq1 = new ChromosomeSequence("ATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGCATATATATATATATATATATATATATATATATACGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCATATATATATATATATATATATACGCGCGCGCGCGCGCGC");
161 GeneSequence gene1 = seq1.addGene(new AccessionID("gene1"), 1, 20, Strand.POSITIVE);
162
163 gene1.addExon(new AccessionID("t1_1_10"), 1, 10);
164 gene1.addExon(new AccessionID("t1_12_15"), 12, 15);
165 GeneSequence gene2 = seq1.addGene(new AccessionID("gene2"), 1, 20, Strand.NEGATIVE);
166
167 gene2.addExon(new AccessionID("t2_1_10"), 1, 10);
168 gene2.addExon(new AccessionID("t2_12_15"), 12, 15);
169 sequences.add(gene1);
170 sequences.add(gene2);
171
172
173 FastaGeneWriter fastaWriter = new FastaGeneWriter(System.out, sequences, new GenericFastaHeaderFormat<GeneSequence, NucleotideCompound>(), true);
174 fastaWriter.process();
175
176
177 } catch (Exception e) {
178 e.printStackTrace();
179 }
180 }
181 }