001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.transcription;
023
024 import java.util.EnumMap;
025 import java.util.Map;
026
027 import org.biojava3.core.sequence.compound.AminoAcidCompound;
028 import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
029 import org.biojava3.core.sequence.compound.DNACompoundSet;
030 import org.biojava3.core.sequence.compound.NucleotideCompound;
031 import org.biojava3.core.sequence.compound.RNACompoundSet;
032 import org.biojava3.core.sequence.io.IUPACParser;
033 import org.biojava3.core.sequence.io.ProteinSequenceCreator;
034 import org.biojava3.core.sequence.io.RNASequenceCreator;
035 import org.biojava3.core.sequence.io.IUPACParser.IUPACTable;
036 import org.biojava3.core.sequence.io.template.SequenceCreatorInterface;
037 import org.biojava3.core.sequence.template.CompoundSet;
038 import org.biojava3.core.sequence.template.Sequence;
039 import org.biojava3.core.sequence.transcription.Table.Codon;
040
041 /**
042 * Used as a way of encapsulating the data structures required to parse
043 * DNA to a Protein sequence.
044 *
045 * In order to build one look at {@ TranscriptionEngine.Builder} which provides
046 * intelligent defaults & allows you to build an engine which is nearly
047 * the same as the default one but with a few changes. All of the engine
048 * is customisable.
049 *
050 * By default the code will attempt to:
051 *
052 * <ul>
053 * <li>Trim Stops</li>
054 * <li>Convert initiating codons to M</li>
055 * <li>Allow for the fuzzy translation of Codons i.e. if it contains an
056 * N that produces a {@link Sequence}<{@link{AminoAcidCompound}>
057 * with an X at that position
058 * </ul>
059 *
060 * @author ayates
061 */
062 public class TranscriptionEngine {
063
064 private static final class IOD {
065
066 public static final TranscriptionEngine INSTANCE = new TranscriptionEngine.Builder().build();
067 }
068
069 /**
070 * Default instance to use when Transcribing from DNA -> RNA ->
071 * Protein. If you require anything that is not a default setting then look
072 * at {@ TranscriptionEngine.Builder} for customisation options.
073 */
074 public static TranscriptionEngine getDefault() {
075 return IOD.INSTANCE;
076 }
077 private final Table table;
078 private final RNAToAminoAcidTranslator rnaAminoAcidTranslator;
079 private final DNAToRNATranslator dnaRnaTranslator;
080 private final SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
081 private final SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
082 private final CompoundSet<NucleotideCompound> dnaCompounds;
083 private final CompoundSet<NucleotideCompound> rnaCompounds;
084 private final CompoundSet<AminoAcidCompound> aminoAcidCompounds;
085
086 private TranscriptionEngine(
087 Table table,
088 RNAToAminoAcidTranslator rnaAminoAcidTranslator,
089 DNAToRNATranslator dnaRnaTranslator,
090 SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator,
091 SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator,
092 CompoundSet<NucleotideCompound> dnaCompounds,
093 CompoundSet<NucleotideCompound> rnaCompounds,
094 CompoundSet<AminoAcidCompound> aminoAcidCompounds) {
095 this.table = table;
096 this.rnaAminoAcidTranslator = rnaAminoAcidTranslator;
097 this.dnaRnaTranslator = dnaRnaTranslator;
098 this.proteinSequenceCreator = proteinSequenceCreator;
099 this.rnaSequenceCreator = rnaSequenceCreator;
100 this.dnaCompounds = dnaCompounds;
101 this.rnaCompounds = rnaCompounds;
102 this.aminoAcidCompounds = aminoAcidCompounds;
103 }
104
105 /**
106 * Quick method to let you go from a CDS to a Peptide quickly. It assumes
107 * you are translating only in the first frame
108 *
109 * @param dna The CDS to translate
110 * @return The Protein Sequence
111 */
112 public Sequence<AminoAcidCompound> translate(Sequence<NucleotideCompound> dna) {
113 Map<Frame, Sequence<AminoAcidCompound>> trans =
114 multipleFrameTranslation(dna, Frame.ONE);
115 return trans.get(Frame.ONE);
116 }
117
118 /**
119 * A way of translating DNA in a number of frames
120 *
121 * @param dna The CDS to translate
122 * @param frames The Frames to translate in
123 * @return All generated protein sequences in the given frames. Can have
124 * null entries
125 */
126 public Map<Frame, Sequence<AminoAcidCompound>> multipleFrameTranslation(
127 Sequence<NucleotideCompound> dna, Frame... frames) {
128 Map<Frame, Sequence<AminoAcidCompound>> results =
129 new EnumMap<Frame, Sequence<AminoAcidCompound>>(Frame.class);
130 for (Frame frame : frames) {
131 Sequence<NucleotideCompound> rna =
132 getDnaRnaTranslator().createSequence(dna, frame);
133 Sequence<AminoAcidCompound> peptide =
134 getRnaAminoAcidTranslator().createSequence(rna);
135 results.put(frame, peptide);
136 }
137 return results;
138 }
139
140 public Table getTable() {
141 return table;
142 }
143
144 public RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
145 return rnaAminoAcidTranslator;
146 }
147
148 public DNAToRNATranslator getDnaRnaTranslator() {
149 return dnaRnaTranslator;
150 }
151
152 public SequenceCreatorInterface<AminoAcidCompound> getProteinSequenceCreator() {
153 return proteinSequenceCreator;
154 }
155
156 public SequenceCreatorInterface<NucleotideCompound> getRnaSequenceCreator() {
157 return rnaSequenceCreator;
158 }
159
160 public CompoundSet<NucleotideCompound> getDnaCompounds() {
161 return dnaCompounds;
162 }
163
164 public CompoundSet<NucleotideCompound> getRnaCompounds() {
165 return rnaCompounds;
166 }
167
168 public CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
169 return aminoAcidCompounds;
170 }
171
172 /**
173 * This class is the way to create a {@link TranslationEngine}.
174 */
175 public static class Builder {
176
177 private Table table;
178 private RNAToAminoAcidTranslator rnaAminoAcidTranslator;
179 private DNAToRNATranslator dnaRnaTranslator;
180 private SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
181 private SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
182 private CompoundSet<NucleotideCompound> dnaCompounds;
183 private CompoundSet<NucleotideCompound> rnaCompounds;
184 private CompoundSet<AminoAcidCompound> aminoAcidCompounds;
185 private boolean initMet = true;
186 private boolean trimStop = true;
187 private boolean translateNCodons = true;
188 private boolean decorateRna = false;
189
190 /**
191 * The method to finish any calls to the builder with which returns
192 * a transcription engine. The engine is designed to provide everything
193 * required for transcription to those classes which will do the
194 * transcription.
195 */
196 public TranscriptionEngine build() {
197 return new TranscriptionEngine(
198 getTable(),
199 getRnaAminoAcidTranslator(),
200 getDnaRnaTranslator(),
201 getProteinCreator(),
202 getRnaCreator(),
203 getDnaCompounds(),
204 getRnaCompounds(),
205 getAminoAcidCompounds());
206 }
207
208 //---- START OF BUILDER METHODS
209 /**
210 * Uses the static instance of {@link IUPACParser} to find instances of
211 * {@link IUPACTable}s by ID.
212 */
213 public Builder table(Integer id) {
214 table = IUPACParser.getInstance().getTable(id);
215 return this;
216 }
217
218 /**
219 * Uses the static instance of {@link IUPACParser} to find instances of
220 * {@link IUPACTable}s by its String name
221 */
222 public Builder table(String name) {
223 table = IUPACParser.getInstance().getTable(name);
224 return this;
225 }
226
227 public Builder table(Table table) {
228 this.table = table;
229 return this;
230 }
231
232 public Builder dnaCompounds(CompoundSet<NucleotideCompound> compounds) {
233 this.dnaCompounds = compounds;
234 return this;
235 }
236
237 public Builder rnaCompounds(CompoundSet<NucleotideCompound> compounds) {
238 this.rnaCompounds = compounds;
239 return this;
240 }
241
242 public Builder aminoAcidsCompounds(CompoundSet<AminoAcidCompound> compounds) {
243 this.aminoAcidCompounds = compounds;
244 return this;
245 }
246
247 public Builder dnaRnaTranslator(DNAToRNATranslator translator) {
248 this.dnaRnaTranslator = translator;
249 return this;
250 }
251
252 public Builder rnaAminoAcidTranslator(RNAToAminoAcidTranslator translator) {
253 this.rnaAminoAcidTranslator = translator;
254 return this;
255 }
256
257 public Builder proteinCreator(SequenceCreatorInterface<AminoAcidCompound> creator) {
258 this.proteinSequenceCreator = creator;
259 return this;
260 }
261
262 public Builder rnaCreator(SequenceCreatorInterface<NucleotideCompound> creator) {
263 this.rnaSequenceCreator = creator;
264 return this;
265 }
266
267 public Builder initMet(boolean initMet) {
268 this.initMet = initMet;
269 return this;
270 }
271
272 public Builder trimStop(boolean trimStop) {
273 this.trimStop = trimStop;
274 return this;
275 }
276
277 public Builder translateNCodons(boolean translateNCodons) {
278 this.translateNCodons = translateNCodons;
279 return this;
280 }
281
282 /**
283 * Performs an optimisation where RNASequences are not translated into
284 * their own objects but are views onto the base DNA sequence.
285 */
286 public Builder decorateRna(boolean decorateRna) {
287 this.decorateRna = decorateRna;
288 return this;
289 }
290
291 //------ INTERNAL BUILDERS with defaults if exists
292 private CompoundSet<NucleotideCompound> getDnaCompounds() {
293 if (dnaCompounds != null) {
294 return dnaCompounds;
295 }
296 return DNACompoundSet.getDNACompoundSet();
297 }
298
299 private CompoundSet<NucleotideCompound> getRnaCompounds() {
300 if (rnaCompounds != null) {
301 return rnaCompounds;
302 }
303 return RNACompoundSet.getRNACompoundSet();
304 }
305
306 private CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
307 if (aminoAcidCompounds != null) {
308 return aminoAcidCompounds;
309 }
310 return AminoAcidCompoundSet.getAminoAcidCompoundSet();
311 }
312
313 private DNAToRNATranslator getDnaRnaTranslator() {
314 if (dnaRnaTranslator != null) {
315 return dnaRnaTranslator;
316 }
317 return new DNAToRNATranslator(new RNASequenceCreator(getRnaCompounds()),
318 getDnaCompounds(), getRnaCompounds(), isDecorateRna());
319 }
320
321 private RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
322 if (rnaAminoAcidTranslator != null) {
323 return rnaAminoAcidTranslator;
324 }
325 return new RNAToAminoAcidTranslator(
326 getProteinCreator(), getRnaCompounds(), getCodons(),
327 getAminoAcidCompounds(), getTable(), isTrimStop(), isInitMet(), isTranslateNCodons());
328 }
329
330 private CompoundSet<Codon> getCodons() {
331 return getTable().getCodonCompoundSet(getRnaCompounds(), getAminoAcidCompounds());
332 }
333
334 private SequenceCreatorInterface<AminoAcidCompound> getProteinCreator() {
335 if (proteinSequenceCreator != null) {
336 return proteinSequenceCreator;
337 }
338 return new ProteinSequenceCreator(getAminoAcidCompounds());
339 }
340
341 private SequenceCreatorInterface<NucleotideCompound> getRnaCreator() {
342 if (rnaSequenceCreator != null) {
343 return rnaSequenceCreator;
344 }
345 return new RNASequenceCreator(getRnaCompounds());
346 }
347
348 private Table getTable() {
349 if (table != null) {
350 return table;
351 }
352 table(1); //Will set table to default IUPAC codee
353 return table;
354 }
355
356 private boolean isInitMet() {
357 return initMet;
358 }
359
360 private boolean isTrimStop() {
361 return trimStop;
362 }
363 private boolean isTranslateNCodons() {
364 return translateNCodons;
365 }
366 private boolean isDecorateRna() {
367 return decorateRna;
368 }
369 }
370 }