001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.transcription;
023    
024    import java.util.EnumMap;
025    import java.util.Map;
026    
027    import org.biojava3.core.sequence.compound.AminoAcidCompound;
028    import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
029    import org.biojava3.core.sequence.compound.DNACompoundSet;
030    import org.biojava3.core.sequence.compound.NucleotideCompound;
031    import org.biojava3.core.sequence.compound.RNACompoundSet;
032    import org.biojava3.core.sequence.io.IUPACParser;
033    import org.biojava3.core.sequence.io.ProteinSequenceCreator;
034    import org.biojava3.core.sequence.io.RNASequenceCreator;
035    import org.biojava3.core.sequence.io.IUPACParser.IUPACTable;
036    import org.biojava3.core.sequence.io.template.SequenceCreatorInterface;
037    import org.biojava3.core.sequence.template.CompoundSet;
038    import org.biojava3.core.sequence.template.Sequence;
039    import org.biojava3.core.sequence.transcription.Table.Codon;
040    
041    /**
042     * Used as a way of encapsulating the data structures required to parse
043     * DNA to a Protein sequence.
044     *
045     * In order to build one look at {@ TranscriptionEngine.Builder} which provides
046     * intelligent defaults & allows you to build an engine which is nearly
047     * the same as the default one but with a few changes. All of the engine
048     * is customisable.
049     *
050     * By default the code will attempt to:
051     *
052     * <ul>
053     * <li>Trim Stops</li>
054     * <li>Convert initiating codons to M</li>
055     * <li>Allow for the fuzzy translation of Codons i.e. if it contains an
056     * N that produces a {@link Sequence}&lt;{@link{AminoAcidCompound}&gt;
057     * with an X at that position
058     * </ul>
059     *
060     * @author ayates
061     */
062    public class TranscriptionEngine {
063    
064        private static final class IOD {
065    
066            public static final TranscriptionEngine INSTANCE = new TranscriptionEngine.Builder().build();
067        }
068    
069        /**
070         * Default instance to use when Transcribing from DNA -&gt; RNA -&gt;
071         * Protein. If you require anything that is not a default setting then look
072         * at {@ TranscriptionEngine.Builder} for customisation options.
073         */
074        public static TranscriptionEngine getDefault() {
075            return IOD.INSTANCE;
076        }
077        private final Table table;
078        private final RNAToAminoAcidTranslator rnaAminoAcidTranslator;
079        private final DNAToRNATranslator dnaRnaTranslator;
080        private final SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
081        private final SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
082        private final CompoundSet<NucleotideCompound> dnaCompounds;
083        private final CompoundSet<NucleotideCompound> rnaCompounds;
084        private final CompoundSet<AminoAcidCompound> aminoAcidCompounds;
085    
086        private TranscriptionEngine(
087                Table table,
088                RNAToAminoAcidTranslator rnaAminoAcidTranslator,
089                DNAToRNATranslator dnaRnaTranslator,
090                SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator,
091                SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator,
092                CompoundSet<NucleotideCompound> dnaCompounds,
093                CompoundSet<NucleotideCompound> rnaCompounds,
094                CompoundSet<AminoAcidCompound> aminoAcidCompounds) {
095            this.table = table;
096            this.rnaAminoAcidTranslator = rnaAminoAcidTranslator;
097            this.dnaRnaTranslator = dnaRnaTranslator;
098            this.proteinSequenceCreator = proteinSequenceCreator;
099            this.rnaSequenceCreator = rnaSequenceCreator;
100            this.dnaCompounds = dnaCompounds;
101            this.rnaCompounds = rnaCompounds;
102            this.aminoAcidCompounds = aminoAcidCompounds;
103        }
104    
105        /**
106         * Quick method to let you go from a CDS to a Peptide quickly. It assumes
107         * you are translating only in the first frame
108         *
109         * @param dna The CDS to translate
110         * @return The Protein Sequence
111         */
112        public Sequence<AminoAcidCompound> translate(Sequence<NucleotideCompound> dna) {
113            Map<Frame, Sequence<AminoAcidCompound>> trans =
114                    multipleFrameTranslation(dna, Frame.ONE);
115            return trans.get(Frame.ONE);
116        }
117    
118        /**
119         * A way of translating DNA in a number of frames
120         *
121         * @param dna The CDS to translate
122         * @param frames The Frames to translate in
123         * @return All generated protein sequences in the given frames. Can have
124         * null entries
125         */
126        public Map<Frame, Sequence<AminoAcidCompound>> multipleFrameTranslation(
127                Sequence<NucleotideCompound> dna, Frame... frames) {
128            Map<Frame, Sequence<AminoAcidCompound>> results =
129                    new EnumMap<Frame, Sequence<AminoAcidCompound>>(Frame.class);
130            for (Frame frame : frames) {
131                Sequence<NucleotideCompound> rna =
132                        getDnaRnaTranslator().createSequence(dna, frame);
133                Sequence<AminoAcidCompound> peptide =
134                        getRnaAminoAcidTranslator().createSequence(rna);
135                results.put(frame, peptide);
136            }
137            return results;
138        }
139    
140        public Table getTable() {
141            return table;
142        }
143    
144        public RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
145            return rnaAminoAcidTranslator;
146        }
147    
148        public DNAToRNATranslator getDnaRnaTranslator() {
149            return dnaRnaTranslator;
150        }
151    
152        public SequenceCreatorInterface<AminoAcidCompound> getProteinSequenceCreator() {
153            return proteinSequenceCreator;
154        }
155    
156        public SequenceCreatorInterface<NucleotideCompound> getRnaSequenceCreator() {
157            return rnaSequenceCreator;
158        }
159    
160        public CompoundSet<NucleotideCompound> getDnaCompounds() {
161            return dnaCompounds;
162        }
163    
164        public CompoundSet<NucleotideCompound> getRnaCompounds() {
165            return rnaCompounds;
166        }
167    
168        public CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
169            return aminoAcidCompounds;
170        }
171    
172        /**
173         * This class is the way to create a {@link TranslationEngine}.
174         */
175        public static class Builder {
176    
177            private Table table;
178            private RNAToAminoAcidTranslator rnaAminoAcidTranslator;
179            private DNAToRNATranslator dnaRnaTranslator;
180            private SequenceCreatorInterface<AminoAcidCompound> proteinSequenceCreator;
181            private SequenceCreatorInterface<NucleotideCompound> rnaSequenceCreator;
182            private CompoundSet<NucleotideCompound> dnaCompounds;
183            private CompoundSet<NucleotideCompound> rnaCompounds;
184            private CompoundSet<AminoAcidCompound> aminoAcidCompounds;
185            private boolean initMet = true;
186            private boolean trimStop = true;
187            private boolean translateNCodons = true;
188            private boolean decorateRna = false;
189    
190            /**
191             * The method to finish any calls to the builder with which returns
192             * a transcription engine. The engine is designed to provide everything
193             * required for transcription to those classes which will do the
194             * transcription.
195             */
196            public TranscriptionEngine build() {
197                return new TranscriptionEngine(
198                        getTable(),
199                        getRnaAminoAcidTranslator(),
200                        getDnaRnaTranslator(),
201                        getProteinCreator(),
202                        getRnaCreator(),
203                        getDnaCompounds(),
204                        getRnaCompounds(),
205                        getAminoAcidCompounds());
206            }
207    
208            //---- START OF BUILDER METHODS
209            /**
210             * Uses the static instance of {@link IUPACParser} to find instances of
211             * {@link IUPACTable}s by ID.
212             */
213            public Builder table(Integer id) {
214                table = IUPACParser.getInstance().getTable(id);
215                return this;
216            }
217    
218            /**
219             * Uses the static instance of {@link IUPACParser} to find instances of
220             * {@link IUPACTable}s by its String name
221             */
222            public Builder table(String name) {
223                table = IUPACParser.getInstance().getTable(name);
224                return this;
225            }
226    
227            public Builder table(Table table) {
228                this.table = table;
229                return this;
230            }
231    
232            public Builder dnaCompounds(CompoundSet<NucleotideCompound> compounds) {
233                this.dnaCompounds = compounds;
234                return this;
235            }
236    
237            public Builder rnaCompounds(CompoundSet<NucleotideCompound> compounds) {
238                this.rnaCompounds = compounds;
239                return this;
240            }
241    
242            public Builder aminoAcidsCompounds(CompoundSet<AminoAcidCompound> compounds) {
243                this.aminoAcidCompounds = compounds;
244                return this;
245            }
246    
247            public Builder dnaRnaTranslator(DNAToRNATranslator translator) {
248                this.dnaRnaTranslator = translator;
249                return this;
250            }
251    
252            public Builder rnaAminoAcidTranslator(RNAToAminoAcidTranslator translator) {
253                this.rnaAminoAcidTranslator = translator;
254                return this;
255            }
256    
257            public Builder proteinCreator(SequenceCreatorInterface<AminoAcidCompound> creator) {
258                this.proteinSequenceCreator = creator;
259                return this;
260            }
261    
262            public Builder rnaCreator(SequenceCreatorInterface<NucleotideCompound> creator) {
263                this.rnaSequenceCreator = creator;
264                return this;
265            }
266    
267            public Builder initMet(boolean initMet) {
268                this.initMet = initMet;
269                return this;
270            }
271    
272            public Builder trimStop(boolean trimStop) {
273                this.trimStop = trimStop;
274                return this;
275            }
276    
277            public Builder translateNCodons(boolean translateNCodons) {
278                this.translateNCodons = translateNCodons;
279                return this;
280            }
281    
282            /**
283             * Performs an optimisation where RNASequences are not translated into
284             * their own objects but are views onto the base DNA sequence.
285             */
286            public Builder decorateRna(boolean decorateRna) {
287                this.decorateRna = decorateRna;
288                return this;
289            }
290    
291            //------ INTERNAL BUILDERS with defaults if exists
292            private CompoundSet<NucleotideCompound> getDnaCompounds() {
293                if (dnaCompounds != null) {
294                    return dnaCompounds;
295                }
296                return DNACompoundSet.getDNACompoundSet();
297            }
298    
299            private CompoundSet<NucleotideCompound> getRnaCompounds() {
300                if (rnaCompounds != null) {
301                    return rnaCompounds;
302                }
303                return RNACompoundSet.getRNACompoundSet();
304            }
305    
306            private CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
307                if (aminoAcidCompounds != null) {
308                    return aminoAcidCompounds;
309                }
310                return AminoAcidCompoundSet.getAminoAcidCompoundSet();
311            }
312    
313            private DNAToRNATranslator getDnaRnaTranslator() {
314                if (dnaRnaTranslator != null) {
315                    return dnaRnaTranslator;
316                }
317                return new DNAToRNATranslator(new RNASequenceCreator(getRnaCompounds()),
318                        getDnaCompounds(), getRnaCompounds(), isDecorateRna());
319            }
320    
321            private RNAToAminoAcidTranslator getRnaAminoAcidTranslator() {
322                if (rnaAminoAcidTranslator != null) {
323                    return rnaAminoAcidTranslator;
324                }
325                return new RNAToAminoAcidTranslator(
326                        getProteinCreator(), getRnaCompounds(), getCodons(),
327                        getAminoAcidCompounds(), getTable(), isTrimStop(), isInitMet(), isTranslateNCodons());
328            }
329    
330            private CompoundSet<Codon> getCodons() {
331                return getTable().getCodonCompoundSet(getRnaCompounds(), getAminoAcidCompounds());
332            }
333    
334            private SequenceCreatorInterface<AminoAcidCompound> getProteinCreator() {
335                if (proteinSequenceCreator != null) {
336                    return proteinSequenceCreator;
337                }
338                return new ProteinSequenceCreator(getAminoAcidCompounds());
339            }
340    
341            private SequenceCreatorInterface<NucleotideCompound> getRnaCreator() {
342                if (rnaSequenceCreator != null) {
343                    return rnaSequenceCreator;
344                }
345                return new RNASequenceCreator(getRnaCompounds());
346            }
347    
348            private Table getTable() {
349                if (table != null) {
350                    return table;
351                }
352                table(1); //Will set table to default IUPAC codee
353                return table;
354            }
355    
356            private boolean isInitMet() {
357                return initMet;
358            }
359    
360            private boolean isTrimStop() {
361                return trimStop;
362            }
363            private boolean isTranslateNCodons() {
364                return translateNCodons;
365            }
366            private boolean isDecorateRna() {
367                return decorateRna;
368            }
369        }
370    }