001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.transcription;
023
024 import java.util.ArrayList;
025 import java.util.Arrays;
026 import java.util.HashMap;
027 import java.util.List;
028 import java.util.Map;
029
030 import org.biojava3.core.sequence.RNASequence;
031 import org.biojava3.core.sequence.compound.AminoAcidCompound;
032 import org.biojava3.core.sequence.compound.NucleotideCompound;
033 import org.biojava3.core.sequence.io.template.SequenceCreatorInterface;
034 import org.biojava3.core.sequence.template.AbstractCompoundTranslator;
035 import org.biojava3.core.sequence.template.CompoundSet;
036 import org.biojava3.core.sequence.template.Sequence;
037 import org.biojava3.core.sequence.template.SequenceView;
038 import org.biojava3.core.sequence.transcription.Table.Codon;
039 import org.biojava3.core.sequence.views.WindowedSequence;
040
041 /**
042 * Takes a {@link Sequence} of {@link NucleotideCompound} which should
043 * represent an RNA sequence ({@link RNASequence} is good for this) and returns
044 * a list of {@link Sequence} which hold {@link AminoAcidCompound}. The
045 * translator can also trim stop codons as well as changing any valid
046 * start codon to an initiating met.
047 *
048 * @author ayates
049 */
050 public class RNAToAminoAcidTranslator extends AbstractCompoundTranslator<NucleotideCompound, AminoAcidCompound> {
051
052 private final boolean trimStops;
053 private final boolean initMetOnly;
054 private final Map<Table.CaseInsensitiveTriplet, Codon> quickLookup;
055 private final Map<AminoAcidCompound, List<Codon>> aminoAcidToCodon;
056 //Cheeky lookup which uses a hashing value; key is to switch to using this all the time
057 private final Codon[] codonArray = new Codon[64000];
058 private final AminoAcidCompound unknownAminoAcidCompound;
059 private final boolean translateNCodons;
060
061 public RNAToAminoAcidTranslator(
062 SequenceCreatorInterface<AminoAcidCompound> creator,
063 CompoundSet<NucleotideCompound> nucleotides, CompoundSet<Codon> codons,
064 CompoundSet<AminoAcidCompound> aminoAcids, Table table,
065 boolean trimStops, boolean initMetOnly, boolean translateNCodons) {
066
067 super(creator, nucleotides, aminoAcids);
068 this.trimStops = trimStops;
069 this.initMetOnly = initMetOnly;
070 this.translateNCodons = translateNCodons;
071
072 quickLookup = new HashMap<Table.CaseInsensitiveTriplet, Codon>(codons.getAllCompounds().size());
073 aminoAcidToCodon = new HashMap<AminoAcidCompound, List<Codon>>();
074
075 List<Codon> codonList = table.getCodons(nucleotides, aminoAcids);
076 for (Codon codon : codonList) {
077 quickLookup.put(codon.getTriplet(), codon);
078 codonArray[codon.getTriplet().intValue()] = codon;
079
080 List<Codon> codonL = aminoAcidToCodon.get(codon.getAminoAcid());
081 if ( codonL == null){
082 codonL = new ArrayList<Codon>();
083 aminoAcidToCodon.put(codon.getAminoAcid(), codonL);
084 }
085 codonL.add(codon);
086
087 }
088
089 unknownAminoAcidCompound = aminoAcids.getCompoundForString("X");
090 }
091
092 /**
093 * Performs the core conversion of RNA to Peptide. It does this by walking
094 * a windowed version of the given sequence. Any trailing DNA base pairs
095 * are ignored according to the specification of {@link WindowedSequence}.
096 */
097 @Override
098 public List<Sequence<AminoAcidCompound>> createSequences(
099 Sequence<NucleotideCompound> originalSequence) {
100
101 List<List<AminoAcidCompound>> workingList = new ArrayList<List<AminoAcidCompound>>();
102
103 Iterable<SequenceView<NucleotideCompound>> iter =
104 new WindowedSequence<NucleotideCompound>(originalSequence, 3);
105
106 for (SequenceView<NucleotideCompound> element : iter) {
107 AminoAcidCompound aminoAcid;
108
109 int i =1;
110 Table.CaseInsensitiveTriplet triplet = new Table.CaseInsensitiveTriplet(
111 element.getCompoundAt(i++), element.getCompoundAt(i++), element.getCompoundAt(i++));
112
113 Codon target;
114
115 int arrayIndex = triplet.intValue();
116 //So long as we're within range then access
117 if(arrayIndex > -1 && arrayIndex < codonArray.length) {
118 target = codonArray[arrayIndex];
119 aminoAcid = target.getAminoAcid();
120 }
121 //Otherwise we have to use the Map
122 else {
123 target = quickLookup.get(triplet);
124 aminoAcid = target.getAminoAcid();
125 }
126
127 if(aminoAcid == null && translateNCodons()) {
128 aminoAcid = unknownAminoAcidCompound;
129 }
130 addCompoundsToList(Arrays.asList(aminoAcid), workingList);
131 }
132
133 postProcessCompoundLists(workingList);
134
135 return workingListToSequences(workingList);
136 }
137
138 /**
139 * Performs the trimming of stop codons and the conversion of a valid start
140 * amino acid to M
141 */
142 @Override
143 protected void postProcessCompoundLists(
144 List<List<AminoAcidCompound>> compoundLists) {
145 for (List<AminoAcidCompound> compounds : compoundLists) {
146 if (initMetOnly) {
147 initMet(compounds);
148 }
149 if (trimStops) {
150 trimStop(compounds);
151 }
152 }
153 }
154
155 private void initMet(List<AminoAcidCompound> sequence) {
156 AminoAcidCompound initMet = getToCompoundSet().getCompoundForString("M");
157 AminoAcidCompound start = sequence.get(0);
158 boolean isStart = false;
159 for (Codon c : aminoAcidToCodon.get(start)) {
160 if (c.isStart()) {
161 isStart = true;
162 break;
163 }
164 }
165
166 if (isStart) {
167 sequence.set(0, initMet);
168 }
169 }
170
171 /**
172 * Imperfect code. Checks the last amino acid to see if a codon could
173 * have translated a stop for it. Left in for the moment
174 */
175 protected void trimStop(List<AminoAcidCompound> sequence) {
176 AminoAcidCompound stop = sequence.get(sequence.size() - 1);
177 boolean isStop = false;
178 for (Codon c : aminoAcidToCodon.get(stop)) {
179 if (c.isStop()) {
180 isStop = true;
181 break;
182 }
183 }
184
185 if (isStop) {
186 sequence.remove(sequence.size() - 1);
187 }
188 }
189
190 /**
191 * Indicates if we want to force exact translation of compounds or not i.e.
192 * those with internal N RNA bases. This will cause a translation to an
193 * X amino acid
194 */
195 public boolean translateNCodons() {
196 return translateNCodons;
197 }
198 }