001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023 package org.biojava3.core.sequence;
024
025 import java.util.ArrayList;
026 import java.util.Collections;
027 import java.util.LinkedHashMap;
028 import java.util.logging.Logger;
029
030 import org.biojava3.core.sequence.transcription.TranscriptionEngine;
031
032 /**
033 * This is the sequence if you want to go from a gene sequence to a protein sequence. Need to start with a
034 * ChromosomeSequence then getting a GeneSequence and then a TranscriptSequence
035 * @author Scooter Willis
036 */
037 public class TranscriptSequence extends DNASequence {
038
039 private static final Logger log = Logger.getLogger(TranscriptSequence.class.getName());
040 private final ArrayList<CDSSequence> cdsSequenceList = new ArrayList<CDSSequence>();
041 private final LinkedHashMap<String, CDSSequence> cdsSequenceHashMap = new LinkedHashMap<String, CDSSequence>();
042 private StartCodonSequence startCodonSequence = null;
043 private StopCodonSequence stopCodonSequence = null;
044 private GeneSequence parentGeneSequence = null;
045
046 /**
047 *
048 * @param parentDNASequence
049 * @param begin
050 * @param end inclusive of end
051 */
052 public TranscriptSequence(GeneSequence parentDNASequence, int begin, int end) {
053 setParentSequence(parentDNASequence);
054 this.parentGeneSequence = parentDNASequence;
055 setBioBegin(begin);
056 setBioEnd(end);
057
058 }
059
060 /**
061 * @return the strand
062 */
063 public Strand getStrand() {
064 return parentGeneSequence.getStrand();
065 }
066
067 /**
068 * Remove a CDS or coding sequence from the transcript sequence
069 * @param accession
070 * @return
071 */
072 public CDSSequence removeCDS(String accession) {
073 for (CDSSequence cdsSequence : cdsSequenceList) {
074 if (cdsSequence.getAccession().getID().equals(accession)) {
075 cdsSequenceList.remove(cdsSequence);
076 cdsSequenceHashMap.remove(accession);
077 return cdsSequence;
078 }
079 }
080 return null;
081 }
082
083 /**
084 * Get the CDS sequences that have been added to the TranscriptSequences
085 * @return
086 */
087 public LinkedHashMap<String, CDSSequence> getCDSSequences() {
088 return cdsSequenceHashMap;
089 }
090
091 /**
092 * Add a Coding Sequence region with phase to the transcript sequence
093 * @param accession
094 * @param begin
095 * @param end
096 * @param phase 0,1,2
097 * @return
098 */
099 public CDSSequence addCDS(AccessionID accession, int begin, int end, int phase) throws Exception {
100 if (cdsSequenceHashMap.containsKey(accession.getID())) {
101 throw new Exception("Duplicate accesion id " + accession.getID());
102 }
103 CDSSequence cdsSequence = new CDSSequence(this, begin, end, phase); //sense should be the same as parent
104 cdsSequence.setAccession(accession);
105 cdsSequenceList.add(cdsSequence);
106 Collections.sort(cdsSequenceList, new CDSComparator());
107 cdsSequenceHashMap.put(accession.getID(), cdsSequence);
108 return cdsSequence;
109 }
110
111 /**
112 * http://www.sequenceontology.org/gff3.shtml
113 * http://biowiki.org/~yam/bioe131/GFF.ppt
114 * @return
115 */
116 /**
117 * Return a list of protein sequences based on each CDS sequence
118 * where the phase shift between two CDS sequences is assigned to the
119 * CDS sequence that starts the triplet. This can be used to map
120 * a CDS/exon region of a protein sequence back to the DNA sequence
121 * If you have a protein sequence and a predicted gene you can take the
122 * predict CDS protein sequences and align back to the protein sequence.
123 * If you have errors in mapping the predicted protein CDS regions to
124 * an the known protein sequence then you can identify possible errors
125 * in the prediction
126 *
127 * @return
128 */
129 public ArrayList<ProteinSequence> getProteinCDSSequences() {
130 ArrayList<ProteinSequence> proteinSequenceList = new ArrayList<ProteinSequence>();
131 for (int i = 0; i < cdsSequenceList.size(); i++) {
132 CDSSequence cdsSequence = cdsSequenceList.get(i);
133 String codingSequence = cdsSequence.getCodingSequence();
134 // System.out.println("CDS " + getStrand() + " " + cdsSequence.getPhase() + "=" + codingSequence);
135 if (this.getStrand() == Strand.NEGATIVE) {
136 if (cdsSequence.phase == 1) {
137 codingSequence = codingSequence.substring(1, codingSequence.length());
138 } else if (cdsSequence.phase == 2) {
139 codingSequence = codingSequence.substring(2, codingSequence.length());
140 }
141 if (i < cdsSequenceList.size() - 1) {
142 CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1);
143 if (nextCDSSequence.phase == 1) {
144 String nextCodingSequence = nextCDSSequence.getCodingSequence();
145 codingSequence = codingSequence + nextCodingSequence.substring(0, 1);
146 } else if (nextCDSSequence.phase == 2) {
147 String nextCodingSequence = nextCDSSequence.getCodingSequence();
148 codingSequence = codingSequence + nextCodingSequence.substring(0, 2);
149 }
150 }
151 } else {
152 if (cdsSequence.phase == 1) {
153 codingSequence = codingSequence.substring(1, codingSequence.length());
154 } else if (cdsSequence.phase == 2) {
155 codingSequence = codingSequence.substring(2, codingSequence.length());
156 }
157 if (i < cdsSequenceList.size() - 1) {
158 CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1);
159 if (nextCDSSequence.phase == 1) {
160 String nextCodingSequence = nextCDSSequence.getCodingSequence();
161 codingSequence = codingSequence + nextCodingSequence.substring(0, 1);
162 } else if (nextCDSSequence.phase == 2) {
163 String nextCodingSequence = nextCDSSequence.getCodingSequence();
164 codingSequence = codingSequence + nextCodingSequence.substring(0, 2);
165 }
166 }
167 }
168
169
170 // System.out.println(codingSequence);
171 DNASequence dnaCodingSequence = new DNASequence(codingSequence.toString().toUpperCase());
172 RNASequence rnaCodingSequence = dnaCodingSequence.getRNASequence(TranscriptionEngine.getDefault());
173 ProteinSequence proteinSequence = rnaCodingSequence.getProteinSequence(TranscriptionEngine.getDefault());
174 proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID()));
175 proteinSequence.setParentDNASequence(cdsSequence, 1, cdsSequence.getLength());
176 proteinSequenceList.add(proteinSequence);
177 }
178 return proteinSequenceList;
179 }
180
181 /**
182 * Get the stitched together CDS sequences then maps to the cDNA
183 * @return
184 */
185 public DNASequence getDNACodingSequence() {
186 StringBuilder sb = new StringBuilder();
187 for (CDSSequence cdsSequence : cdsSequenceList) {
188 sb.append(cdsSequence.getCodingSequence());
189 }
190 DNASequence dnaSequence = new DNASequence(sb.toString().toUpperCase());
191 dnaSequence.setAccession(new AccessionID(this.getAccession().getID()));
192 return dnaSequence;
193 }
194
195 /**
196 * Get the protein sequence
197 * @return
198 */
199 public ProteinSequence getProteinSequence() {
200 return getProteinSequence(TranscriptionEngine.getDefault());
201 }
202
203 /**
204 * Get the protein sequence with user defined TranscriptEngine
205 * @param engine
206 * @return
207 */
208 public ProteinSequence getProteinSequence(TranscriptionEngine engine) {
209 DNASequence dnaCodingSequence = getDNACodingSequence();
210 RNASequence rnaCodingSequence = dnaCodingSequence.getRNASequence(engine);
211 ProteinSequence proteinSequence = rnaCodingSequence.getProteinSequence(engine);
212 proteinSequence.setAccession(new AccessionID(this.getAccession().getID()));
213 return proteinSequence;
214 }
215
216 /**
217 * @return the startCodonSequence
218 */
219 public StartCodonSequence getStartCodonSequence() {
220 return startCodonSequence;
221 }
222
223 /**
224 * @param startCodonSequence the startCodonSequence to set
225 */
226 public void addStartCodonSequence(AccessionID accession, int begin, int end) {
227 this.startCodonSequence = new StartCodonSequence(this, begin, end);
228 startCodonSequence.setAccession(accession);
229 }
230
231 /**
232 * @return the stopCodonSequence
233 */
234 public StopCodonSequence getStopCodonSequence() {
235 return stopCodonSequence;
236 }
237
238 /**
239 * @param stopCodonSequence the stopCodonSequence to set
240 */
241 public void addStopCodonSequence(AccessionID accession, int begin, int end) {
242 this.stopCodonSequence = new StopCodonSequence(this, begin, end);
243 stopCodonSequence.setAccession(accession);
244 }
245 }