001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023 package org.biojava3.core.sequence;
024
025 import java.util.logging.Logger;
026 import org.biojava3.core.sequence.compound.NucleotideCompound;
027 import org.biojava3.core.sequence.template.CompoundSet;
028
029 /**
030 * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence}
031 * where a TranscriptSequence is the child of a GeneSequence
032 * Not important for protein construction but the phase is used if outputting the gene
033 * to a gff3 file. {@linkplain http://www.sequenceontology.org/gff3.shtml}
034 * @author Scooter Willis
035 */
036 public class CDSSequence extends DNASequence {
037
038 private static final Logger log = Logger.getLogger(CDSSequence.class.getName());
039 Integer phase = 0; // 0, 1, 2
040 TranscriptSequence parentTranscriptSequence;
041
042 /**
043 *
044 * @param parentSequence
045 * @param bioBegin
046 * @param bioEnd
047 * @param phase
048 */
049 public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) {
050 parentTranscriptSequence = parentSequence;
051 this.setParentSequence(parentTranscriptSequence);
052 setBioBegin(bioBegin);
053 setBioEnd(bioEnd);
054 this.phase = phase;
055
056 }
057
058 /**
059 *
060 * @return
061 */
062 public Integer getPhase() {
063 return phase;
064 }
065
066 /**
067 *
068 * @return
069 */
070 public Strand getStrand() {
071 return parentTranscriptSequence.getStrand();
072 }
073
074 /**
075 * A CDS sequence if negative stranded needs to be reverse complement
076 * to represent the actual coding sequence. When getting a ProteinSequence
077 * from a TranscriptSequence this method is callled for each CDSSequence
078 * {@linkplain http://www.sequenceontology.org/gff3.shtml}
079 * {@linkplain http://biowiki.org/~yam/bioe131/GFF.ppt}
080 * @return
081 */
082 public String getCodingSequence() {
083 String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand());
084 if (getStrand() == Strand.NEGATIVE) {
085 //need to take complement of sequence because it is negative and we are returning a coding sequence
086 StringBuilder b = new StringBuilder(getLength());
087 CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet();
088 for (int i = 0; i < sequence.length(); i++) {
089 String nucleotide = sequence.charAt(i) + "";
090 NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide);
091 b.append(nucleotideCompound.getComplement().getShortName());
092 }
093 sequence = b.toString();
094 }
095 // sequence = sequence.substring(phase);
096 return sequence;
097 }
098 }