001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on DATE
021     *
022     */
023    package org.biojava3.core.sequence;
024    
025    import java.util.logging.Logger;
026    import org.biojava3.core.sequence.compound.NucleotideCompound;
027    import org.biojava3.core.sequence.template.CompoundSet;
028    
029    /**
030     * Represents a exon or coding sequence in a gene. It has a parent {@link TranscriptSequence}
031     * where a TranscriptSequence is the child of a GeneSequence
032     * Not important for protein construction but the phase is used if outputting the gene
033     * to a gff3 file. {@linkplain http://www.sequenceontology.org/gff3.shtml}
034     * @author Scooter Willis
035     */
036    public class CDSSequence extends DNASequence {
037    
038        private static final Logger log = Logger.getLogger(CDSSequence.class.getName());
039        Integer phase = 0; // 0, 1, 2 
040        TranscriptSequence parentTranscriptSequence;
041    
042        /**
043         *
044         * @param parentSequence
045         * @param bioBegin
046         * @param bioEnd
047         * @param phase
048         */
049        public CDSSequence(TranscriptSequence parentSequence, int bioBegin, int bioEnd, int phase) {
050            parentTranscriptSequence = parentSequence;
051            this.setParentSequence(parentTranscriptSequence);
052            setBioBegin(bioBegin);
053            setBioEnd(bioEnd);
054            this.phase = phase;
055    
056        }
057    
058        /**
059         *
060         * @return
061         */
062        public Integer getPhase() {
063            return phase;
064        }
065    
066        /**
067         *
068         * @return
069         */
070        public Strand getStrand() {
071            return parentTranscriptSequence.getStrand();
072        }
073    
074        /**
075         * A CDS sequence if negative stranded needs to be reverse complement
076         * to represent the actual coding sequence. When getting a ProteinSequence
077         * from a TranscriptSequence this method is callled for each CDSSequence
078         * {@linkplain http://www.sequenceontology.org/gff3.shtml}
079         * {@linkplain http://biowiki.org/~yam/bioe131/GFF.ppt}
080         * @return
081         */
082        public String getCodingSequence() {
083            String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand());
084            if (getStrand() == Strand.NEGATIVE) {
085                //need to take complement of sequence because it is negative and we are returning a coding sequence
086                StringBuilder b = new StringBuilder(getLength());
087                CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet();
088                for (int i = 0; i < sequence.length(); i++) {
089                    String nucleotide = sequence.charAt(i) + "";
090                    NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide);
091                    b.append(nucleotideCompound.getComplement().getShortName());
092                }
093                sequence = b.toString();
094            }
095            //  sequence = sequence.substring(phase);
096            return sequence;
097        }
098    }