001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on DATE
021     *
022     */
023    package org.biojava3.core.sequence;
024    
025    import org.biojava3.core.sequence.compound.AminoAcidCompound;
026    import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
027    import org.biojava3.core.sequence.loader.StringProxySequenceReader;
028    import org.biojava3.core.sequence.template.AbstractSequence;
029    import org.biojava3.core.sequence.template.CompoundSet;
030    import org.biojava3.core.sequence.template.ProxySequenceReader;
031    
032    /**
033     * The representation of a ProteinSequence
034     * @author Scooter Willis
035     */
036    public class ProteinSequence extends AbstractSequence<AminoAcidCompound> {
037    
038        /**
039         * Create a protein from a string
040         * @param seqString
041         */
042        public ProteinSequence(String seqString) {
043            this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
044        }
045    
046        /**
047         * Create a protein from a string with a user defined set of amino acids
048         * @param seqString
049         * @param compoundSet
050         */
051        public ProteinSequence(String seqString, CompoundSet<AminoAcidCompound> compoundSet) {
052            super(seqString, compoundSet);
053        }
054    
055        /**
056         * A protein sequence where the storage of the sequence is somewhere else. Could be
057         * loaded from a large Fasta file or via a Uniprot Proxy reader via Uniprot ID
058         * @param proxyLoader
059         */
060        public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
061            this(proxyLoader, AminoAcidCompoundSet.getAminoAcidCompoundSet());
062        }
063    
064        /**
065         * A protein sequence where the storage of the sequence is somewhere else with user defined
066         * set of amino acids. Could be loaded from a large Fasta file or via a Uniprot Proxy reader
067         * via Uniprot ID
068         * @param proxyLoader
069         */
070        public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader, CompoundSet<AminoAcidCompound> compoundSet) {
071            super(proxyLoader, compoundSet);
072        }
073    
074        /**
075         * A Protein sequence can be stand alone or loaded from a transcript sequence. The design goal is to allow the creation
076         * of a Protein sequence from a Uniprot ID or some other Protein ID that based on cross reference you should be able to
077         * get the GeneSequence that codes for the protein if the CDS/Gene region is known. From the GeneSequence you should then
078         * be able to get the ChromosomeSequence which then allows you explore flaning regions of the gene sequences. The
079         * framework is in place to do this but currently hasn't been implement in the reverse direction starting from the
080         * Protein sequence.
081         *
082         * @param parentDNASequence
083         * @param begin
084         * @param end
085         */
086        public void setParentDNASequence(AbstractSequence parentDNASequence, Integer begin, Integer end) {
087            this.setParentSequence(parentDNASequence);
088            setBioBegin(begin);
089            setBioEnd(end);
090        }
091    
092        public static void main(String[] args) {
093            ProteinSequence proteinSequence = new ProteinSequence("ARNDCEQGHILKMFPSTWYVBZJX");
094            System.out.println(proteinSequence.toString());
095    
096            StringProxySequenceReader<AminoAcidCompound> sequenceStringProxyLoader = new StringProxySequenceReader<AminoAcidCompound>("XRNDCEQGHILKMFPSTWYVBZJA", AminoAcidCompoundSet.getAminoAcidCompoundSet());
097            ProteinSequence proteinSequenceFromProxy = new ProteinSequence(sequenceStringProxyLoader);
098            System.out.println(proteinSequenceFromProxy.toString());
099    
100        }
101    }