001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023 package org.biojava3.core.sequence;
024
025 import org.biojava3.core.sequence.compound.AminoAcidCompound;
026 import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
027 import org.biojava3.core.sequence.loader.StringProxySequenceReader;
028 import org.biojava3.core.sequence.template.AbstractSequence;
029 import org.biojava3.core.sequence.template.CompoundSet;
030 import org.biojava3.core.sequence.template.ProxySequenceReader;
031
032 /**
033 * The representation of a ProteinSequence
034 * @author Scooter Willis
035 */
036 public class ProteinSequence extends AbstractSequence<AminoAcidCompound> {
037
038 /**
039 * Create a protein from a string
040 * @param seqString
041 */
042 public ProteinSequence(String seqString) {
043 this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
044 }
045
046 /**
047 * Create a protein from a string with a user defined set of amino acids
048 * @param seqString
049 * @param compoundSet
050 */
051 public ProteinSequence(String seqString, CompoundSet<AminoAcidCompound> compoundSet) {
052 super(seqString, compoundSet);
053 }
054
055 /**
056 * A protein sequence where the storage of the sequence is somewhere else. Could be
057 * loaded from a large Fasta file or via a Uniprot Proxy reader via Uniprot ID
058 * @param proxyLoader
059 */
060 public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
061 this(proxyLoader, AminoAcidCompoundSet.getAminoAcidCompoundSet());
062 }
063
064 /**
065 * A protein sequence where the storage of the sequence is somewhere else with user defined
066 * set of amino acids. Could be loaded from a large Fasta file or via a Uniprot Proxy reader
067 * via Uniprot ID
068 * @param proxyLoader
069 */
070 public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader, CompoundSet<AminoAcidCompound> compoundSet) {
071 super(proxyLoader, compoundSet);
072 }
073
074 /**
075 * A Protein sequence can be stand alone or loaded from a transcript sequence. The design goal is to allow the creation
076 * of a Protein sequence from a Uniprot ID or some other Protein ID that based on cross reference you should be able to
077 * get the GeneSequence that codes for the protein if the CDS/Gene region is known. From the GeneSequence you should then
078 * be able to get the ChromosomeSequence which then allows you explore flaning regions of the gene sequences. The
079 * framework is in place to do this but currently hasn't been implement in the reverse direction starting from the
080 * Protein sequence.
081 *
082 * @param parentDNASequence
083 * @param begin
084 * @param end
085 */
086 public void setParentDNASequence(AbstractSequence parentDNASequence, Integer begin, Integer end) {
087 this.setParentSequence(parentDNASequence);
088 setBioBegin(begin);
089 setBioEnd(end);
090 }
091
092 public static void main(String[] args) {
093 ProteinSequence proteinSequence = new ProteinSequence("ARNDCEQGHILKMFPSTWYVBZJX");
094 System.out.println(proteinSequence.toString());
095
096 StringProxySequenceReader<AminoAcidCompound> sequenceStringProxyLoader = new StringProxySequenceReader<AminoAcidCompound>("XRNDCEQGHILKMFPSTWYVBZJA", AminoAcidCompoundSet.getAminoAcidCompoundSet());
097 ProteinSequence proteinSequenceFromProxy = new ProteinSequence(sequenceStringProxyLoader);
098 System.out.println(proteinSequenceFromProxy.toString());
099
100 }
101 }