001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on November 19, 2010
021 * Author: Mark Chapman
022 */
023
024 package org.biojava3.core.sequence.template;
025
026 import java.util.List;
027
028 /**
029 * Defines a minimal data structure for reading and writing a sequence alignment. The full {@code Profile} data
030 * structure in the alignment module provides additional functionality.
031 *
032 * @author Mark Chapman
033 * @param <S> each element of the alignment profile is of type S
034 * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
035 */
036 public interface LightweightProfile<S extends Sequence<C>, C extends Compound> {
037
038 /**
039 * List of output formats that can be requested from {@link #toString(StringFormat)}.
040 */
041 enum StringFormat {
042 ALN,
043 CLUSTALW,
044 FASTA,
045 GCG,
046 MSF,
047 PDBWEB
048 }
049
050 /**
051 * Returns the {@link Sequence} at the given index.
052 *
053 * @param listIndex index of sequence in profile; per the bounds below, the lowest valid index is 1
054 * @return desired sequence
055 * @throws IndexOutOfBoundsException if {@code listIndex < 1} or {@code listIndex} exceeds the number of sequences
056 */
057 S getAlignedSequence(int listIndex);
058
059 /**
060 * Returns a {@link List} containing the individual {@link Sequence}s of this alignment.
061 *
062 * @return list of aligned sequences
063 */
064 List<S> getAlignedSequences();
065
066 /**
067 * Returns the {@link Compound} elements of the original {@link Sequence}s at the given column.
068 *
069 * @param alignmentIndex column index within an alignment; per the bounds below, the lowest valid index is 1
070 * @return the sequence elements
071 * @throws IndexOutOfBoundsException if {@code alignmentIndex < 1} or {@code alignmentIndex} exceeds {@link #getLength()}
072 */
073 List<C> getCompoundsAt(int alignmentIndex);
074
075 /**
076 * Returns the {@link CompoundSet} of all {@link Sequence}s.
077 *
078 * @return set of {@link Compound}s in contained sequences
079 */
080 CompoundSet<C> getCompoundSet();
081
082 /**
083 * Returns the number of columns in the alignment profile.
084 *
085 * @return the number of columns
086 */
087 int getLength();
088
089 /**
090 * Returns the number of rows in this profile. If any {@link Sequence}s are circular and overlap within the
091 * alignment, the returned size will be greater than the number of sequences, otherwise the numbers will be equal.
092 *
093 * @return number of rows
094 */
095 int getSize();
096
097 /**
098 * Returns a simple view of the alignment profile. This shows each sequence on a separate line (or multiple lines,
099 * if circular) and nothing more. This should result in {@link #getSize()} lines with {@link #getLength()}
100 * {@link Compound}s per line.
101 *
102 * @return a simple view of the alignment profile
103 */
104 String toString();
105
106 /**
107 * Returns a formatted view of the alignment profile. This shows the start and end indices of the profile for each
108 * group of lines of the given width. Each line may also be labeled.
109 *
110 * @param width limit on the line length
111 * @return a formatted view of the alignment profile
112 */
113 String toString(int width);
114
115 /**
116 * Returns a formatted view of the alignment profile. Details depend on the format given.
117 *
118 * @param format output format, one of the {@link StringFormat} constants
119 * @return a formatted view of the alignment profile
120 */
121 String toString(StringFormat format);
122
123 }