001 package org.biojava3.core.sequence.storage;
002
003 import java.util.ArrayList;
004 import java.util.Arrays;
005 import java.util.Iterator;
006 import java.util.List;
007 import java.util.NoSuchElementException;
008
009 import org.biojava3.core.sequence.AccessionID;
010 import org.biojava3.core.sequence.template.Compound;
011 import org.biojava3.core.sequence.template.CompoundSet;
012 import org.biojava3.core.sequence.template.ProxySequenceReader;
013 import org.biojava3.core.sequence.template.Sequence;
014 import org.biojava3.core.sequence.template.SequenceMixin;
015 import org.biojava3.core.sequence.template.SequenceView;
016
017 /**
018 * This reader actually proxies onto multiple types of sequence in order
019 * to allow a number of sequence objects to act as if they are one sequence.
020 * The code takes in any number of sequences, records the minimum and maximum
021 * bounds each sequence covers with respect to 1 position indexing and then
022 * binary searches these when a position is requested. Because of this
023 * 0 length Sequences are excluded during construction.
024 *
025 * Performance is not as good as if you are using a flat sequence however the
026 * speed of lookup is more than adaquate for most situations. Using the iterator
027 * gives the best performance as this does not rely on the binary search
028 * mechanism instead iterating through each sequence in turn.
029 *
030 * @author ayates
031 * @param <C> Tyoe of compound to hold
032 */
033 public class JoiningSequenceReader<C extends Compound> implements ProxySequenceReader<C> {
034
035 /**
036 * Internal implementation flag and controls how we look for the right
037 * sub-sequence
038 */
039 private static final boolean BINARY_SEARCH = true;
040 private final List<Sequence<C>> sequences;
041 private final CompoundSet<C> compoundSet;
042 private int[] maxSequenceIndex;
043 private int[] minSequenceIndex;
044
045 /**
046 * Allows creation of the store from Vargs Sequence<C> objects. CompoundSet
047 * defaults to the first element of the array (assuming there are elements
048 * available during construction otherwise we will throw an illegal
049 * state exception).
050 */
051 public JoiningSequenceReader(Sequence<C>... sequences) {
052 this(Arrays.asList(sequences));
053 }
054
055 /**
056 * Allows creation of the store from List<Sequence<C>>. CompoundSet
057 * defaults to the first element of the List (assuming there are elements
058 * available during construction otherwise we will throw an illegal
059 * state exception).
060 */
061 public JoiningSequenceReader(List<Sequence<C>> sequences) {
062 this.sequences = grepSequences(sequences);
063 this.compoundSet = grepCompoundSet();
064 }
065
066 public JoiningSequenceReader(CompoundSet<C> compoundSet, Sequence<C>... sequences) {
067 this(compoundSet, Arrays.asList(sequences));
068 }
069
070 public JoiningSequenceReader(CompoundSet<C> compoundSet, List<Sequence<C>> sequences) {
071 this.sequences = grepSequences(sequences);
072 this.compoundSet = compoundSet;
073 }
074
075 private List<Sequence<C>> grepSequences(List<Sequence<C>> sequences) {
076 List<Sequence<C>> seqs = new ArrayList<Sequence<C>>();
077 for (Sequence<C> s : sequences) {
078 if (s.getLength() != 0) {
079 seqs.add(s);
080 }
081 }
082 return seqs;
083 }
084
085 private CompoundSet<C> grepCompoundSet() {
086 if (sequences.isEmpty()) {
087 throw new IllegalStateException("Cannot get a CompoundSet because we have no sequences. Set during construction");
088 }
089 return sequences.get(0).getCompoundSet();
090 }
091
092
093 public C getCompoundAt(int position) {
094 int sequenceIndex = getSequenceIndex(position);
095 Sequence<C> sequence = sequences.get(sequenceIndex);
096 int indexInSequence = (position - getMinSequenceIndex()[sequenceIndex]) + 1;
097 return sequence.getCompoundAt(indexInSequence);
098 }
099
100
101 public CompoundSet<C> getCompoundSet() {
102 return compoundSet;
103 }
104
105
106 public int getLength() {
107 int[] maxSeqIndex = getMaxSequenceIndex();
108 if (maxSeqIndex.length == 0) {
109 return 0;
110 }
111 return maxSeqIndex[maxSeqIndex.length - 1];
112 }
113
114 /**
115 * Returns which Sequence holds the position queried for
116 */
117 private int getSequenceIndex(int position) {
118 if (BINARY_SEARCH) {
119 return binarySearch(position);
120 } else {
121 return linearSearch(position);
122 }
123 }
124
125 private int[] getMinSequenceIndex() {
126 if (minSequenceIndex == null) {
127 initSeqIndexes();
128 }
129 return minSequenceIndex;
130 }
131
132 private int[] getMaxSequenceIndex() {
133 if (maxSequenceIndex == null) {
134 initSeqIndexes();
135 }
136 return maxSequenceIndex;
137 }
138
139 private void initSeqIndexes() {
140 minSequenceIndex = new int[sequences.size()];
141 maxSequenceIndex = new int[sequences.size()];
142 int currentMaxIndex = 0;
143 int currentMinIndex = 1;
144 int lastLength = 0;
145 for (int i = 0; i < sequences.size(); i++) {
146 currentMinIndex += lastLength;
147 currentMaxIndex += sequences.get(i).getLength();
148 minSequenceIndex[i] = currentMinIndex;
149 maxSequenceIndex[i] = currentMaxIndex;
150 lastLength = sequences.get(i).getLength();
151 }
152 }
153
154 /**
155 * Scans through the sequence index arrays in linear time. Not the best
156 * performance but easier to code
157 */
158 private int linearSearch(int position) {
159 int[] minSeqIndex = getMinSequenceIndex();
160 int[] maxSeqIndex = getMaxSequenceIndex();
161 int length = minSeqIndex.length;
162 for (int i = 0; i < length; i++) {
163 if (position >= minSeqIndex[i] && position <= maxSeqIndex[i]) {
164 return i;
165 }
166 }
167 throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
168 }
169
170 /**
171 * Scans through the sequence index arrays using binary search
172 */
173 private int binarySearch(int position) {
174 int[] minSeqIndex = getMinSequenceIndex();
175 int[] maxSeqIndex = getMaxSequenceIndex();
176
177 int low = 0;
178 int high = minSeqIndex.length - 1;
179 while (low <= high) {
180 //Go to the mid point in the array
181 int mid = (low + high) >>> 1;
182
183 //Get the max position represented by this Sequence
184 int midMinPosition = minSeqIndex[mid];
185 int midMaxPosition = maxSeqIndex[mid];
186
187 //if current position is greater than the current bounds then
188 //increase search space
189 if (midMinPosition < position && midMaxPosition < position) {
190 low = mid + 1;
191 } //if current position is less than current bounds then decrease
192 //search space
193 else if (midMinPosition > position && midMaxPosition > position) {
194 high = mid - 1;
195 } else {
196 return mid;
197 }
198 }
199 throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
200 }
201
202 /**
203 * Iterator implementation which attempts to move through the 2D structure
204 * attempting to skip onto the next sequence as & when it is asked to
205 */
206
207 public Iterator<C> iterator() {
208 final List<Sequence<C>> localSequences = sequences;
209 return new Iterator<C>() {
210
211 private Iterator<C> currentSequenceIterator = null;
212 private int currentPosition = 0;
213
214
215 public boolean hasNext() {
216 //If the current iterator is null then see if the Sequences object has anything
217 if (currentSequenceIterator == null) {
218 return !localSequences.isEmpty();
219 }
220
221 //See if we had any compounds
222 boolean hasNext = currentSequenceIterator.hasNext();
223 if (!hasNext) {
224 hasNext = currentPosition < sequences.size();
225 }
226 return hasNext;
227 }
228
229
230 public C next() {
231 if (currentSequenceIterator == null) {
232 if (localSequences.isEmpty()) {
233 throw new NoSuchElementException("No sequences to iterate over; make sure you call hasNext() before next()");
234 }
235 currentSequenceIterator = localSequences.get(currentPosition).iterator();
236 currentPosition++;
237 }
238 if (!currentSequenceIterator.hasNext()) {
239 currentSequenceIterator = localSequences.get(currentPosition).iterator();
240 currentPosition++;
241 }
242 return currentSequenceIterator.next();
243 }
244
245
246 public void remove() throws UnsupportedOperationException {
247 throw new UnsupportedOperationException("Cannot remove from this Sequence");
248 }
249 };
250 }
251
252
253 public void setCompoundSet(CompoundSet<C> compoundSet) {
254 throw new UnsupportedOperationException();
255 }
256
257
258 public void setContents(String sequence) {
259 throw new UnsupportedOperationException();
260 }
261
262
263 public int countCompounds(C... compounds) {
264 return SequenceMixin.countCompounds(this, compounds);
265 }
266
267
268 public AccessionID getAccession() throws UnsupportedOperationException {
269 throw new UnsupportedOperationException();
270 }
271
272
273 public List<C> getAsList() {
274 return SequenceMixin.toList(this);
275 }
276
277
278 public int getIndexOf(C compound) {
279 return SequenceMixin.indexOf(this, compound);
280 }
281
282
283 public int getLastIndexOf(C compound) {
284 return SequenceMixin.lastIndexOf(this, compound);
285 }
286
287
288 public String getSequenceAsString() {
289 return SequenceMixin.toStringBuilder(this).toString();
290 }
291
292
293 public SequenceView<C> getSubSequence(Integer start, Integer end) {
294 return SequenceMixin.createSubSequence(this, start, end);
295 }
296
297 @Override
298 public SequenceView<C> getInverse() {
299 return SequenceMixin.inverse(this);
300 }
301 }