001    package org.biojava3.core.sequence.storage;
002    
003    import java.util.ArrayList;
004    import java.util.Arrays;
005    import java.util.Iterator;
006    import java.util.List;
007    import java.util.NoSuchElementException;
008    
009    import org.biojava3.core.sequence.AccessionID;
010    import org.biojava3.core.sequence.template.Compound;
011    import org.biojava3.core.sequence.template.CompoundSet;
012    import org.biojava3.core.sequence.template.ProxySequenceReader;
013    import org.biojava3.core.sequence.template.Sequence;
014    import org.biojava3.core.sequence.template.SequenceMixin;
015    import org.biojava3.core.sequence.template.SequenceView;
016    
017    /**
018     * This reader actually proxies onto multiple types of sequence in order
019     * to allow a number of sequence objects to act as if they are one sequence.
020     * The code takes in any number of sequences, records the minimum and maximum
021     * bounds each sequence covers with respect to 1 position indexing and then
022     * binary searches these when a position is requested. Because of this
023     * 0 length Sequences are excluded during construction.
024     *
025     * Performance is not as good as if you are using a flat sequence however the
026     * speed of lookup is more than adaquate for most situations. Using the iterator
027     * gives the best performance as this does not rely on the binary search
028     * mechanism instead iterating through each sequence in turn.
029     *
030     * @author ayates
031     * @param <C> Tyoe of compound to hold
032     */
033    public class JoiningSequenceReader<C extends Compound> implements ProxySequenceReader<C> {
034    
035        /**
036         * Internal implementation flag and controls how we look for the right
037         * sub-sequence
038         */
039        private static final boolean BINARY_SEARCH = true;
040        private final List<Sequence<C>> sequences;
041        private final CompoundSet<C> compoundSet;
042        private int[] maxSequenceIndex;
043        private int[] minSequenceIndex;
044    
045        /**
046         * Allows creation of the store from Vargs Sequence<C> objects. CompoundSet
047         * defaults to the first element of the array (assuming there are elements
048         * available during construction otherwise we will throw an illegal
049         * state exception).
050         */
051        public JoiningSequenceReader(Sequence<C>... sequences) {
052            this(Arrays.asList(sequences));
053        }
054    
055        /**
056         * Allows creation of the store from List<Sequence<C>>. CompoundSet
057         * defaults to the first element of the List (assuming there are elements
058         * available during construction otherwise we will throw an illegal
059         * state exception).
060         */
061        public JoiningSequenceReader(List<Sequence<C>> sequences) {
062            this.sequences = grepSequences(sequences);
063            this.compoundSet = grepCompoundSet();
064        }
065    
066        public JoiningSequenceReader(CompoundSet<C> compoundSet, Sequence<C>... sequences) {
067            this(compoundSet, Arrays.asList(sequences));
068        }
069    
070        public JoiningSequenceReader(CompoundSet<C> compoundSet, List<Sequence<C>> sequences) {
071            this.sequences = grepSequences(sequences);
072            this.compoundSet = compoundSet;
073        }
074    
075        private List<Sequence<C>> grepSequences(List<Sequence<C>> sequences) {
076            List<Sequence<C>> seqs = new ArrayList<Sequence<C>>();
077            for (Sequence<C> s : sequences) {
078                if (s.getLength() != 0) {
079                    seqs.add(s);
080                }
081            }
082            return seqs;
083        }
084    
085        private CompoundSet<C> grepCompoundSet() {
086            if (sequences.isEmpty()) {
087                throw new IllegalStateException("Cannot get a CompoundSet because we have no sequences. Set during construction");
088            }
089            return sequences.get(0).getCompoundSet();
090        }
091    
092        
093        public C getCompoundAt(int position) {
094            int sequenceIndex = getSequenceIndex(position);
095            Sequence<C> sequence = sequences.get(sequenceIndex);
096            int indexInSequence = (position - getMinSequenceIndex()[sequenceIndex]) + 1;
097            return sequence.getCompoundAt(indexInSequence);
098        }
099    
100        
101        public CompoundSet<C> getCompoundSet() {
102            return compoundSet;
103        }
104    
105        
106        public int getLength() {
107            int[] maxSeqIndex = getMaxSequenceIndex();
108            if (maxSeqIndex.length == 0) {
109                return 0;
110            }
111            return maxSeqIndex[maxSeqIndex.length - 1];
112        }
113    
114        /**
115         * Returns which Sequence holds the position queried for
116         */
117        private int getSequenceIndex(int position) {
118            if (BINARY_SEARCH) {
119                return binarySearch(position);
120            } else {
121                return linearSearch(position);
122            }
123        }
124    
125        private int[] getMinSequenceIndex() {
126            if (minSequenceIndex == null) {
127                initSeqIndexes();
128            }
129            return minSequenceIndex;
130        }
131    
132        private int[] getMaxSequenceIndex() {
133            if (maxSequenceIndex == null) {
134                initSeqIndexes();
135            }
136            return maxSequenceIndex;
137        }
138    
139        private void initSeqIndexes() {
140            minSequenceIndex = new int[sequences.size()];
141            maxSequenceIndex = new int[sequences.size()];
142            int currentMaxIndex = 0;
143            int currentMinIndex = 1;
144            int lastLength = 0;
145            for (int i = 0; i < sequences.size(); i++) {
146                currentMinIndex += lastLength;
147                currentMaxIndex += sequences.get(i).getLength();
148                minSequenceIndex[i] = currentMinIndex;
149                maxSequenceIndex[i] = currentMaxIndex;
150                lastLength = sequences.get(i).getLength();
151            }
152        }
153    
154        /**
155         * Scans through the sequence index arrays in linear time. Not the best
156         * performance but easier to code
157         */
158        private int linearSearch(int position) {
159            int[] minSeqIndex = getMinSequenceIndex();
160            int[] maxSeqIndex = getMaxSequenceIndex();
161            int length = minSeqIndex.length;
162            for (int i = 0; i < length; i++) {
163                if (position >= minSeqIndex[i] && position <= maxSeqIndex[i]) {
164                    return i;
165                }
166            }
167            throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
168        }
169    
170        /**
171         * Scans through the sequence index arrays using binary search
172         */
173        private int binarySearch(int position) {
174            int[] minSeqIndex = getMinSequenceIndex();
175            int[] maxSeqIndex = getMaxSequenceIndex();
176    
177            int low = 0;
178            int high = minSeqIndex.length - 1;
179            while (low <= high) {
180                //Go to the mid point in the array
181                int mid = (low + high) >>> 1;
182    
183                //Get the max position represented by this Sequence
184                int midMinPosition = minSeqIndex[mid];
185                int midMaxPosition = maxSeqIndex[mid];
186    
187                //if current position is greater than the current bounds then
188                //increase search space
189                if (midMinPosition < position && midMaxPosition < position) {
190                    low = mid + 1;
191                } //if current position is less than current bounds then decrease
192                //search space
193                else if (midMinPosition > position && midMaxPosition > position) {
194                    high = mid - 1;
195                } else {
196                    return mid;
197                }
198            }
199            throw new IndexOutOfBoundsException("Given position " + position + " does not map into this Sequence");
200        }
201    
202        /**
203         * Iterator implementation which attempts to move through the 2D structure
204         * attempting to skip onto the next sequence as & when it is asked to
205         */
206        
207        public Iterator<C> iterator() {
208            final List<Sequence<C>> localSequences = sequences;
209            return new Iterator<C>() {
210    
211                private Iterator<C> currentSequenceIterator = null;
212                private int currentPosition = 0;
213    
214                
215                public boolean hasNext() {
216                    //If the current iterator is null then see if the Sequences object has anything
217                    if (currentSequenceIterator == null) {
218                        return !localSequences.isEmpty();
219                    }
220    
221                    //See if we had any compounds
222                    boolean hasNext = currentSequenceIterator.hasNext();
223                    if (!hasNext) {
224                        hasNext = currentPosition < sequences.size();
225                    }
226                    return hasNext;
227                }
228    
229                
230                public C next() {
231                    if (currentSequenceIterator == null) {
232                        if (localSequences.isEmpty()) {
233                            throw new NoSuchElementException("No sequences to iterate over; make sure you call hasNext() before next()");
234                        }
235                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
236                        currentPosition++;
237                    }
238                    if (!currentSequenceIterator.hasNext()) {
239                        currentSequenceIterator = localSequences.get(currentPosition).iterator();
240                        currentPosition++;
241                    }
242                    return currentSequenceIterator.next();
243                }
244    
245                
246                public void remove() throws UnsupportedOperationException {
247                    throw new UnsupportedOperationException("Cannot remove from this Sequence");
248                }
249            };
250        }
251    
252        
253        public void setCompoundSet(CompoundSet<C> compoundSet) {
254            throw new UnsupportedOperationException();
255        }
256    
257        
258        public void setContents(String sequence) {
259            throw new UnsupportedOperationException();
260        }
261    
262        
263        public int countCompounds(C... compounds) {
264            return SequenceMixin.countCompounds(this, compounds);
265        }
266    
267        
268        public AccessionID getAccession() throws UnsupportedOperationException {
269            throw new UnsupportedOperationException();
270        }
271    
272        
273        public List<C> getAsList() {
274            return SequenceMixin.toList(this);
275        }
276    
277        
278        public int getIndexOf(C compound) {
279            return SequenceMixin.indexOf(this, compound);
280        }
281    
282        
283        public int getLastIndexOf(C compound) {
284            return SequenceMixin.lastIndexOf(this, compound);
285        }
286    
287        
288        public String getSequenceAsString() {
289            return SequenceMixin.toStringBuilder(this).toString();
290        }
291    
292        
293        public SequenceView<C> getSubSequence(Integer start, Integer end) {
294            return SequenceMixin.createSubSequence(this, start, end);
295        }
296    
297        @Override
298        public SequenceView<C> getInverse() {
299            return SequenceMixin.inverse(this);
300        }
301    }