001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     *
022     * @author Richard Holland
023     * @author Scooter Willis
024     *
025     */
026    package org.biojava3.core.sequence.loader;
027    
028    import java.io.File;
029    import java.io.RandomAccessFile;
030    import java.util.ArrayList;
031    import java.util.Iterator;
032    import java.util.List;
033    
034    import org.biojava3.core.sequence.template.SequenceProxyView;
035    import org.biojava3.core.sequence.template.Compound;
036    import org.biojava3.core.exceptions.CompoundNotFoundError;
037    import org.biojava3.core.exceptions.FileAccessError;
038    import org.biojava3.core.sequence.AccessionID;
039    import org.biojava3.core.sequence.Strand;
040    
041    import org.biojava3.core.sequence.io.template.SequenceParserInterface;
042    import org.biojava3.core.sequence.storage.SequenceAsStringHelper;
043    import org.biojava3.core.sequence.template.CompoundSet;
044    import org.biojava3.core.sequence.template.ProxySequenceReader;
045    import org.biojava3.core.sequence.template.SequenceMixin;
046    import org.biojava3.core.sequence.template.SequenceView;
047    
048    /**
049     * This class represents the storage container of a sequence stored in a fasta file where
050     * the initial parsing of the file we store the offset and length of the sequence. When a call
051     * is made to any method that needs sequence data then the file will be opened and the sequence
052     * loaded. This class could be improved by using the hints or a some algorithm that indicates
053     * the sequence data once loaded should stay loaded. Could keep track of the last time sequence
054     * data was loaded and then after X amount of time clear the contents to free up memory.
055     *
056     *
057     * @author Scooter Willis <willishf at gmail dot com>
058     * @param <C>
059     */
060    public class SequenceFileProxyLoader<C extends Compound> implements ProxySequenceReader<C> {
061    
062        SequenceParserInterface sequenceParser;
063        private CompoundSet<C> compoundSet;
064        private List<C> parsedCompounds = new ArrayList<C>();
065        File file;
066        long sequenceStartIndex = -1;
067        int sequenceLength = -1;
068        private boolean initialized = false;
069    
070        /**
071         *
072         * @param file The file where the sequence will be found
073         * @param sequenceParser The parser to use to load the sequence
074         * @param sequenceStartIndex The file offset to the start of the sequence
075         * @param sequenceLength The length of the sequence
076         * @param compoundSet
077         */
078        public SequenceFileProxyLoader(File file, SequenceParserInterface sequenceParser, long sequenceStartIndex, int sequenceLength, CompoundSet<C> compoundSet) {
079            this.sequenceParser = sequenceParser;
080            this.file = file;
081            this.sequenceStartIndex = sequenceStartIndex;
082            this.sequenceLength = sequenceLength;
083            setCompoundSet(compoundSet);
084        }
085    
086        /**
087         *
088         * @param compoundSet
089         */
090        public void setCompoundSet(CompoundSet<C> compoundSet) {
091            this.compoundSet = compoundSet;
092        }
093    
094        /**
095         *  Load the sequence
096         * @return
097         */
098        private boolean init() {
099            try {
100                RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
101                randomAccessFile.seek(sequenceStartIndex);
102                String sequence = sequenceParser.getSequence(randomAccessFile, sequenceLength);
103                setContents(sequence);
104            } catch (Exception e) {
105                throw new FileAccessError("Error accessing " + file + " offset=" + sequenceStartIndex + " sequenceLength=" + sequenceLength + " " + e.toString());
106            }
107            return true;
108        }
109    
110        /**
111         *
112         * @param sequence
113         */
114        public void setContents(String sequence) {
115            // Horrendously inefficient - pretty much the way the old BJ did things.
116            // TODO Should be optimised.
117            this.parsedCompounds.clear();
118            for (int i = 0; i < sequence.length();) {
119                String compoundStr = null;
120                C compound = null;
121                for (int compoundStrLength = 1; compound == null && compoundStrLength <= compoundSet.getMaxSingleCompoundStringLength(); compoundStrLength++) {
122                    compoundStr = sequence.substring(i, i + compoundStrLength);
123                    compound = compoundSet.getCompoundForString(compoundStr);
124                }
125                if (compound == null) {
126                    throw new CompoundNotFoundError(compoundStr);
127                } else {
128                    i += compoundStr.length();
129                }
130                this.parsedCompounds.add(compound);
131            }
132    
133            setInitialized(true);
134        }
135    
136        /**
137         *
138         * @return
139         */
140        public int getLength() {
141            return sequenceLength;
142        }
143    
144        /**
145         *
146         * @param position
147         * @return
148         */
149        public C getCompoundAt(int position) {
150            if (this.isInitialized() == false) {
151                init();
152            }
153            return this.parsedCompounds.get(position - 1);
154        }
155    
156        /**
157         *
158         * @param compound
159         * @return
160         */
161        public int getIndexOf(C compound) {
162            if (this.isInitialized() == false) {
163                init();
164            }
165            return this.parsedCompounds.indexOf(compound) + 1;
166        }
167    
168        /**
169         *
170         * @param compound
171         * @return
172         */
173        public int getLastIndexOf(C compound) {
174            if (this.isInitialized() == false) {
175                init();
176            }
177            return this.parsedCompounds.lastIndexOf(compound) + 1;
178        }
179    
180        /**
181         *
182         * @return
183         */
184        public String toString() {
185            if (this.isInitialized() == false) {
186                init();
187            }
188            return getSequenceAsString();
189        }
190    
191        /**
192         *
193         * @return
194         */
195        public String getSequenceAsString() {
196            return getSequenceAsString(1, getLength(), Strand.POSITIVE);
197        }
198    
199        /**
200         *
201         * @param bioBegin
202         * @param bioEnd
203         * @param strand
204         * @return
205         */
206        public String getSequenceAsString(Integer bioBegin, Integer bioEnd, Strand strand) {
207    
208            if (this.isInitialized() == false) {
209                init();
210            }
211            SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>();
212            return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, bioBegin, bioEnd, strand);
213        }
214    
215        /**
216         *
217         * @return
218         */
219        public List<C> getAsList() {
220            if (this.isInitialized() == false) {
221                init();
222            }
223            return this.parsedCompounds;
224    
225        }
226    
227        /**
228         *
229         * @param bioBegin
230         * @param bioEnd
231         * @return
232         */
233        public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) {
234            if (this.isInitialized() == false) {
235                init();
236            }
237            return new SequenceProxyView<C>(SequenceFileProxyLoader.this, bioBegin, bioEnd);
238        }
239    
240        /**
241         *
242         * @return
243         */
244        public Iterator<C> iterator() {
245            if (this.isInitialized() == false) {
246                init();
247            }
248            return this.parsedCompounds.iterator();
249        }
250    
251        /**
252         *
253         * @return
254         */
255        public CompoundSet<C> getCompoundSet() {
256            return compoundSet;
257        }
258    
259        /**
260         * @return the initialized
261         */
262        public boolean isInitialized() {
263            return initialized;
264        }
265    
266        /**
267         * @param initialized the initialized to set
268         */
269        public void setInitialized(boolean initialized) {
270            this.initialized = initialized;
271        }
272    
273        /**
274         *
275         * @return
276         */
277        public AccessionID getAccession() {
278            throw new UnsupportedOperationException("Not supported yet.");
279        }
280    
281        /**
282         *
283         * @param compounds
284         * @return
285         */
286        public int countCompounds(C... compounds) {
287            return SequenceMixin.countCompounds(this, compounds);
288        }
289    
290        /**
291         *
292         * @return
293         */
294        @Override
295        public SequenceView<C> getInverse() {
296            return SequenceMixin.inverse(this);
297        }
298    }