/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 *
 * @author Richard Holland
 * @author Scooter Willis
 *
 */
package org.biojava3.core.sequence.template;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.biojava3.core.sequence.AccessionID;
import org.biojava3.core.sequence.Strand;
import org.biojava3.core.sequence.TaxonomyID;
import org.biojava3.core.sequence.features.AbstractFeature;
import org.biojava3.core.sequence.features.DatabaseReferenceInterface;
import org.biojava3.core.sequence.features.FeatureInterface;
import org.biojava3.core.sequence.features.FeaturesKeyWordInterface;
import org.biojava3.core.sequence.location.SequenceLocation;
import org.biojava3.core.sequence.location.SimpleLocation;
import org.biojava3.core.sequence.location.template.Location;
import org.biojava3.core.sequence.storage.ArrayListSequenceReader;

047    /**
048     *
049     * The base class for DNA, RNA and Protein sequences.
050     * @param <C>
051     */
052    public abstract class AbstractSequence<C extends Compound> implements Sequence<C> {
053    
054        private TaxonomyID taxonomy;
055        private AccessionID accession;
056        private SequenceReader<C> sequenceStorage = null;
057        private CompoundSet<C> compoundSet;
058        private AnnotationType annotationType = AnnotationType.UNKNOWN;
059        private String description;
060        private String originalHeader;
061        private Collection<Object> userCollection;
062        private Integer bioBegin = null;
063        private Integer bioEnd = null;
064        private AbstractSequence<C> parentSequence = null;
065        private String source = null;
066        private ArrayList<String> notesList = new ArrayList<String>();
067        private Double sequenceScore = null;
068        private FeaturesKeyWordInterface featuresKeyWord = null;
069        private DatabaseReferenceInterface databaseReferences = null;
070        private ArrayList<FeatureInterface<AbstractSequence<C>, C>> features =
071                new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
072        private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>> groupedFeatures =
073                new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>>();
074    
075        public AbstractSequence() {
076        }
077    
078        /**
079         * Create a Sequence from a simple string where the values should be found in compoundSet
080         * @param seqString
081         * @param compoundSet
082         */
083        public AbstractSequence(String seqString, CompoundSet<C> compoundSet) {
084            setCompoundSet(compoundSet);
085            sequenceStorage = new ArrayListSequenceReader<C>();
086            sequenceStorage.setCompoundSet(this.getCompoundSet());
087            sequenceStorage.setContents(seqString);
088        }
089    
090        /**
091         * A ProxySequenceReader allows abstraction of both the storage of the sequence data and the location
092         * of the sequence data. A variety of use cases are possible. A ProxySequenceReader that knows the offset and of teh sequence in
093         * a large fasta file. A ProxySequenceReader that can pull Sequence data from Uniprot, NCBI or a custom database.
094         * If the ProxySequecneReader implements various interfaces then the sequence will set those interfaces so that calls to
095         * various methods will be valid.
096         *
097         * @param proxyLoader
098         * @param compoundSet
099         */
100        public AbstractSequence(ProxySequenceReader<C> proxyLoader, CompoundSet<C> compoundSet) {
101            setCompoundSet(compoundSet);
102            setProxySequenceReader(proxyLoader);
103        }
104    
105        /**
106         * Very important method that allows external mappings of sequence data and features. This method
107         * will gain additional interface inspection that allows external data sources with knowledge
108         * of features for a sequence to be supported. 
109         *  
110         * @param proxyLoader
111         */
112        public void setProxySequenceReader(ProxySequenceReader<C> proxyLoader) {
113            this.sequenceStorage = proxyLoader;
114            if (proxyLoader instanceof FeaturesKeyWordInterface) {
115                this.setFeaturesKeyWord((FeaturesKeyWordInterface) sequenceStorage);
116            }
117            if (proxyLoader instanceof DatabaseReferenceInterface) {
118                this.setDatabaseReferences((DatabaseReferenceInterface) sequenceStorage);
119            }
120        }
121    
122        public ProxySequenceReader<C> getProxySequenceReader() {
123            return (ProxySequenceReader<C>) sequenceStorage;
124        }
125    
126        /**
127         * @return the bioBegin
128         */
129        public Integer getBioBegin() {
130            if (bioBegin == null) {
131                return 1;
132            } else {
133                return bioBegin;
134            }
135        }
136    
137        /**
138         * @param bioBegin the bioBegin to set
139         */
140        public void setBioBegin(Integer begin) {
141            this.bioBegin = begin;
142        }
143    
144        /**
145         * @return the bioEnd
146         */
147        public Integer getBioEnd() {
148            if (bioEnd == null) {
149                return this.getLength();
150            } else {
151                return bioEnd;
152            }
153        }
154    
155        /**
156         * @param bioEnd the bioEnd to set
157         */
158        public void setBioEnd(Integer end) {
159            this.bioEnd = end;
160        }
161    
162        /**
163         * Provided for convience if the developer needs to associate data with a sequence
164         *
165         * @return
166         */
167        public Collection<Object> getUserCollection() {
168    
169            return userCollection;
170        }
171    
172        /**
173         *
174         * @param userCollection
175         */
176        public void setUserCollection(Collection<Object> userCollection) {
177            this.userCollection = userCollection;
178        }
179    
180        /**
181         * @return the annotation
182         */
183        public AnnotationType getAnnotationType() {
184            return annotationType;
185        }
186    
187        /**
188         * @param annotation the annotation to set
189         */
190        public void setAnnotationType(AnnotationType annotationType) {
191            this.annotationType = annotationType;
192        }
193    
194        /**
195         * @return the description
196         */
197        public String getDescription() {
198            return description;
199        }
200    
201        /**
202         * @param description the description to set
203         */
204        public void setDescription(String description) {
205            this.description = description;
206        }
207    
208        /**
209         * @return the originalHeader
210         */
211        public String getOriginalHeader() {
212            return originalHeader;
213        }
214    
215        /**
216         * @param originalHeader the originalHeader to set
217         */
218        public void setOriginalHeader(String originalHeader) {
219            this.originalHeader = originalHeader;
220        }
221    
222        /**
223         * @return the parentSequence
224         */
225        public AbstractSequence<C> getParentSequence() {
226            return parentSequence;
227        }
228    
229        /**
230         * @param parentSequence the parentSequence to set
231         */
232        public void setParentSequence(AbstractSequence<C> parentSequence) {
233            this.parentSequence = parentSequence;
234        }
235    
236        /**
237         * Added support for the source of this sequence for GFF3 export
238         * If a sub sequence doesn't have  source then check for parent source
239         * @return the source
240         */
241        public String getSource() {
242            if (source != null) {
243                return source;
244            }
245            if (parentSequence != null) {
246                return parentSequence.getSource();
247            }
248            return null;
249        }
250    
251        /**
252         * Added support for the source of this sequence for GFF3 export
253         * @param source the source to set
254         */
255        public void setSource(String source) {
256    
257            this.source = source;
258        }
259    
260        /**
261         * Add notes about this sequence that will get exported for GFF3
262         * @param note
263         */
264        public void addNote(String note) {
265            notesList.add(note);
266        }
267    
268        public void removeNote(String note) {
269            notesList.remove(note);
270        }
271    
272        /**
273         * @return the notesList
274         */
275        public ArrayList<String> getNotesList() {
276            return notesList;
277        }
278    
279        /**
280         * @param notesList the notesList to set
281         */
282        public void setNotesList(ArrayList<String> notesList) {
283            this.notesList = notesList;
284        }
285    
286        /**
287         * Provide place holder for a metric that indicate a score associated with the sequence
288         * @return the sequenceScore
289         */
290        public Double getSequenceScore() {
291            return sequenceScore;
292        }
293    
294        /**
295         * @param sequenceScore the sequenceScore to set
296         */
297        public void setSequenceScore(Double sequenceScore) {
298            this.sequenceScore = sequenceScore;
299        }
300    
301        /**
302         * Return features at a sequence position by type
303         * @param featureType
304         * @param bioSequencePosition
305         * @return
306         */
307        public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures(String featureType, int bioSequencePosition) {
308            ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureHits =
309                    new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
310            List<FeatureInterface<AbstractSequence<C>, C>> features = getFeaturesByType(featureType);
311            if (features != null) {
312                for (FeatureInterface<AbstractSequence<C>, C> feature : features) {
313                    if (bioSequencePosition >= feature.getLocations().getStart().getPosition() && bioSequencePosition <= feature.getLocations().getEnd().getPosition()) {
314                        featureHits.add(feature);
315                    }
316                }
317            }
318            return featureHits;
319        }
320    
321        /**
322         * Return features at a sequence position
323         * @param featureType
324         * @param bioSequencePosition
325         * @return
326         */
327        public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures(int bioSequencePosition) {
328            ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureHits =
329                    new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
330            if (features != null) {
331                for (FeatureInterface<AbstractSequence<C>, C> feature : features) {
332                    if (bioSequencePosition >= feature.getLocations().getStart().getPosition() && bioSequencePosition <= feature.getLocations().getEnd().getPosition()) {
333                        featureHits.add(feature);
334                    }
335                }
336            }
337            return featureHits;
338        }
339    
340        /**
341         *
342         * @return
343         */
344        public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures() {
345            return features;
346        }
347    
348        /**
349         * Method to help set the proper details for a feature as it relates to a sequence
350         * where the feature needs to have a location on the sequence
351         * @param bioStart
352         * @param bioEnd
353         * @param feature
354         */
355        public void addFeature(int bioStart, int bioEnd, FeatureInterface<AbstractSequence<C>, C> feature) {
356            SequenceLocation<AbstractSequence<C>, C> sequenceLocation =
357                    new SequenceLocation<AbstractSequence<C>, C>(bioStart, bioEnd, this);
358            feature.setLocation(sequenceLocation);
359            addFeature(feature);
360        }
361    
362        /**
363         * Add a feature to this sequence. The feature will be added to the collection where the order is start position and if more than
364         * one feature at the same start position then longest is added first. This helps on doing feature layout for displaying features
365         * in SequenceFeaturePanel
366         * @param feature
367         */
368        public void addFeature(FeatureInterface<AbstractSequence<C>, C> feature) {
369            features.add(feature);
370            ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureList = groupedFeatures.get(feature.getType());
371            if (featureList == null) {
372                featureList = new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
373                groupedFeatures.put(feature.getType(), featureList);
374            }
375            featureList.add(feature);
376            Collections.sort(features, AbstractFeature.LOCATION_LENGTH);
377            Collections.sort(featureList, AbstractFeature.LOCATION_LENGTH);
378        }
379    
380        /**
381         * Remove a feature from the sequence
382         * @param feature
383         */
384        public void removeFeature(FeatureInterface<AbstractSequence<C>, C> feature) {
385            features.remove(feature);
386            ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureList = groupedFeatures.get(feature.getType());
387            if (featureList != null) {
388                featureList.remove(feature);
389                if (featureList.isEmpty()) {
390                    groupedFeatures.remove(feature.getType());
391                }
392            }
393        }
394    
395        /**
396         *
397         * @param type
398         * @return
399         */
400        public List<FeatureInterface<AbstractSequence<C>, C>> getFeaturesByType(String type) {
401            List<FeatureInterface<AbstractSequence<C>, C>> features = groupedFeatures.get(type);
402            if (features == null) {
403                features = new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
404            }
405            return features;
406        }
407    
408        /**
409         * @return the featuresKeyWord
410         */
411        public FeaturesKeyWordInterface getFeaturesKeyWord() {
412            return featuresKeyWord;
413        }
414    
415        /**
416         * @param featuresKeyWord the featuresKeyWord to set
417         */
418        public void setFeaturesKeyWord(FeaturesKeyWordInterface featuresKeyWord) {
419            this.featuresKeyWord = featuresKeyWord;
420        }
421    
422        /**
423         * @return the databaseReferences
424         */
425        public DatabaseReferenceInterface getDatabaseReferences() {
426            return databaseReferences;
427        }
428    
429        /**
430         * @param databaseReferences the databaseReferences to set
431         */
432        public void setDatabaseReferences(DatabaseReferenceInterface databaseReferences) {
433            this.databaseReferences = databaseReferences;
434        }
435    
436        public enum AnnotationType {
437    
438            CURATED, PREDICTED, UNKNOWN;
439        }
440    
441        /**
442         * @return the accession
443         */
444        public AccessionID getAccession() {
445            return accession;
446        }
447    
448        /**
449         * @param accession the accession to set
450         */
451        public void setAccession(AccessionID accession) {
452            this.accession = accession;
453        }
454    
455        /**
456         * @return the species
457         */
458        public TaxonomyID getTaxonomy() {
459            return taxonomy;
460        }
461    
462        /**
463         * @param species the species to set
464         */
465        public void setTaxonomy(TaxonomyID taxonomy) {
466            this.taxonomy = taxonomy;
467        }
468    
469        public CompoundSet<C> getCompoundSet() {
470            if (compoundSet != null) {
471                return compoundSet;
472            }
473            if (parentSequence != null) {
474                return parentSequence.getCompoundSet();
475            }
476            return null;
477    
478    
479        }
480    
481        public void setCompoundSet(CompoundSet<C> compoundSet) {
482            this.compoundSet = compoundSet;
483        }
484    
485        @Override
486        public String toString() {
487            return getSequenceAsString();
488        }
489    
490        private SequenceReader<C> getSequenceStorage() {
491            if (sequenceStorage != null) {
492                return sequenceStorage;
493            }
494            if (parentSequence != null) {
495                return parentSequence.getSequenceStorage();
496            }
497            return null;
498        }
499    
500        /**
501         *
502         * @param begin
503         * @param end
504         * @param strand 
505         * @return
506         */
507        public String getSequenceAsString(Integer bioStart, Integer bioEnd, Strand strand) {
508            Location loc = new SimpleLocation(bioStart, bioEnd, strand);
509            return loc.getSubSequence(this).getSequenceAsString();
510        }
511    
512        /**
513         * Default case is to assume strand is positive because only CDSSequence can be either positive or negative Strand.
514         * @return
515         */
516        public String getSequenceAsString() {
517            return SequenceMixin.toString(this);
518    
519        }
520    
521        /**
522         *
523         * @return
524         */
525        public List<C> getAsList() {
526            return SequenceMixin.toList(this);
527        }
528    
529        /**
530         *
531         * @param position
532         * @return
533         */
534        public C getCompoundAt(int position) {
535            return getSequenceStorage().getCompoundAt(position);
536        }
537    
538        /**
539         *
540         * @param compound
541         * @return
542         */
543        public int getIndexOf(C compound) {
544            return getSequenceStorage().getIndexOf(compound);
545        }
546    
547        /**
548         *
549         * @param compound
550         * @return
551         */
552        public int getLastIndexOf(C compound) {
553            return getSequenceStorage().getLastIndexOf(compound);
554        }
555    
556        /**
557         *
558         * @return
559         */
560        public int getLength() {
561            return getSequenceStorage().getLength();
562        }
563    
564        /**
565         *
566         * @param bioStart
567         * @param bioEnd
568         * @return
569         */
570        public SequenceView<C> getSubSequence(final Integer bioStart, final Integer bioEnd) {
571            return new SequenceProxyView<C>(this, bioStart, bioEnd);
572        }
573    
574        /**
575         *
576         * @return
577         */
578        public Iterator<C> iterator() {
579            return getSequenceStorage().iterator();
580        }
581    
582        /**
583         *
584         * @param compounds
585         * @return
586         */
587        public int countCompounds(C... compounds) {
588            return SequenceMixin.countCompounds(this, compounds);
589        }
590    
591        /**
592         *
593         * @return
594         */
595        @Override
596        public SequenceView<C> getInverse() {
597            return SequenceMixin.inverse(this);
598        }
599    }