001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 *
022 * @author Richard Holland
023 * @author Scooter Willis
024 *
025 */
026 package org.biojava3.core.sequence.template;
027
028 import java.util.ArrayList;
029 import java.util.Collection;
030 import java.util.Collections;
031 import java.util.Iterator;
032 import java.util.LinkedHashMap;
033 import java.util.List;
034
035 import org.biojava3.core.sequence.AccessionID;
036 import org.biojava3.core.sequence.Strand;
037 import org.biojava3.core.sequence.TaxonomyID;
038 import org.biojava3.core.sequence.features.AbstractFeature;
039 import org.biojava3.core.sequence.features.DatabaseReferenceInterface;
040 import org.biojava3.core.sequence.features.FeatureInterface;
041 import org.biojava3.core.sequence.features.FeaturesKeyWordInterface;
042 import org.biojava3.core.sequence.location.SequenceLocation;
043 import org.biojava3.core.sequence.location.SimpleLocation;
044 import org.biojava3.core.sequence.location.template.Location;
045 import org.biojava3.core.sequence.storage.ArrayListSequenceReader;
046
047 /**
048 *
049 * The base class for DNA, RNA and Protein sequences.
050 * @param <C>
051 */
052 public abstract class AbstractSequence<C extends Compound> implements Sequence<C> {
053
054 private TaxonomyID taxonomy;
055 private AccessionID accession;
056 private SequenceReader<C> sequenceStorage = null;
057 private CompoundSet<C> compoundSet;
058 private AnnotationType annotationType = AnnotationType.UNKNOWN;
059 private String description;
060 private String originalHeader;
061 private Collection<Object> userCollection;
062 private Integer bioBegin = null;
063 private Integer bioEnd = null;
064 private AbstractSequence<C> parentSequence = null;
065 private String source = null;
066 private ArrayList<String> notesList = new ArrayList<String>();
067 private Double sequenceScore = null;
068 private FeaturesKeyWordInterface featuresKeyWord = null;
069 private DatabaseReferenceInterface databaseReferences = null;
070 private ArrayList<FeatureInterface<AbstractSequence<C>, C>> features =
071 new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
072 private LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>> groupedFeatures =
073 new LinkedHashMap<String, ArrayList<FeatureInterface<AbstractSequence<C>, C>>>();
074
075 public AbstractSequence() {
076 }
077
078 /**
079 * Create a Sequence from a simple string where the values should be found in compoundSet
080 * @param seqString
081 * @param compoundSet
082 */
083 public AbstractSequence(String seqString, CompoundSet<C> compoundSet) {
084 setCompoundSet(compoundSet);
085 sequenceStorage = new ArrayListSequenceReader<C>();
086 sequenceStorage.setCompoundSet(this.getCompoundSet());
087 sequenceStorage.setContents(seqString);
088 }
089
090 /**
091 * A ProxySequenceReader allows abstraction of both the storage of the sequence data and the location
092 * of the sequence data. A variety of use cases are possible. A ProxySequenceReader that knows the offset and of teh sequence in
093 * a large fasta file. A ProxySequenceReader that can pull Sequence data from Uniprot, NCBI or a custom database.
094 * If the ProxySequecneReader implements various interfaces then the sequence will set those interfaces so that calls to
095 * various methods will be valid.
096 *
097 * @param proxyLoader
098 * @param compoundSet
099 */
100 public AbstractSequence(ProxySequenceReader<C> proxyLoader, CompoundSet<C> compoundSet) {
101 setCompoundSet(compoundSet);
102 setProxySequenceReader(proxyLoader);
103 }
104
105 /**
106 * Very important method that allows external mappings of sequence data and features. This method
107 * will gain additional interface inspection that allows external data sources with knowledge
108 * of features for a sequence to be supported.
109 *
110 * @param proxyLoader
111 */
112 public void setProxySequenceReader(ProxySequenceReader<C> proxyLoader) {
113 this.sequenceStorage = proxyLoader;
114 if (proxyLoader instanceof FeaturesKeyWordInterface) {
115 this.setFeaturesKeyWord((FeaturesKeyWordInterface) sequenceStorage);
116 }
117 if (proxyLoader instanceof DatabaseReferenceInterface) {
118 this.setDatabaseReferences((DatabaseReferenceInterface) sequenceStorage);
119 }
120 }
121
122 public ProxySequenceReader<C> getProxySequenceReader() {
123 return (ProxySequenceReader<C>) sequenceStorage;
124 }
125
126 /**
127 * @return the bioBegin
128 */
129 public Integer getBioBegin() {
130 if (bioBegin == null) {
131 return 1;
132 } else {
133 return bioBegin;
134 }
135 }
136
137 /**
138 * @param bioBegin the bioBegin to set
139 */
140 public void setBioBegin(Integer begin) {
141 this.bioBegin = begin;
142 }
143
144 /**
145 * @return the bioEnd
146 */
147 public Integer getBioEnd() {
148 if (bioEnd == null) {
149 return this.getLength();
150 } else {
151 return bioEnd;
152 }
153 }
154
155 /**
156 * @param bioEnd the bioEnd to set
157 */
158 public void setBioEnd(Integer end) {
159 this.bioEnd = end;
160 }
161
162 /**
163 * Provided for convience if the developer needs to associate data with a sequence
164 *
165 * @return
166 */
167 public Collection<Object> getUserCollection() {
168
169 return userCollection;
170 }
171
172 /**
173 *
174 * @param userCollection
175 */
176 public void setUserCollection(Collection<Object> userCollection) {
177 this.userCollection = userCollection;
178 }
179
180 /**
181 * @return the annotation
182 */
183 public AnnotationType getAnnotationType() {
184 return annotationType;
185 }
186
187 /**
188 * @param annotation the annotation to set
189 */
190 public void setAnnotationType(AnnotationType annotationType) {
191 this.annotationType = annotationType;
192 }
193
194 /**
195 * @return the description
196 */
197 public String getDescription() {
198 return description;
199 }
200
201 /**
202 * @param description the description to set
203 */
204 public void setDescription(String description) {
205 this.description = description;
206 }
207
208 /**
209 * @return the originalHeader
210 */
211 public String getOriginalHeader() {
212 return originalHeader;
213 }
214
215 /**
216 * @param originalHeader the originalHeader to set
217 */
218 public void setOriginalHeader(String originalHeader) {
219 this.originalHeader = originalHeader;
220 }
221
222 /**
223 * @return the parentSequence
224 */
225 public AbstractSequence<C> getParentSequence() {
226 return parentSequence;
227 }
228
229 /**
230 * @param parentSequence the parentSequence to set
231 */
232 public void setParentSequence(AbstractSequence<C> parentSequence) {
233 this.parentSequence = parentSequence;
234 }
235
236 /**
237 * Added support for the source of this sequence for GFF3 export
238 * If a sub sequence doesn't have source then check for parent source
239 * @return the source
240 */
241 public String getSource() {
242 if (source != null) {
243 return source;
244 }
245 if (parentSequence != null) {
246 return parentSequence.getSource();
247 }
248 return null;
249 }
250
251 /**
252 * Added support for the source of this sequence for GFF3 export
253 * @param source the source to set
254 */
255 public void setSource(String source) {
256
257 this.source = source;
258 }
259
260 /**
261 * Add notes about this sequence that will get exported for GFF3
262 * @param note
263 */
264 public void addNote(String note) {
265 notesList.add(note);
266 }
267
268 public void removeNote(String note) {
269 notesList.remove(note);
270 }
271
272 /**
273 * @return the notesList
274 */
275 public ArrayList<String> getNotesList() {
276 return notesList;
277 }
278
279 /**
280 * @param notesList the notesList to set
281 */
282 public void setNotesList(ArrayList<String> notesList) {
283 this.notesList = notesList;
284 }
285
286 /**
287 * Provide place holder for a metric that indicate a score associated with the sequence
288 * @return the sequenceScore
289 */
290 public Double getSequenceScore() {
291 return sequenceScore;
292 }
293
294 /**
295 * @param sequenceScore the sequenceScore to set
296 */
297 public void setSequenceScore(Double sequenceScore) {
298 this.sequenceScore = sequenceScore;
299 }
300
301 /**
302 * Return features at a sequence position by type
303 * @param featureType
304 * @param bioSequencePosition
305 * @return
306 */
307 public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures(String featureType, int bioSequencePosition) {
308 ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureHits =
309 new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
310 List<FeatureInterface<AbstractSequence<C>, C>> features = getFeaturesByType(featureType);
311 if (features != null) {
312 for (FeatureInterface<AbstractSequence<C>, C> feature : features) {
313 if (bioSequencePosition >= feature.getLocations().getStart().getPosition() && bioSequencePosition <= feature.getLocations().getEnd().getPosition()) {
314 featureHits.add(feature);
315 }
316 }
317 }
318 return featureHits;
319 }
320
321 /**
322 * Return features at a sequence position
323 * @param featureType
324 * @param bioSequencePosition
325 * @return
326 */
327 public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures(int bioSequencePosition) {
328 ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureHits =
329 new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
330 if (features != null) {
331 for (FeatureInterface<AbstractSequence<C>, C> feature : features) {
332 if (bioSequencePosition >= feature.getLocations().getStart().getPosition() && bioSequencePosition <= feature.getLocations().getEnd().getPosition()) {
333 featureHits.add(feature);
334 }
335 }
336 }
337 return featureHits;
338 }
339
340 /**
341 *
342 * @return
343 */
344 public List<FeatureInterface<AbstractSequence<C>, C>> getFeatures() {
345 return features;
346 }
347
348 /**
349 * Method to help set the proper details for a feature as it relates to a sequence
350 * where the feature needs to have a location on the sequence
351 * @param bioStart
352 * @param bioEnd
353 * @param feature
354 */
355 public void addFeature(int bioStart, int bioEnd, FeatureInterface<AbstractSequence<C>, C> feature) {
356 SequenceLocation<AbstractSequence<C>, C> sequenceLocation =
357 new SequenceLocation<AbstractSequence<C>, C>(bioStart, bioEnd, this);
358 feature.setLocation(sequenceLocation);
359 addFeature(feature);
360 }
361
362 /**
363 * Add a feature to this sequence. The feature will be added to the collection where the order is start position and if more than
364 * one feature at the same start position then longest is added first. This helps on doing feature layout for displaying features
365 * in SequenceFeaturePanel
366 * @param feature
367 */
368 public void addFeature(FeatureInterface<AbstractSequence<C>, C> feature) {
369 features.add(feature);
370 ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureList = groupedFeatures.get(feature.getType());
371 if (featureList == null) {
372 featureList = new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
373 groupedFeatures.put(feature.getType(), featureList);
374 }
375 featureList.add(feature);
376 Collections.sort(features, AbstractFeature.LOCATION_LENGTH);
377 Collections.sort(featureList, AbstractFeature.LOCATION_LENGTH);
378 }
379
380 /**
381 * Remove a feature from the sequence
382 * @param feature
383 */
384 public void removeFeature(FeatureInterface<AbstractSequence<C>, C> feature) {
385 features.remove(feature);
386 ArrayList<FeatureInterface<AbstractSequence<C>, C>> featureList = groupedFeatures.get(feature.getType());
387 if (featureList != null) {
388 featureList.remove(feature);
389 if (featureList.isEmpty()) {
390 groupedFeatures.remove(feature.getType());
391 }
392 }
393 }
394
395 /**
396 *
397 * @param type
398 * @return
399 */
400 public List<FeatureInterface<AbstractSequence<C>, C>> getFeaturesByType(String type) {
401 List<FeatureInterface<AbstractSequence<C>, C>> features = groupedFeatures.get(type);
402 if (features == null) {
403 features = new ArrayList<FeatureInterface<AbstractSequence<C>, C>>();
404 }
405 return features;
406 }
407
408 /**
409 * @return the featuresKeyWord
410 */
411 public FeaturesKeyWordInterface getFeaturesKeyWord() {
412 return featuresKeyWord;
413 }
414
415 /**
416 * @param featuresKeyWord the featuresKeyWord to set
417 */
418 public void setFeaturesKeyWord(FeaturesKeyWordInterface featuresKeyWord) {
419 this.featuresKeyWord = featuresKeyWord;
420 }
421
422 /**
423 * @return the databaseReferences
424 */
425 public DatabaseReferenceInterface getDatabaseReferences() {
426 return databaseReferences;
427 }
428
429 /**
430 * @param databaseReferences the databaseReferences to set
431 */
432 public void setDatabaseReferences(DatabaseReferenceInterface databaseReferences) {
433 this.databaseReferences = databaseReferences;
434 }
435
436 public enum AnnotationType {
437
438 CURATED, PREDICTED, UNKNOWN;
439 }
440
441 /**
442 * @return the accession
443 */
444 public AccessionID getAccession() {
445 return accession;
446 }
447
448 /**
449 * @param accession the accession to set
450 */
451 public void setAccession(AccessionID accession) {
452 this.accession = accession;
453 }
454
455 /**
456 * @return the species
457 */
458 public TaxonomyID getTaxonomy() {
459 return taxonomy;
460 }
461
462 /**
463 * @param species the species to set
464 */
465 public void setTaxonomy(TaxonomyID taxonomy) {
466 this.taxonomy = taxonomy;
467 }
468
469 public CompoundSet<C> getCompoundSet() {
470 if (compoundSet != null) {
471 return compoundSet;
472 }
473 if (parentSequence != null) {
474 return parentSequence.getCompoundSet();
475 }
476 return null;
477
478
479 }
480
481 public void setCompoundSet(CompoundSet<C> compoundSet) {
482 this.compoundSet = compoundSet;
483 }
484
485 @Override
486 public String toString() {
487 return getSequenceAsString();
488 }
489
490 private SequenceReader<C> getSequenceStorage() {
491 if (sequenceStorage != null) {
492 return sequenceStorage;
493 }
494 if (parentSequence != null) {
495 return parentSequence.getSequenceStorage();
496 }
497 return null;
498 }
499
500 /**
501 *
502 * @param begin
503 * @param end
504 * @param strand
505 * @return
506 */
507 public String getSequenceAsString(Integer bioStart, Integer bioEnd, Strand strand) {
508 Location loc = new SimpleLocation(bioStart, bioEnd, strand);
509 return loc.getSubSequence(this).getSequenceAsString();
510 }
511
512 /**
513 * Default case is to assume strand is positive because only CDSSequence can be either positive or negative Strand.
514 * @return
515 */
516 public String getSequenceAsString() {
517 return SequenceMixin.toString(this);
518
519 }
520
521 /**
522 *
523 * @return
524 */
525 public List<C> getAsList() {
526 return SequenceMixin.toList(this);
527 }
528
529 /**
530 *
531 * @param position
532 * @return
533 */
534 public C getCompoundAt(int position) {
535 return getSequenceStorage().getCompoundAt(position);
536 }
537
538 /**
539 *
540 * @param compound
541 * @return
542 */
543 public int getIndexOf(C compound) {
544 return getSequenceStorage().getIndexOf(compound);
545 }
546
547 /**
548 *
549 * @param compound
550 * @return
551 */
552 public int getLastIndexOf(C compound) {
553 return getSequenceStorage().getLastIndexOf(compound);
554 }
555
556 /**
557 *
558 * @return
559 */
560 public int getLength() {
561 return getSequenceStorage().getLength();
562 }
563
564 /**
565 *
566 * @param bioStart
567 * @param bioEnd
568 * @return
569 */
570 public SequenceView<C> getSubSequence(final Integer bioStart, final Integer bioEnd) {
571 return new SequenceProxyView<C>(this, bioStart, bioEnd);
572 }
573
574 /**
575 *
576 * @return
577 */
578 public Iterator<C> iterator() {
579 return getSequenceStorage().iterator();
580 }
581
582 /**
583 *
584 * @param compounds
585 * @return
586 */
587 public int countCompounds(C... compounds) {
588 return SequenceMixin.countCompounds(this, compounds);
589 }
590
591 /**
592 *
593 * @return
594 */
595 @Override
596 public SequenceView<C> getInverse() {
597 return SequenceMixin.inverse(this);
598 }
599 }