001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.location.template;
023    
024    import static java.lang.String.format;
025    import org.biojava3.core.sequence.AccessionID;
026    import org.biojava3.core.sequence.template.Compound;
027    import org.biojava3.core.sequence.template.Sequence;
028    import static org.biojava3.core.util.Equals.classEqual;
029    import static org.biojava3.core.util.Equals.equal;
030    
031    import java.util.ArrayList;
032    import java.util.Collections;
033    import java.util.Iterator;
034    import java.util.List;
035    
036    import org.biojava3.core.sequence.Strand;
037    import org.biojava3.core.sequence.storage.JoiningSequenceReader;
038    import org.biojava3.core.sequence.template.ComplementCompound;
039    import org.biojava3.core.sequence.template.CompoundSet;
040    import org.biojava3.core.sequence.views.ComplementSequenceView;
041    import org.biojava3.core.sequence.views.ReversedSequenceView;
042    import org.biojava3.core.util.Hashcoder;
043    
044    /**
045     * Base abstraction of a location which encodes for the majority of important
046     * features about a location such as the start, end and strand
047     *
048     * @author ayates
049     */
050    public abstract class AbstractLocation implements Location {
051    
052        //TODO Need to have the Sequence lookup resolver here; see the next one as well
053        //TODO Need a way of late binding of start/stop
054    
055        private Point start;
056        private Point end;
057        private Strand strand;
058        private List<Location> subLocations;
059        private boolean circular;
060        private boolean betweenCompounds;
061        private AccessionID accession;
062    
063    
064    
065        protected AbstractLocation() {
066            super();
067        }
068    
069        /**
070         * Default constructor
071         *
072         * @param start start of the location
073         * @param end end of the location
074         * @param strand strand it is located on
075         * @param circular Boolean which says if the current location was circular
076         * or not
077         * @param betweenCompounds Indicates the location lies at the position between
078         * a pair of bases; means the bases must be next to each other (and
079         * therefore cannot be complex)
080         * @param subLocations Sub locations which composes this location
081         */
082        public AbstractLocation(Point start, Point end, Strand strand,
083                boolean circular, boolean betweenCompounds,
084                List<Location> subLocations) {
085            this(start, end, strand, circular, betweenCompounds, null, subLocations);
086        }
087    
088        /**
089         * Default constructor
090         *
091         * @param start start of the location
092         * @param end end of the location
093         * @param strand strand it is located on
094         * @param circular Boolean which says if the current location was circular
095         * or not
096         * @param betweenCompounds Indicates the location lies at the position between
097         * a pair of bases; means the bases must be next to each other (and
098         * therefore cannot be complex)
099         * @param accession The accession ID to link this location to
100         * @param subLocations Sub locations which composes this location
101         */
102        public AbstractLocation(Point start, Point end, Strand strand,
103                boolean circular, boolean betweenCompounds, AccessionID accession,
104                List<Location> subLocations) {
105            this.start = start;
106            this.end = end;
107            this.strand = strand;
108            this.circular = circular;
109            this.betweenCompounds = betweenCompounds;
110            this.accession = accession;
111            this.subLocations = Collections.unmodifiableList(subLocations);
112            assertLocation();
113        }
114    
115        protected void assertLocation() {
116            if (isCircular() && !isComplex()) {
117                throw new IllegalStateException("Cannot have a circular "
118                        + "location which is not complex");
119            }
120    
121            int st = getStart().getPosition();
122            int e = getEnd().getPosition();
123    
124            if (st > e) {
125                throw new IllegalStateException(
126                        String.format("Start (%d) is greater than end (%d); "
127                        + "this is an incorrect format",
128                        st, e));
129            }
130    
131            if(isBetweenCompounds() && isComplex()) {
132                throw new IllegalStateException("Cannot have a complex location "
133                        + "which is located between a pair of compounds");
134            }
135    
136            if(isBetweenCompounds() && (st + 1) != e) {
137                throw new IllegalStateException(
138                        String.format("Start (%d) is not next to end (%d)", st, e));
139            }
140    
141        }
142    
143        
144        public Point getEnd() {
145            return end;
146        }
147    
148        
149        public Point getStart() {
150            return start;
151        }
152    
153        
154        public int getLength() {
155            return (getEnd().getPosition() - getStart().getPosition()) + 1;
156        }
157    
158        
159        public Strand getStrand() {
160            return strand;
161        }
162    
163        
164        public List<Location> getSubLocations() {
165            if(subLocations == null) {
166                return Collections.emptyList();
167            }
168            return subLocations;
169        }
170    
171        
172        public boolean isComplex() {
173            return getSubLocations().size() > 0;
174        }
175    
176        
177        public AccessionID getAccession() {
178            return accession;
179        }
180    
181        /**
182         * Iterates through all known sub-locations for this location but does
183         * not descend
184         */
185        
186        public Iterator<Location> iterator() {
187            List<Location> list;
188            if(isComplex()) {
189                list = getSubLocations();
190            }
191            else {
192                list = new ArrayList<Location>();
193                list.add(this);
194            }
195            return list.iterator();
196        }
197    
198        /**
199         * Returns the normalised list of sub locations i.e. only those locations
200         * which do not have a sub location. Useful for when you need to get
201         * the exact elements of a location back for sub sequences.
202         */
203        
204        public List<Location> getRelevantSubLocations() {
205            return getAllSubLocations(this);
206        }
207    
208        /**
209         * Here to allow for recursion
210         */
211        private List<Location> getAllSubLocations(Location location) {
212            List<Location> flatSubLocations = new ArrayList<Location>();
213            for (Location l : location.getSubLocations()) {
214                if (l.isComplex()) {
215                    flatSubLocations.addAll(getAllSubLocations(l));
216                }
217                else {
218                    flatSubLocations.add(l);
219                }
220            }
221            return flatSubLocations;
222        }
223    
224        
225        @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
226        public boolean equals(Object obj) {
227            boolean equals = false;
228            if (classEqual(this, obj)) {
229                AbstractLocation l = (AbstractLocation) obj;
230                equals = (equal(getStart(), l.getStart())
231                        && equal(getEnd(), l.getEnd())
232                        && equal(getStrand(), l.getStrand())
233                        && equal(isCircular(), l.isCircular())
234                        && equal(isBetweenCompounds(), l.isBetweenCompounds())
235                        && equal(getSubLocations(), l.getSubLocations())
236                        && equal(getAccession(), l.getAccession()));
237            }
238            return equals;
239        }
240    
241        
242        public int hashCode() {
243            int r = Hashcoder.SEED;
244            r = Hashcoder.hash(r, getStart());
245            r = Hashcoder.hash(r, getEnd());
246            r = Hashcoder.hash(r, getStrand());
247            r = Hashcoder.hash(r, isCircular());
248            r = Hashcoder.hash(r, isBetweenCompounds());
249            r = Hashcoder.hash(r, getSubLocations());
250            r = Hashcoder.hash(r, getAccession());
251            return r;
252        }
253    
254        
255        public boolean isCircular() {
256            return circular;
257        }
258    
259        
260        public boolean isBetweenCompounds() {
261            return betweenCompounds;
262        }
263    
264        //TODO Support the accession based lookup system; maybe still require a different impl?
265    
266        /**
267         * If circular this will return the sequence represented by the sub
268         * locations joined. If not circular then we get the Sequence for the
269         * outer-bounds defined by this location.
270         */
271        
272        public <C extends Compound> Sequence<C> getSubSequence(Sequence<C> sequence) {
273            if(isCircular()) {
274                List<Sequence<C>> sequences = new ArrayList<Sequence<C>>();
275                for(Location l: this) {
276                    sequences.add(l.getSubSequence(sequence));
277                }
278                return new JoiningSequenceReader<C>(sequence.getCompoundSet(), sequences);
279            }
280            return reverseSequence(sequence.getSubSequence(
281                    getStart().getPosition(), getEnd().getPosition()));
282        }
283    
284        /**
285         * 
286         */
287        
288        public <C extends Compound> Sequence<C> getRelevantSubSequence(Sequence<C> sequence) {
289            List<Sequence<C>> sequences = new ArrayList<Sequence<C>>();
290            for(Location l: getRelevantSubLocations()) {
291                sequences.add(l.getSubSequence(sequence));
292            }
293            return new JoiningSequenceReader<C>(sequence.getCompoundSet(), sequences);
294        }
295    
296        /**
297         * Reverses and (if possible) complements the Sequence so as to represent
298         * the reverse strand (if one exists). Also does checking to see if the
299         * location we are on is on the reverse strand or not.
300         */
301        @SuppressWarnings("unchecked")
302        protected <C extends Compound> Sequence<C> reverseSequence(Sequence<C> sequence) {
303            if(getStrand() != Strand.NEGATIVE) {
304                return sequence;
305            }
306    
307            Sequence<C> reversed = new ReversedSequenceView<C>(sequence);
308            // "safe" operation as we have tried to check this
309            if(canComplement(sequence)) {
310                Sequence<ComplementCompound> casted = (Sequence<ComplementCompound>) reversed;
311                ComplementSequenceView<ComplementCompound> complement =
312                        new ComplementSequenceView<ComplementCompound>(casted);
313                return (Sequence<C>)complement;
314            }
315            return reversed;
316        }
317    
318        /**
319         * Uses the Sequence's CompoundSet to decide if a compound can
320         * be assgined to ComplementCompound meaning it can complement
321         */
322        protected <C extends Compound> boolean canComplement(Sequence<C> sequence) {
323            CompoundSet<C> compoundSet = sequence.getCompoundSet();
324            Compound c = compoundSet.getAllCompounds().iterator().next();
325            return ComplementCompound.class.isAssignableFrom(c.getClass());
326        }
327    
328        
329        public String toString() {
330            String circ = (isCircular()) ? " - circular" : "";
331            String between = (isBetweenCompounds()) ? "^" : "..";
332            return format("%d%s%d(%s%s)", getStart().getPosition(), between, getEnd().getPosition(),
333                    getStrand().getStringRepresentation(), circ);
334        }
335    
336        protected void setCircular(boolean circular) {
337            this.circular = circular;
338        }
339    
340        protected void setEnd(Point end) {
341            this.end = end;
342        }
343    
344        protected void setStart(Point start) {
345            this.start = start;
346        }
347    
348        protected void setStrand(Strand strand) {
349            this.strand = strand;
350        }
351    
352        protected void setBetweenCompounds(boolean betweenCompounds) {
353            this.betweenCompounds = betweenCompounds;
354        }
355    
356        protected void setSubLocations(List<Location> subLocations) {
357            this.subLocations = subLocations;
358        }
359    
360        protected void setAccession(AccessionID accession) {
361            this.accession = accession;
362        }
363    }