001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.location.template;
023    
024    import java.util.ArrayList;
025    import java.util.List;
026    
027    import org.biojava3.core.sequence.Strand;
028    import org.biojava3.core.sequence.location.SimpleLocation;
029    import org.biojava3.core.sequence.location.SimplePoint;
030    import org.biojava3.core.sequence.template.Accessioned;
031    import org.biojava3.core.sequence.template.Compound;
032    import org.biojava3.core.sequence.template.Sequence;
033    
034    /**
035     * Sets of integers used to represent the location of features on sequence. A
036     * location can be a single set of bounds or composed of multiple
037     * sub-locations. Each sub-location is a Location and therefore subject to the
038     * same rules.
039     * 
040     * @author ayates
041     */
042    public interface Location extends Iterable<Location>, Accessioned {
043    
044        /**
045         * Basic location which is set to the minimum and maximum bounds of
046         * {@link Integer}. {@link Strand} is set to {@link Strand#UNDEFINED}.
047         */
048        public static final Location EMPTY =
049                new SimpleLocation(Integer.MIN_VALUE, Integer.MAX_VALUE, Strand.UNDEFINED);
050    
051        /**
052         * Start of the location
053         */
054        Point getStart();
055    
056        /**
057         * End of the location
058         */
059        Point getEnd();
060    
061        /**
062         * Returns the length of the outer bounds of this location
063         */
064        int getLength();
065    
066        /**
067         * Strand which the location is located on
068         */
069        Strand getStrand();
070    
071        /**
072         * Gives access to the sub locations for this location. However this does
073         * not return sub-locations of sub-locations. For that functionality use
074         * {@link #getAllSubLocations()}.
075         *
076         * @return A list of a single level of sub-locations
077         */
078        List<Location> getSubLocations();
079    
080        /**
081         * An extension to {@link #getSubLocations()} which returns sub-locations
082         * of sub-locations; this will continue until it runs out of those locations.
083         *
084         * @return List of all sub locations including sub-locations of sub locations
085         */
086        List<Location> getRelevantSubLocations();
087    
088        /**
089         * Returns true if the location is considered to be complex meaning
090         * the location is actually composed of sub-locations.
091         */
092        boolean isComplex();
093    
094        /**
095         * Indicates if this location is circular.
096         */
097        boolean isCircular();
098    
099        /**
100         * Returns true if the position is meant to represent a point between
101         * two points such as 78^79. Only valid if start and stop are next to
102         * each other.
103         */
104        boolean isBetweenCompounds();
105    
106        /**
107         * Will return a SequenceReader object which represents the outer bounds
108         * of this Location
109         *
110         * @param <C> The type of compound to use
111         * @param sequence The sequence object to work with
112         * @return The sequence
113         */
114        <C extends Compound> Sequence<C> getSubSequence(Sequence<C> sequence);
115    
116        /**
117         * Will return a SequenceReader object which offers a view of all resolved
118         * locations i.e. those locations which are not complex and define the
119         * true Sequence represented
120         *
121         * @param <C> The type of compound to use
122         * @param sequence The sequence object to work with
123         * @return The full assembled sequence
124         */
125        <C extends Compound> Sequence<C> getRelevantSubSequence(Sequence<C> sequence);
126    
127        /**
128         * Helper methods for use with the Location classes. Taking its
129         * inspiration from the RichSequence.Tools class from the old BioJava
130         */
131        public static class Tools {
132    
133            /**
134             * Used for building a location from a series of sub-locations
135             */
136            public static Location location(List<Location> locations, Integer sequenceLength, String type) {
137                type = (type == null) ? "join" : type;
138                sequenceLength = (sequenceLength == null) ? -1 : sequenceLength;
139    
140    
141    
142                return null;
143            }
144    
145            /**
146             * Returns a location object which unlike the location constructors
147             * allows you to input reverse coordinates and will convert
148             * these into the right location on the positive strand.
149             */
150            public static Location location(int start, int end, Strand strand, int length) {
151                int min = Math.min(start, end);
152                //if this is true then we have a coord on the +ve strand even though Strand could be negative
153                boolean isReverse = (min != start);
154                if (isReverse) {
155                    return new SimpleLocation(
156                            new SimplePoint(start).reverse(length),
157                            new SimplePoint(end).reverse(length),
158                            strand);
159                }
160                return new SimpleLocation(start, end, strand);
161            }
162    
163            /**
164             * Converts a location which defines the outer bounds of a circular
165             * location and splits it into the required portions. Unlike any
166             * other location builder this allows you to express your input
167             * location on the reverse strand
168             *
169             * @param location The location which currently expresses the outer
170             * bounds of a circular location.
171             * @param length The length of the circular genomic unit
172             * @return The circular location; can optionally return a normal non
173             * circular location if the one you give is within the bounds of
174             * the length
175             */
176            public static Location circularLocation(int start, int end, Strand strand, int length) {
177    
178                int min = Math.min(start, end);
179                int max = Math.max(start, end);
180                //Tells us we're dealing with something that's not _right_
181                boolean isReverse = (min != start);
182    
183                if (min > length) {
184                    throw new IllegalArgumentException("Cannot process a "
185                            + "location whose lowest coordinate is less than "
186                            + "the given length " + length);
187                }
188    
189                //If max positon was less than length the return a normal location
190                if (max <= length) {
191                    return location(start, end, strand, length);
192                }
193    
194                //Fine for forward coords (i..e start < end)
195                int modStart = modulateCircularIndex(start, length);
196                int modEnd = modulateCircularIndex(end, length);
197                int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
198    
199                if (isReverse) {
200                    int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
201                    int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
202                    modStart = reversedModStart;
203                    modEnd = reversedModEnd;
204                    start = reversedModStart;
205                    //+1 to number of passes to skip the run encoded by the start
206                    end = (length * (numberOfPasses + 1)) + modEnd;
207                }
208    
209                List<Location> locations = new ArrayList<Location>();
210                locations.add(new SimpleLocation(modStart, length, strand));
211                for (int i = 0; i < numberOfPasses; i++) {
212                    locations.add(new SimpleLocation(1, length, strand));
213                }
214                locations.add(new SimpleLocation(1, modEnd, strand));
215                return new SimpleLocation(new SimplePoint(start),
216                        new SimplePoint(end), strand, true, false, locations);
217            }
218    
219            private static interface LocationPredicate {
220    
221                boolean accept(Location previous, Location current);
222            }
223    
224            /**
225             * Scans through a list of locations to find the Location with the
226             * lowest start
227             */
228            public static Location getMin(List<Location> locations) {
229                return scanLocations(locations, new LocationPredicate() {
230                    
231                    public boolean accept(Location previous, Location current) {
232                        int res = current.getStart().compareTo(previous.getStart());
233                        return res < 0;
234                    }
235                });
236            }
237    
238            /**
239             * Scans through a list of locations to find the Location with the
240             * highest end
241             */
242            public static Location getMax(List<Location> locations) {
243                return scanLocations(locations, new LocationPredicate() {
244                    
245                    public boolean accept(Location previous, Location current) {
246                        int res = current.getEnd().compareTo(previous.getEnd());
247                        return res > 0;
248                    }
249                });
250            }
251    
252            /**
253             * Used for scanning through a list of locations; assumes the
254             * locations given will have at least one value otherwise
255             * we will get a null pointer
256             */
257            private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
258                Location location = null;
259                for (Location l : locations) {
260                    if (location == null) {
261                        location = l;
262                    } else {
263                        if (predicate.accept(location, l)) {
264                            location = l;
265                        }
266                    }
267                }
268                return location;
269            }
270    
271            /**
272             * Takes a point on a circular location and moves it left until it falls
273             * at the earliest possible point that represents the same base.
274             *
275             * @param index Index of the position to work with
276             * @param seqLength Length of the Sequence
277             * @return The shifted point
278             */
279            public static int modulateCircularIndex(int index, int seqLength) {
280                // Dummy case
281                if (seqLength == 0) {
282                    return index;
283                }
284                // Modulate
285                while (index > seqLength) {
286                    index -= seqLength;
287                }
288                return index;
289            }
290    
291            /**
292             * Works in a similar way to modulateCircularLocation but returns
293             * the number of complete passes over a Sequence length a circular
294             * location makes i.e. if we have a sequence of length 10 and the
295             * location 3..52 we make 4 complete passes through the genome to
296             * go from position 3 to position 52.
297             */
298            public static int completeCircularPasses(int index, int seqLength) {
299                int count = 0;
300                while (index > seqLength) {
301                    count++;
302                    index -= seqLength;
303                }
304                return count - 1;
305            }
306        }
307    }