001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.location;
023    
024    import java.util.ArrayList;
025    import java.util.HashSet;
026    import java.util.List;
027    import java.util.Set;
028    import org.biojava3.core.exceptions.ParserException;
029    import org.biojava3.core.sequence.AccessionID;
030    import org.biojava3.core.sequence.Strand;
031    import org.biojava3.core.sequence.location.template.Location;
032    import org.biojava3.core.sequence.location.template.Point;
033    
034    /**
035     * Helper methods for use with the Location classes. Taking its
036     * inspiration from the RichSequence.Tools class from the old BioJava
037     */
038    public class LocationHelper {
039    
040        /**
041         * Used as a thin wrapper to the {@link #location(java.util.List, java.lang.String) }
042         * method to bring the given location list together as a join (the default
043         * type)
044         */
045        public static Location location(List<Location> subLocations) {
046            return location(subLocations, "join");
047        }
048    
049        /**
050         * Builds a location from a List of locations; this can be circular or
051         * linear joins. The code expects that these locations are in
052         * a sensible format.
053         *
054         * @param subLocations The list of locations to use to build the location.
055         * If given a list of size 1 we will return that location.
056         * @param type The type of join for this location; defaults to join
057         * @return
058         */
059        public static Location location(List<Location> subLocations, String type) {
060            if (subLocations.size() == 1) {
061                return subLocations.get(0);
062            }
063    
064            boolean circular = detectCicular(subLocations);
065            Strand strand = detectStrand(subLocations);
066            Point start = detectStart(subLocations);
067            Point end = detectEnd(subLocations, circular);
068            Location l;
069            if ("join".equals(type)) {
070                l = new SimpleLocation(start, end, strand, circular, subLocations);
071            }
072            else if ("order".equals(type)) {
073                l = new InsdcLocations.OrderLocation(start, end, strand, circular, subLocations);
074            }
075            else if ("one-of".equals(type)) {
076                l = new InsdcLocations.OneOfLocation(subLocations);
077            }
078            else if ("group".equals(type)) {
079                l = new InsdcLocations.GroupLocation(start, end, strand, circular, subLocations);
080            }
081            else if ("bond".equals(type)) {
082                l = new InsdcLocations.BondLocation(subLocations);
083            }
084            else {
085                throw new ParserException("Unknown join type " + type);
086            }
087    
088            return l;
089        }
090    
091        /**
092         * Returns a location object which unlike the location constructors
093         * allows you to input reverse coordinates and will convert
094         * these into the right location on the positive strand.
095         */
096        public static Location location(int start, int end, Strand strand, int length) {
097            int min = Math.min(start, end);
098            //if this is true then we have a coord on the +ve strand even though Strand could be negative
099            boolean isReverse = (min != start);
100            if (isReverse) {
101                return new SimpleLocation(
102                        new SimplePoint(start).reverse(length),
103                        new SimplePoint(end).reverse(length),
104                        strand);
105            }
106            return new SimpleLocation(start, end, strand);
107        }
108    
109        /**
110         * Converts a location which defines the outer bounds of a circular
111         * location and splits it into the required portions. Unlike any
112         * other location builder this allows you to express your input
113         * location on the reverse strand
114         *
115         * @param location The location which currently expresses the outer
116         * bounds of a circular location.
117         * @param length The length of the circular genomic unit
118         * @return The circular location; can optionally return a normal non
119         * circular location if the one you give is within the bounds of
120         * the length
121         */
122        public static Location circularLocation(int start, int end, Strand strand, int length) {
123    
124            int min = Math.min(start, end);
125            int max = Math.max(start, end);
126            //Tells us we're dealing with something that's not _right_
127            boolean isReverse = (min != start);
128    
129            if (min > length) {
130                throw new IllegalArgumentException("Cannot process a "
131                        + "location whose lowest coordinate is less than "
132                        + "the given length " + length);
133            }
134    
135            //If max positon was less than length the return a normal location
136            if (max <= length) {
137                return location(start, end, strand, length);
138            }
139    
140            //Fine for forward coords (i..e start < end)
141            int modStart = modulateCircularIndex(start, length);
142            int modEnd = modulateCircularIndex(end, length);
143            int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
144    
145            if (isReverse) {
146                int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
147                int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
148                modStart = reversedModStart;
149                modEnd = reversedModEnd;
150                start = reversedModStart;
151                //+1 to number of passes to skip the run encoded by the start
152                end = (length * (numberOfPasses + 1)) + modEnd;
153            }
154    
155            List<Location> locations = new ArrayList<Location>();
156            locations.add(new SimpleLocation(modStart, length, strand));
157            for (int i = 0; i < numberOfPasses; i++) {
158                locations.add(new SimpleLocation(1, length, strand));
159            }
160            locations.add(new SimpleLocation(1, modEnd, strand));
161            return new SimpleLocation(new SimplePoint(start),
162                    new SimplePoint(end), strand, true, false, locations);
163        }
164    
165        private static interface LocationPredicate {
166            boolean accept(Location previous, Location current);
167        }
168    
169        /**
170         * Scans through a list of locations to find the Location with the
171         * lowest start
172         */
173        public static Location getMin(List<Location> locations) {
174            return scanLocations(locations, new LocationPredicate() {
175                @Override
176                public boolean accept(Location previous, Location current) {
177                    int res = current.getStart().compareTo(previous.getStart());
178                    return res < 0;
179                }
180            });
181        }
182    
183        /**
184         * Scans through a list of locations to find the Location with the
185         * highest end
186         */
187        public static Location getMax(List<Location> locations) {
188            return scanLocations(locations, new LocationPredicate() {
189                @Override
190                public boolean accept(Location previous, Location current) {
191                    int res = current.getEnd().compareTo(previous.getEnd());
192                    return res > 0;
193                }
194            });
195        }
196    
197        /**
198         * Used for scanning through a list of locations; assumes the
199         * locations given will have at least one value otherwise
200         * we will get a null pointer
201         */
202        private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
203            Location location = null;
204            for (Location l : locations) {
205                if (location == null) {
206                    location = l;
207                }
208                else {
209                    if (predicate.accept(location, l)) {
210                        location = l;
211                    }
212                }
213            }
214            return location;
215        }
216    
217        /**
218         * Takes a point on a circular location and moves it left until it falls
219         * at the earliest possible point that represents the same base.
220         *
221         * @param index Index of the position to work with
222         * @param seqLength Length of the Sequence
223         * @return The shifted point
224         */
225        public static int modulateCircularIndex(int index, int seqLength) {
226            // Dummy case
227            if (seqLength == 0) {
228                return index;
229            }
230            // Modulate
231            while (index > seqLength) {
232                index -= seqLength;
233            }
234            return index;
235        }
236    
237        /**
238         * Works in a similar way to modulateCircularLocation but returns
239         * the number of complete passes over a Sequence length a circular
240         * location makes i.e. if we have a sequence of length 10 and the
241         * location 3..52 we make 4 complete passes through the genome to
242         * go from position 3 to position 52.
243         */
244        public static int completeCircularPasses(int index, int seqLength) {
245            int count = 0;
246            while (index > seqLength) {
247                count++;
248                index -= seqLength;
249            }
250            return count - 1;
251        }
252    
253        /**
254         * Loops through the given list of locations and returns true if it looks
255         * like they represent a circular location. Detection cannot happen if
256         * we do not have consistent accessions
257         */
258        public static boolean detectCicular(List<Location> subLocations) {
259            boolean isCircular = false;
260            if(! consistentAccessions(subLocations))
261                return isCircular;
262    
263            int lastMax = 0;
264            for (Location sub : subLocations) {
265                if (sub.getEnd().getPosition() > lastMax) {
266                    lastMax = sub.getEnd().getPosition();
267                }
268                else {
269                    isCircular = true;
270                    break;
271                }
272            }
273            return isCircular;
274        }
275    
276        /**
277         * Scans a list of locations and returns true if all the given locations
278         * are linked to the same sequence. A list of null accessioned locations
279         * is the same as a list where the accession is the same
280         *
281         * @param subLocations The locations to scan
282         * @return Returns a boolean indicating if this is consistently accessioned
283         */
284        public static boolean consistentAccessions(List<Location> subLocations) {
285            Set<AccessionID> set = new HashSet<AccessionID>();
286            for(Location sub: subLocations) {
287                set.add(sub.getAccession());
288            }
289            return set.size() == 1;
290        }
291    
292        /**
293         * Loops through the given list of locations and returns the consensus
294         * Strand class. If the class switches then we will return an undefined
295         * strand
296         */
297        public static Strand detectStrand(List<Location> subLocations) {
298            Strand strand = subLocations.get(0).getStrand();
299            for (Location sub : subLocations) {
300                if (strand != sub.getStrand()) {
301                    strand = Strand.UNDEFINED;
302                    break;
303                }
304            }
305            return strand;
306        }
307    
308        /**
309         * Assumes that the first element is the start & clones it
310         */
311        public static Point detectStart(List<Location> subLocations) {
312            return subLocations.get(0).getStart().clonePoint();
313        }
314    
315        /**
316         * This will attempt to find what the last point is and returns that
317         * position. If the location is circular this will return the total length
318         * of the location and does not mean the maximum point on the Sequence
319         * we may find the locations on
320         */
321        public static Point detectEnd(List<Location> subLocations, boolean isCircular) {
322            int end = 0;
323            Point lastPoint = null;
324            if(isCircular) {
325                for (Location sub : subLocations) {
326                    lastPoint = sub.getEnd();
327                    end += lastPoint.getPosition();
328                }
329            }
330            else {
331                lastPoint = subLocations.get(subLocations.size()-1).getEnd();
332                end = lastPoint.getPosition();
333            }
334            return new SimplePoint(end, lastPoint.isUnknown(), lastPoint.isUncertain());
335        }
336    }