001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.location;
023
024 import java.util.ArrayList;
025 import java.util.HashSet;
026 import java.util.List;
027 import java.util.Set;
028 import org.biojava3.core.exceptions.ParserException;
029 import org.biojava3.core.sequence.AccessionID;
030 import org.biojava3.core.sequence.Strand;
031 import org.biojava3.core.sequence.location.template.Location;
032 import org.biojava3.core.sequence.location.template.Point;
033
034 /**
035 * Helper methods for use with the Location classes. Taking its
036 * inspiration from the RichSequence.Tools class from the old BioJava
037 */
038 public class LocationHelper {
039
040 /**
041 * Used as a thin wrapper to the {@link #location(java.util.List, java.lang.String) }
042 * method to bring the given location list together as a join (the default
043 * type)
044 */
045 public static Location location(List<Location> subLocations) {
046 return location(subLocations, "join");
047 }
048
049 /**
050 * Builds a location from a List of locations; this can be circular or
051 * linear joins. The code expects that these locations are in
052 * a sensible format.
053 *
054 * @param subLocations The list of locations to use to build the location.
055 * If given a list of size 1 we will return that location.
056 * @param type The type of join for this location; defaults to join
057 * @return
058 */
059 public static Location location(List<Location> subLocations, String type) {
060 if (subLocations.size() == 1) {
061 return subLocations.get(0);
062 }
063
064 boolean circular = detectCicular(subLocations);
065 Strand strand = detectStrand(subLocations);
066 Point start = detectStart(subLocations);
067 Point end = detectEnd(subLocations, circular);
068 Location l;
069 if ("join".equals(type)) {
070 l = new SimpleLocation(start, end, strand, circular, subLocations);
071 }
072 else if ("order".equals(type)) {
073 l = new InsdcLocations.OrderLocation(start, end, strand, circular, subLocations);
074 }
075 else if ("one-of".equals(type)) {
076 l = new InsdcLocations.OneOfLocation(subLocations);
077 }
078 else if ("group".equals(type)) {
079 l = new InsdcLocations.GroupLocation(start, end, strand, circular, subLocations);
080 }
081 else if ("bond".equals(type)) {
082 l = new InsdcLocations.BondLocation(subLocations);
083 }
084 else {
085 throw new ParserException("Unknown join type " + type);
086 }
087
088 return l;
089 }
090
091 /**
092 * Returns a location object which unlike the location constructors
093 * allows you to input reverse coordinates and will convert
094 * these into the right location on the positive strand.
095 */
096 public static Location location(int start, int end, Strand strand, int length) {
097 int min = Math.min(start, end);
098 //if this is true then we have a coord on the +ve strand even though Strand could be negative
099 boolean isReverse = (min != start);
100 if (isReverse) {
101 return new SimpleLocation(
102 new SimplePoint(start).reverse(length),
103 new SimplePoint(end).reverse(length),
104 strand);
105 }
106 return new SimpleLocation(start, end, strand);
107 }
108
109 /**
110 * Converts a location which defines the outer bounds of a circular
111 * location and splits it into the required portions. Unlike any
112 * other location builder this allows you to express your input
113 * location on the reverse strand
114 *
115 * @param location The location which currently expresses the outer
116 * bounds of a circular location.
117 * @param length The length of the circular genomic unit
118 * @return The circular location; can optionally return a normal non
119 * circular location if the one you give is within the bounds of
120 * the length
121 */
122 public static Location circularLocation(int start, int end, Strand strand, int length) {
123
124 int min = Math.min(start, end);
125 int max = Math.max(start, end);
126 //Tells us we're dealing with something that's not _right_
127 boolean isReverse = (min != start);
128
129 if (min > length) {
130 throw new IllegalArgumentException("Cannot process a "
131 + "location whose lowest coordinate is less than "
132 + "the given length " + length);
133 }
134
135 //If max positon was less than length the return a normal location
136 if (max <= length) {
137 return location(start, end, strand, length);
138 }
139
140 //Fine for forward coords (i..e start < end)
141 int modStart = modulateCircularIndex(start, length);
142 int modEnd = modulateCircularIndex(end, length);
143 int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
144
145 if (isReverse) {
146 int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
147 int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
148 modStart = reversedModStart;
149 modEnd = reversedModEnd;
150 start = reversedModStart;
151 //+1 to number of passes to skip the run encoded by the start
152 end = (length * (numberOfPasses + 1)) + modEnd;
153 }
154
155 List<Location> locations = new ArrayList<Location>();
156 locations.add(new SimpleLocation(modStart, length, strand));
157 for (int i = 0; i < numberOfPasses; i++) {
158 locations.add(new SimpleLocation(1, length, strand));
159 }
160 locations.add(new SimpleLocation(1, modEnd, strand));
161 return new SimpleLocation(new SimplePoint(start),
162 new SimplePoint(end), strand, true, false, locations);
163 }
164
165 private static interface LocationPredicate {
166 boolean accept(Location previous, Location current);
167 }
168
169 /**
170 * Scans through a list of locations to find the Location with the
171 * lowest start
172 */
173 public static Location getMin(List<Location> locations) {
174 return scanLocations(locations, new LocationPredicate() {
175 @Override
176 public boolean accept(Location previous, Location current) {
177 int res = current.getStart().compareTo(previous.getStart());
178 return res < 0;
179 }
180 });
181 }
182
183 /**
184 * Scans through a list of locations to find the Location with the
185 * highest end
186 */
187 public static Location getMax(List<Location> locations) {
188 return scanLocations(locations, new LocationPredicate() {
189 @Override
190 public boolean accept(Location previous, Location current) {
191 int res = current.getEnd().compareTo(previous.getEnd());
192 return res > 0;
193 }
194 });
195 }
196
197 /**
198 * Used for scanning through a list of locations; assumes the
199 * locations given will have at least one value otherwise
200 * we will get a null pointer
201 */
202 private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
203 Location location = null;
204 for (Location l : locations) {
205 if (location == null) {
206 location = l;
207 }
208 else {
209 if (predicate.accept(location, l)) {
210 location = l;
211 }
212 }
213 }
214 return location;
215 }
216
217 /**
218 * Takes a point on a circular location and moves it left until it falls
219 * at the earliest possible point that represents the same base.
220 *
221 * @param index Index of the position to work with
222 * @param seqLength Length of the Sequence
223 * @return The shifted point
224 */
225 public static int modulateCircularIndex(int index, int seqLength) {
226 // Dummy case
227 if (seqLength == 0) {
228 return index;
229 }
230 // Modulate
231 while (index > seqLength) {
232 index -= seqLength;
233 }
234 return index;
235 }
236
237 /**
238 * Works in a similar way to modulateCircularLocation but returns
239 * the number of complete passes over a Sequence length a circular
240 * location makes i.e. if we have a sequence of length 10 and the
241 * location 3..52 we make 4 complete passes through the genome to
242 * go from position 3 to position 52.
243 */
244 public static int completeCircularPasses(int index, int seqLength) {
245 int count = 0;
246 while (index > seqLength) {
247 count++;
248 index -= seqLength;
249 }
250 return count - 1;
251 }
252
253 /**
254 * Loops through the given list of locations and returns true if it looks
255 * like they represent a circular location. Detection cannot happen if
256 * we do not have consistent accessions
257 */
258 public static boolean detectCicular(List<Location> subLocations) {
259 boolean isCircular = false;
260 if(! consistentAccessions(subLocations))
261 return isCircular;
262
263 int lastMax = 0;
264 for (Location sub : subLocations) {
265 if (sub.getEnd().getPosition() > lastMax) {
266 lastMax = sub.getEnd().getPosition();
267 }
268 else {
269 isCircular = true;
270 break;
271 }
272 }
273 return isCircular;
274 }
275
276 /**
277 * Scans a list of locations and returns true if all the given locations
278 * are linked to the same sequence. A list of null accessioned locations
279 * is the same as a list where the accession is the same
280 *
281 * @param subLocations The locations to scan
282 * @return Returns a boolean indicating if this is consistently accessioned
283 */
284 public static boolean consistentAccessions(List<Location> subLocations) {
285 Set<AccessionID> set = new HashSet<AccessionID>();
286 for(Location sub: subLocations) {
287 set.add(sub.getAccession());
288 }
289 return set.size() == 1;
290 }
291
292 /**
293 * Loops through the given list of locations and returns the consensus
294 * Strand class. If the class switches then we will return an undefined
295 * strand
296 */
297 public static Strand detectStrand(List<Location> subLocations) {
298 Strand strand = subLocations.get(0).getStrand();
299 for (Location sub : subLocations) {
300 if (strand != sub.getStrand()) {
301 strand = Strand.UNDEFINED;
302 break;
303 }
304 }
305 return strand;
306 }
307
308 /**
309 * Assumes that the first element is the start & clones it
310 */
311 public static Point detectStart(List<Location> subLocations) {
312 return subLocations.get(0).getStart().clonePoint();
313 }
314
315 /**
316 * This will attempt to find what the last point is and returns that
317 * position. If the location is circular this will return the total length
318 * of the location and does not mean the maximum point on the Sequence
319 * we may find the locations on
320 */
321 public static Point detectEnd(List<Location> subLocations, boolean isCircular) {
322 int end = 0;
323 Point lastPoint = null;
324 if(isCircular) {
325 for (Location sub : subLocations) {
326 lastPoint = sub.getEnd();
327 end += lastPoint.getPosition();
328 }
329 }
330 else {
331 lastPoint = subLocations.get(subLocations.size()-1).getEnd();
332 end = lastPoint.getPosition();
333 }
334 return new SimplePoint(end, lastPoint.isUnknown(), lastPoint.isUncertain());
335 }
336 }