001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.location.template;
023
024 import java.util.ArrayList;
025 import java.util.List;
026
027 import org.biojava3.core.sequence.Strand;
028 import org.biojava3.core.sequence.location.SimpleLocation;
029 import org.biojava3.core.sequence.location.SimplePoint;
030 import org.biojava3.core.sequence.template.Accessioned;
031 import org.biojava3.core.sequence.template.Compound;
032 import org.biojava3.core.sequence.template.Sequence;
033
034 /**
035 * Sets of integers used to represent the location of features on sequence. A
036 * location can be a single set of bounds or composed of multiple
037 * sub-locations. Each sub-location is a Location and therefore subject to the
038 * same rules.
039 *
040 * @author ayates
041 */
042 public interface Location extends Iterable<Location>, Accessioned {
043
044 /**
045 * Basic location which is set to the minimum and maximum bounds of
046 * {@link Integer}. {@link Strand} is set to {@link Strand#UNDEFINED}.
047 */
048 public static final Location EMPTY =
049 new SimpleLocation(Integer.MIN_VALUE, Integer.MAX_VALUE, Strand.UNDEFINED);
050
051 /**
052 * Start of the location
053 */
054 Point getStart();
055
056 /**
057 * End of the location
058 */
059 Point getEnd();
060
061 /**
062 * Returns the length of the outer bounds of this location
063 */
064 int getLength();
065
066 /**
067 * Strand which the location is located on
068 */
069 Strand getStrand();
070
071 /**
072 * Gives access to the sub locations for this location. However this does
073 * not return sub-locations of sub-locations. For that functionality use
074 * {@link #getAllSubLocations()}.
075 *
076 * @return A list of a single level of sub-locations
077 */
078 List<Location> getSubLocations();
079
080 /**
081 * An extension to {@link #getSubLocations()} which returns sub-locations
082 * of sub-locations; this will continue until it runs out of those locations.
083 *
084 * @return List of all sub locations including sub-locations of sub locations
085 */
086 List<Location> getRelevantSubLocations();
087
088 /**
089 * Returns true if the location is considered to be complex meaning
090 * the location is actually composed of sub-locations.
091 */
092 boolean isComplex();
093
094 /**
095 * Indicates if this location is circular.
096 */
097 boolean isCircular();
098
099 /**
100 * Returns true if the position is meant to represent a point between
101 * two points such as 78^79. Only valid if start and stop are next to
102 * each other.
103 */
104 boolean isBetweenCompounds();
105
106 /**
107 * Will return a SequenceReader object which represents the outer bounds
108 * of this Location
109 *
110 * @param <C> The type of compound to use
111 * @param sequence The sequence object to work with
112 * @return The sequence
113 */
114 <C extends Compound> Sequence<C> getSubSequence(Sequence<C> sequence);
115
116 /**
117 * Will return a SequenceReader object which offers a view of all resolved
118 * locations i.e. those locations which are not complex and define the
119 * true Sequence represented
120 *
121 * @param <C> The type of compound to use
122 * @param sequence The sequence object to work with
123 * @return The full assembled sequence
124 */
125 <C extends Compound> Sequence<C> getRelevantSubSequence(Sequence<C> sequence);
126
127 /**
128 * Helper methods for use with the Location classes. Taking its
129 * inspiration from the RichSequence.Tools class from the old BioJava
130 */
131 public static class Tools {
132
133 /**
134 * Used for building a location from a series of sub-locations
135 */
136 public static Location location(List<Location> locations, Integer sequenceLength, String type) {
137 type = (type == null) ? "join" : type;
138 sequenceLength = (sequenceLength == null) ? -1 : sequenceLength;
139
140
141
142 return null;
143 }
144
145 /**
146 * Returns a location object which unlike the location constructors
147 * allows you to input reverse coordinates and will convert
148 * these into the right location on the positive strand.
149 */
150 public static Location location(int start, int end, Strand strand, int length) {
151 int min = Math.min(start, end);
152 //if this is true then we have a coord on the +ve strand even though Strand could be negative
153 boolean isReverse = (min != start);
154 if (isReverse) {
155 return new SimpleLocation(
156 new SimplePoint(start).reverse(length),
157 new SimplePoint(end).reverse(length),
158 strand);
159 }
160 return new SimpleLocation(start, end, strand);
161 }
162
163 /**
164 * Converts a location which defines the outer bounds of a circular
165 * location and splits it into the required portions. Unlike any
166 * other location builder this allows you to express your input
167 * location on the reverse strand
168 *
169 * @param location The location which currently expresses the outer
170 * bounds of a circular location.
171 * @param length The length of the circular genomic unit
172 * @return The circular location; can optionally return a normal non
173 * circular location if the one you give is within the bounds of
174 * the length
175 */
176 public static Location circularLocation(int start, int end, Strand strand, int length) {
177
178 int min = Math.min(start, end);
179 int max = Math.max(start, end);
180 //Tells us we're dealing with something that's not _right_
181 boolean isReverse = (min != start);
182
183 if (min > length) {
184 throw new IllegalArgumentException("Cannot process a "
185 + "location whose lowest coordinate is less than "
186 + "the given length " + length);
187 }
188
189 //If max positon was less than length the return a normal location
190 if (max <= length) {
191 return location(start, end, strand, length);
192 }
193
194 //Fine for forward coords (i..e start < end)
195 int modStart = modulateCircularIndex(start, length);
196 int modEnd = modulateCircularIndex(end, length);
197 int numberOfPasses = completeCircularPasses(Math.max(start, end), length);
198
199 if (isReverse) {
200 int reversedModStart = new SimplePoint(modStart).reverse(length).getPosition();
201 int reversedModEnd = new SimplePoint(modEnd).reverse(length).getPosition();
202 modStart = reversedModStart;
203 modEnd = reversedModEnd;
204 start = reversedModStart;
205 //+1 to number of passes to skip the run encoded by the start
206 end = (length * (numberOfPasses + 1)) + modEnd;
207 }
208
209 List<Location> locations = new ArrayList<Location>();
210 locations.add(new SimpleLocation(modStart, length, strand));
211 for (int i = 0; i < numberOfPasses; i++) {
212 locations.add(new SimpleLocation(1, length, strand));
213 }
214 locations.add(new SimpleLocation(1, modEnd, strand));
215 return new SimpleLocation(new SimplePoint(start),
216 new SimplePoint(end), strand, true, false, locations);
217 }
218
219 private static interface LocationPredicate {
220
221 boolean accept(Location previous, Location current);
222 }
223
224 /**
225 * Scans through a list of locations to find the Location with the
226 * lowest start
227 */
228 public static Location getMin(List<Location> locations) {
229 return scanLocations(locations, new LocationPredicate() {
230
231 public boolean accept(Location previous, Location current) {
232 int res = current.getStart().compareTo(previous.getStart());
233 return res < 0;
234 }
235 });
236 }
237
238 /**
239 * Scans through a list of locations to find the Location with the
240 * highest end
241 */
242 public static Location getMax(List<Location> locations) {
243 return scanLocations(locations, new LocationPredicate() {
244
245 public boolean accept(Location previous, Location current) {
246 int res = current.getEnd().compareTo(previous.getEnd());
247 return res > 0;
248 }
249 });
250 }
251
252 /**
253 * Used for scanning through a list of locations; assumes the
254 * locations given will have at least one value otherwise
255 * we will get a null pointer
256 */
257 private static Location scanLocations(List<Location> locations, LocationPredicate predicate) {
258 Location location = null;
259 for (Location l : locations) {
260 if (location == null) {
261 location = l;
262 } else {
263 if (predicate.accept(location, l)) {
264 location = l;
265 }
266 }
267 }
268 return location;
269 }
270
271 /**
272 * Takes a point on a circular location and moves it left until it falls
273 * at the earliest possible point that represents the same base.
274 *
275 * @param index Index of the position to work with
276 * @param seqLength Length of the Sequence
277 * @return The shifted point
278 */
279 public static int modulateCircularIndex(int index, int seqLength) {
280 // Dummy case
281 if (seqLength == 0) {
282 return index;
283 }
284 // Modulate
285 while (index > seqLength) {
286 index -= seqLength;
287 }
288 return index;
289 }
290
291 /**
292 * Works in a similar way to modulateCircularLocation but returns
293 * the number of complete passes over a Sequence length a circular
294 * location makes i.e. if we have a sequence of length 10 and the
295 * location 3..52 we make 4 complete passes through the genome to
296 * go from position 3 to position 52.
297 */
298 public static int completeCircularPasses(int index, int seqLength) {
299 int count = 0;
300 while (index > seqLength) {
301 count++;
302 index -= seqLength;
303 }
304 return count - 1;
305 }
306 }
307 }