001 package org.biojava3.core.sequence.template;
002
003 import java.util.ArrayList;
004 import java.util.HashMap;
005 import java.util.HashSet;
006 import java.util.List;
007 import java.util.Map;
008 import java.util.Set;
009
010 import org.biojava3.core.sequence.compound.NucleotideCompound;
011
012 /**
013 *
014 * @author Andy Yates
015 * @param <C> Type of compound this set will contain but must extend
016 * NucleotideCompound
017 */
018 public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound>
019 extends AbstractCompoundSet<C> {
020
021 protected void addNucleotideCompound(String base, String complement, String... equivalents) {
022
023 String[] upperEquivalents = new String[equivalents.length];
024 String[] lowerEquivalents = new String[equivalents.length];
025 for(int i=0; i<equivalents.length; i++) {
026 upperEquivalents[i] = equivalents[i].toUpperCase();
027 lowerEquivalents[i] = equivalents[i].toLowerCase();
028 }
029
030 C upper = newNucleotideCompound(base.toUpperCase(), complement.toUpperCase(), upperEquivalents);
031 C lower = newNucleotideCompound(base.toLowerCase(), complement.toLowerCase(), lowerEquivalents);
032
033 List<C> equivalentCompounds = new ArrayList<C>();
034
035 for(int i=0; i<equivalents.length; i++) {
036 equivalentCompounds.add(getCompoundForString(upperEquivalents[i]));
037 equivalentCompounds.add(getCompoundForString(lowerEquivalents[i]));
038 }
039
040 addCompound(upper, lower, equivalentCompounds);
041 }
042
043 protected abstract C newNucleotideCompound(String base, String complement, String... equivalents);
044
045 /**
046 * Loops through all known nucelotides and attempts to find which are
047 * equivalent to each other. Also takes into account lower casing
048 * nucleotides as well as upper-cased ones.
049 */
050 @SuppressWarnings("unchecked")
051 protected void calculateIndirectAmbiguities() {
052 Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<NucleotideCompound, List<NucleotideCompound>>();
053
054 List<NucleotideCompound> ambiguousCompounds = new ArrayList<NucleotideCompound>();
055 for(NucleotideCompound compound: getAllCompounds()) {
056 if (!compound.isAmbiguous()) {
057 continue;
058 }
059 ambiguousCompounds.add(compound);
060 }
061
062 for(NucleotideCompound sourceCompound: ambiguousCompounds) {
063 Set<NucleotideCompound> compoundConstituents = sourceCompound.getConsituents();
064 for(NucleotideCompound targetCompound: ambiguousCompounds) {
065 Set<NucleotideCompound> targetConstituents = targetCompound.getConsituents();
066 if(targetConstituents.containsAll(compoundConstituents)) {
067 NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound);
068 NucleotideCompound lcTargetCompound = toLowerCase(targetCompound);
069
070 //equivalentsMap.put(sourceCompound, targetCompound);
071 // equivalentsMap.put(sourceCompound, lcTargetCompound);
072
073
074 checkAdd(equivalentsMap, sourceCompound, targetCompound);
075 checkAdd(equivalentsMap, sourceCompound, lcTargetCompound);
076
077 checkAdd(equivalentsMap,targetCompound,sourceCompound);
078 checkAdd(equivalentsMap, lcTargetCompound, sourceCompound);
079
080 checkAdd(equivalentsMap, lcSourceCompound, targetCompound);
081 checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound);
082
083 }
084 }
085 }
086
087 //And once it's all done start adding them to the equivalents map
088
089 for ( NucleotideCompound key: equivalentsMap.keySet()){
090 List<NucleotideCompound> vals = equivalentsMap.get(key);
091 for (NucleotideCompound value: vals){
092 addEquivalent((C)key,(C)value);
093 addEquivalent((C)value,(C)key);
094 }
095 }
096 }
097
098 private void checkAdd(
099 Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap,
100 NucleotideCompound key,
101 NucleotideCompound value) {
102
103
104 List<NucleotideCompound> listS = equivalentsMap.get(key);
105 if ( listS == null){
106 listS = new ArrayList<NucleotideCompound>();
107 equivalentsMap.put(key, listS);
108 }
109 listS.add(value);
110
111
112 }
113
114 private NucleotideCompound toLowerCase(NucleotideCompound compound) {
115 return getCompoundForString(compound.getBase().toLowerCase());
116 }
117
118 /**
119 * Calculates the best symbol for a collection of compounds. For example
120 * if you gave this method a AC it will return a M which is the ambiguity
121 * symbol for these compounds.
122 *
123 * @param compounds Compounds to calculate ambiguity for
124 * @return The ambiguity symbol which represents this set of nucleotides best
125 */
126 public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) {
127 Set<NucleotideCompound> settedCompounds = new HashSet<NucleotideCompound>();
128 for(NucleotideCompound compound: compounds) {
129 for(NucleotideCompound subCompound: compound.getConsituents()) {
130 settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase()));
131 }
132 }
133 for(NucleotideCompound compound: getAllCompounds()) {
134 if(compound.getConsituents().equals(settedCompounds)) {
135 return compound;
136 }
137 }
138 return null;
139 }
140
141 /**
142 * NucleotideCompounds can always complement
143 */
144 @Override
145 public boolean isComplementable() {
146 return true;
147 }
148 }