001    package org.biojava3.core.sequence.template;
002    
003    import java.util.ArrayList;
004    import java.util.HashMap;
005    import java.util.HashSet;
006    import java.util.List;
007    import java.util.Map;
008    import java.util.Set;
009    
010    import org.biojava3.core.sequence.compound.NucleotideCompound;
011    
012    /**
013     *
014     * @author Andy Yates
015     * @param <C> Type of compound this set will contain but must extend
016     * NucleotideCompound
017     */
018    public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound>
019      extends AbstractCompoundSet<C> {
020    
021      protected void addNucleotideCompound(String base, String complement, String... equivalents) {
022    
023        String[] upperEquivalents = new String[equivalents.length];
024        String[] lowerEquivalents = new String[equivalents.length];
025        for(int i=0; i<equivalents.length; i++) {
026          upperEquivalents[i] = equivalents[i].toUpperCase();
027          lowerEquivalents[i] = equivalents[i].toLowerCase();
028        }
029    
030        C upper = newNucleotideCompound(base.toUpperCase(), complement.toUpperCase(), upperEquivalents);
031        C lower = newNucleotideCompound(base.toLowerCase(), complement.toLowerCase(), lowerEquivalents);
032    
033        List<C> equivalentCompounds = new ArrayList<C>();
034    
035        for(int i=0; i<equivalents.length; i++) {
036          equivalentCompounds.add(getCompoundForString(upperEquivalents[i]));
037          equivalentCompounds.add(getCompoundForString(lowerEquivalents[i]));
038        }
039    
040        addCompound(upper, lower, equivalentCompounds);
041      }
042    
043      protected abstract C newNucleotideCompound(String base, String complement, String... equivalents);
044    
045      /**
046       * Loops through all known nucelotides and attempts to find which are
047       * equivalent to each other. Also takes into account lower casing
048       * nucleotides as well as upper-cased ones.
049       */
050      @SuppressWarnings("unchecked")
051      protected void calculateIndirectAmbiguities() {
052        Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<NucleotideCompound, List<NucleotideCompound>>();
053    
054        List<NucleotideCompound> ambiguousCompounds = new ArrayList<NucleotideCompound>();
055        for(NucleotideCompound compound: getAllCompounds()) {
056          if (!compound.isAmbiguous()) {
057            continue;
058          }
059          ambiguousCompounds.add(compound);
060        }
061    
062        for(NucleotideCompound sourceCompound: ambiguousCompounds) {
063          Set<NucleotideCompound> compoundConstituents = sourceCompound.getConsituents();
064          for(NucleotideCompound targetCompound: ambiguousCompounds) {
065            Set<NucleotideCompound> targetConstituents = targetCompound.getConsituents();
066            if(targetConstituents.containsAll(compoundConstituents)) {
067              NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound);
068              NucleotideCompound lcTargetCompound = toLowerCase(targetCompound);
069    
070            //equivalentsMap.put(sourceCompound, targetCompound);
071            //      equivalentsMap.put(sourceCompound, lcTargetCompound);
072                    
073              
074              checkAdd(equivalentsMap, sourceCompound, targetCompound);
075              checkAdd(equivalentsMap, sourceCompound, lcTargetCompound);
076            
077              checkAdd(equivalentsMap,targetCompound,sourceCompound);
078              checkAdd(equivalentsMap, lcTargetCompound, sourceCompound);
079              
080              checkAdd(equivalentsMap, lcSourceCompound, targetCompound);
081              checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound);
082                        
083            }
084          }
085        }
086    
087        //And once it's all done start adding them to the equivalents map
088        
089        for ( NucleotideCompound key: equivalentsMap.keySet()){
090            List<NucleotideCompound> vals = equivalentsMap.get(key);
091            for (NucleotideCompound value: vals){
092                    addEquivalent((C)key,(C)value);
093                    addEquivalent((C)value,(C)key);
094            }
095        }
096      }
097    
098      private void checkAdd(
099                    Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap,
100                    NucleotideCompound key,
101                    NucleotideCompound value) {
102    
103              
104          List<NucleotideCompound> listS = equivalentsMap.get(key);
105          if ( listS == null){
106              listS = new ArrayList<NucleotideCompound>();
107              equivalentsMap.put(key, listS);
108          }
109          listS.add(value);
110          
111            
112    }
113    
114    private NucleotideCompound toLowerCase(NucleotideCompound compound) {
115        return getCompoundForString(compound.getBase().toLowerCase());
116      }
117    
118      /**
119       * Calculates the best symbol for a collection of compounds. For example
120       * if you gave this method a AC it will return a M which is the ambiguity
121       * symbol for these compounds.
122       *
123       * @param compounds Compounds to calculate ambiguity for
124       * @return The ambiguity symbol which represents this set of nucleotides best
125       */
126      public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) {
127        Set<NucleotideCompound> settedCompounds = new HashSet<NucleotideCompound>();
128        for(NucleotideCompound compound: compounds) {
129          for(NucleotideCompound subCompound: compound.getConsituents()) {
130            settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase()));
131          }
132        }
133        for(NucleotideCompound compound: getAllCompounds()) {
134          if(compound.getConsituents().equals(settedCompounds)) {
135            return compound;
136          }
137        }
138        return null;
139      }
140    
141        /**
142         * NucleotideCompounds can always complement
143         */
144        @Override
145        public boolean isComplementable() {
146            return true;
147        }
148    }