001    /*
002     *                    BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     *
020     * Created on 01-21-2010
021     */
022    package org.biojava3.core.sequence.io;
023    
024    import java.io.File;
025    import java.io.FileInputStream;
026    import java.io.InputStream;
027    import java.util.LinkedHashMap;
028    import org.biojava3.core.sequence.DNASequence;
029    import org.biojava3.core.sequence.ProteinSequence;
030    
031    import org.biojava3.core.sequence.compound.AminoAcidCompound;
032    import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
033    import org.biojava3.core.sequence.compound.DNACompoundSet;
034    import org.biojava3.core.sequence.compound.NucleotideCompound;
035    import org.biojava3.core.sequence.io.DNASequenceCreator;
036    import org.biojava3.core.sequence.io.FastaReader;
037    import org.biojava3.core.sequence.io.GenericFastaHeaderParser;
038    import org.biojava3.core.sequence.io.ProteinSequenceCreator;
039    
040    /**
041     *
042     * @author Scooter Willis <willishf at gmail dot com>
043     */
044    public class FastaReaderHelper {
045    
046      /**
047       * Read a fasta file containing amino acids with setup that would handle most
048       * cases.
049       *
050       * @param file
051       * @return
052       * @throws Exception
053       */
054      public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
055          File file) throws Exception {
056        FileInputStream inStream = new FileInputStream(file);
057        LinkedHashMap<String, ProteinSequence> proteinSequences = readFastaProteinSequence(inStream);
058        inStream.close();
059        return proteinSequences;
060      }
061    
062      /**
063       * Read a fasta file containing amino acids with setup that would handle most
064       * cases. User is responsible for closing InputStream because you opened it
065       *
066       * @param inStream
067       * @return
068       * @throws Exception
069       */
070      public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
071          InputStream inStream) throws Exception {
072        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
073            inStream,
074            new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
075            new ProteinSequenceCreator(AminoAcidCompoundSet
076                .getAminoAcidCompoundSet()));
077        return fastaReader.process();
078      }
079    
080      /**
081       * Read a fasta DNA sequence
082       * @param inStream
083       * @return
084       * @throws Exception
085       */
086      public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
087          InputStream inStream) throws Exception {
088        FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<DNASequence, NucleotideCompound>(
089            inStream,
090            new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
091            new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
092        return fastaReader.process();
093      }
094    
095      /**
096       *
097       * @param file
098       * @return
099       * @throws Exception
100       */
101      public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
102          File file) throws Exception {
103        FileInputStream inStream = new FileInputStream(file);
104        LinkedHashMap<String, DNASequence> dnaSequences = readFastaDNASequence(inStream);
105        inStream.close();
106        return dnaSequences;
107      }
108    
109      public static void main(String args[]) throws Exception {
110    
111        LinkedHashMap<String, DNASequence> dnaSequences = FastaReaderHelper.readFastaDNASequence(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"));
112        for(DNASequence sequence : dnaSequences.values() ){
113            sequence.getRNASequence().getProteinSequence().getSequenceAsString();
114        }
115      }
116    }