001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022 package org.biojava3.core.sequence.io;
023
024 import java.io.File;
025 import java.io.FileInputStream;
026 import java.io.InputStream;
027 import java.util.LinkedHashMap;
028 import org.biojava3.core.sequence.DNASequence;
029 import org.biojava3.core.sequence.ProteinSequence;
030
031 import org.biojava3.core.sequence.compound.AminoAcidCompound;
032 import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
033 import org.biojava3.core.sequence.compound.DNACompoundSet;
034 import org.biojava3.core.sequence.compound.NucleotideCompound;
035 import org.biojava3.core.sequence.io.DNASequenceCreator;
036 import org.biojava3.core.sequence.io.FastaReader;
037 import org.biojava3.core.sequence.io.GenericFastaHeaderParser;
038 import org.biojava3.core.sequence.io.ProteinSequenceCreator;
039
040 /**
041 *
042 * @author Scooter Willis <willishf at gmail dot com>
043 */
044 public class FastaReaderHelper {
045
046 /**
047 * Read a fasta file containing amino acids with setup that would handle most
048 * cases.
049 *
050 * @param file
051 * @return
052 * @throws Exception
053 */
054 public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
055 File file) throws Exception {
056 FileInputStream inStream = new FileInputStream(file);
057 LinkedHashMap<String, ProteinSequence> proteinSequences = readFastaProteinSequence(inStream);
058 inStream.close();
059 return proteinSequences;
060 }
061
062 /**
063 * Read a fasta file containing amino acids with setup that would handle most
064 * cases. User is responsible for closing InputStream because you opened it
065 *
066 * @param inStream
067 * @return
068 * @throws Exception
069 */
070 public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
071 InputStream inStream) throws Exception {
072 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
073 inStream,
074 new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
075 new ProteinSequenceCreator(AminoAcidCompoundSet
076 .getAminoAcidCompoundSet()));
077 return fastaReader.process();
078 }
079
080 /**
081 * Read a fasta DNA sequence
082 * @param inStream
083 * @return
084 * @throws Exception
085 */
086 public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
087 InputStream inStream) throws Exception {
088 FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<DNASequence, NucleotideCompound>(
089 inStream,
090 new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
091 new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
092 return fastaReader.process();
093 }
094
095 /**
096 *
097 * @param file
098 * @return
099 * @throws Exception
100 */
101 public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
102 File file) throws Exception {
103 FileInputStream inStream = new FileInputStream(file);
104 LinkedHashMap<String, DNASequence> dnaSequences = readFastaDNASequence(inStream);
105 inStream.close();
106 return dnaSequences;
107 }
108
109 public static void main(String args[]) throws Exception {
110
111 LinkedHashMap<String, DNASequence> dnaSequences = FastaReaderHelper.readFastaDNASequence(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds.fna"));
112 for(DNASequence sequence : dnaSequences.values() ){
113 sequence.getRNASequence().getProteinSequence().getSequenceAsString();
114 }
115 }
116 }