001    /*
002     *                  BioJava development code
003     *
004     * This code may be freely distributed and modified under the
005     * terms of the GNU Lesser General Public Licence.  This should
006     * be distributed with the code.  If you do not have a copy,
007     * see:
008     *
009     *      http://www.gnu.org/copyleft/lesser.html
010     *
011     * Copyright for this code is held jointly by the individual
012     * authors.  These should be listed in @author doc comments.
013     *
014     * For more information on the BioJava project and its aims,
015     * or to join the biojava-l mailing list, visit the home page
016     * at:
017     *
018     *      http://www.biojava.org/
019     * 
020     * Created on Dec 28, 2005
021     *
022     */
023    package org.biojava3.core.util;
024    
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
038    
039    
040    /** A class that provides an InputStream from a File. The file can be compressed or uncompressed. 
041     *  
042     * Currently supported
043     * compressions:
044     * <ul>
045     * <li>Gzip (extension .gz)</li>
046     * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li> 
047     * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
048     * <li>Z (extension .Z) compressed using the unix compress command </li>
049     * <li>for any other extension, no compression is assumed </li>
050     * </ul>
051     * 
052     * 
053     * @author Andreas Prlic
054     * @since 1.5
055     * @version %I% %G%
056     *
057     */
058    public class InputStreamProvider {
059    
060       /**
061        * The magic number found at the start of a GZIP stream.
062        */
063       public static final int GZIP_MAGIC = 0x1f8b;
064       public static final String CACHE_PROPERTY = "biojava.cache.files";
065       boolean cacheRawFiles ;
066    
067       FlatFileCache cache ;
068       public InputStreamProvider() {
069          super();
070          cacheRawFiles = false;
071    
072          String prop = System.getProperty(CACHE_PROPERTY);
073          if ( prop != null && prop.equals("true")) {
074             cacheRawFiles = true;
075             cache = FlatFileCache.getInstance();
076          }
077    
078       }
079    
080       /** get an InputStream for this file 
081        * 
082        * @param pathToFile the path of the file.
083        * @return an InputStream for the file located at the path.
084        * @throws IOException
085        */
086       public InputStream getInputStream(String pathToFile)
087       throws IOException
088       {
089          File f = new File(pathToFile);
090          return getInputStream(f);
091       }
092    
093    
094       /** open the file and read the magic number from the beginning
095        * this is used to determine the compression type
096        * 
097        * @param in an input stream to read from
098        * @return the magic number
099        * @throws IOException
100        */
101       private int getMagicNumber(InputStream in) 
102       throws IOException {
103    
104    
105          int t = in.read();
106          if (t < 0) throw new EOFException("Failed to read magic number");
107          int magic = (t & 0xff) << 8;
108          t = in.read();
109          if (t < 0) throw new EOFException("Failed to read magic number");
110          magic += t & 0xff;
111    
112          return magic;
113       }
114    
115    
116       public InputStream getInputStream(URL u)
117       throws IOException{
118    
119          int magic = 0;
120    
121          try {
122             InputStream inStream = u.openStream(); 
123             magic = getMagicNumber(inStream);
124             inStream.close();
125          } catch (Exception e){
126             e.printStackTrace();
127          };
128    
129          if (magic == UncompressInputStream.LZW_MAGIC ) {
130             // a Z compressed file
131             return openCompressedURL(u);
132          } else if (magic == GZIP_MAGIC ) {
133             return openGZIPURL(u); 
134          } else if ( u.toString().endsWith(".gz")) {
135             return openGZIPURL(u);
136          } else if ( u.toString().endsWith(".Z")) {
137             // unix compressed 
138             return openCompressedURL(u);
139    
140          } else {
141             InputStream inStream = u.openStream();
142             return inStream;
143          }
144    
145       }
146    
147    
148       /** get an InputStream for the file
149        * 
150        * @param f a File
151        * @return an InputStream for the file
152        * @throws IOException
153        */
154       public  InputStream getInputStream(File f) 
155       throws IOException
156       {
157    
158          // use the magic numbers to determine the compression type, 
159          // use file extension only as 2nd choice 
160    
161          int magic = 0;
162    
163          try {
164             InputStream test = getInputStreamFromFile(f);
165             magic = getMagicNumber(test);
166             test.close();
167          } catch (Exception e){
168              System.err.println("Problem while loading: " + f);
169             e.printStackTrace();
170          };
171    
172          InputStream inputStream = null;
173    
174          String fileName = f.getName();
175    
176          if (magic == UncompressInputStream.LZW_MAGIC ) {
177             // a Z compressed file
178             return openCompressedFile(f);
179          }
180    
181          else if (magic == GZIP_MAGIC ) {
182             return openGZIPFile(f); 
183          }
184    
185          else if ( fileName.endsWith(".gz")) {
186             return openGZIPFile(f);
187          } 
188    
189          else if ( fileName.endsWith(".zip")){
190    
191             ZipFile zipfile = new ZipFile(f);
192    
193             // stream to first entry is returned ...
194             ZipEntry entry;
195             Enumeration<? extends ZipEntry> e = zipfile.entries();
196             if ( e.hasMoreElements()){
197                entry = e.nextElement();
198                inputStream = zipfile.getInputStream(entry);
199             } else {
200                throw new IOException ("Zip file has no entries");
201             }
202    
203          } 
204    
205          else if ( fileName.endsWith(".jar")) {
206    
207             JarFile jarFile = new JarFile(f);
208    
209             // stream to first entry is returned
210             JarEntry entry;
211             Enumeration<JarEntry> e = jarFile.entries();
212             if ( e.hasMoreElements()){
213                entry = e.nextElement();
214                inputStream = jarFile.getInputStream(entry);
215             } else {
216                throw new IOException ("Jar file has no entries");
217             }
218          } 
219    
220          else if ( fileName.endsWith(".Z")) {
221             // unix compressed 
222             return openCompressedFile(f);
223    
224          }
225    
226          else {
227    
228             // no particular extension found, assume that it is an uncompressed file
229             inputStream = getInputStreamFromFile(f);
230          }
231    
232          return inputStream;
233       }
234    
235    
236       /** Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
237        * 
238        * @param f
239        * @return
240        * @throws FileNotFoundException
241        */
242       private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
243          InputStream stream = null;
244    
245    
246    
247          if ( cacheRawFiles ){
248             stream = FlatFileCache.getInputStream(f.getAbsolutePath());
249    
250             if ( stream == null){
251                FlatFileCache.addToCache(f.getAbsolutePath(),f);
252                stream = FlatFileCache.getInputStream(f.getAbsolutePath());
253             }
254          }
255    
256          if ( stream == null)
257             stream = new FileInputStream(f);                  
258          
259          return stream;
260       }
261    
262    
263       private InputStream openCompressedFile(File f)
264       throws IOException{
265    
266          InputStream is           =  getInputStreamFromFile(f);
267          InputStream inputStream =  new UncompressInputStream(is);
268          return inputStream;
269       }
270    
271       private InputStream openCompressedURL(URL u)
272       throws IOException{
273    
274          InputStream is           =  u.openStream();
275          InputStream inputStream =  new UncompressInputStream(is);
276          return inputStream;
277       }
278    
279    
280       private InputStream openGZIPFile(File f) 
281       throws IOException{
282    
283          InputStream is      = getInputStreamFromFile(f);
284          InputStream inputStream = new GZIPInputStream(is);
285          return inputStream;
286       }
287    
288       private InputStream openGZIPURL(URL u) 
289       throws IOException{
290    
291          InputStream is      = u.openStream();
292          InputStream inputStream = new GZIPInputStream(is);
293          return inputStream;
294       }
295    }