001 /*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence. This should
006 * be distributed with the code. If you do not have a copy,
007 * see:
008 *
009 * http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors. These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 * http://www.biojava.org/
019 *
020 * Created on Dec 28, 2005
021 *
022 */
023 package org.biojava3.core.util;
024
025 import java.io.EOFException;
026 import java.io.File;
027 import java.io.FileInputStream;
028 import java.io.FileNotFoundException;
029 import java.io.IOException;
030 import java.io.InputStream;
031 import java.net.URL;
032 import java.util.Enumeration;
033 import java.util.jar.JarEntry;
034 import java.util.jar.JarFile;
035 import java.util.zip.GZIPInputStream;
036 import java.util.zip.ZipEntry;
037 import java.util.zip.ZipFile;
038
039
040 /** A class that provides an InputStream from a File. The file can be compressed or uncompressed.
041 *
042 * Currently supported
043 * compressions:
044 * <ul>
045 * <li>Gzip (extension .gz)</li>
046 * <li>Zip (extension .zip) in this case a stream to the first entry in the zip file is returned </li>
047 * <li>Jar (extension .jar) same as .Zip; only stream to first entry is returned </li>
048 * <li>Z (extension .Z) compressed using the unix compress command </li>
049 * <li>for any other extension, no compression is assumed </li>
050 * </ul>
051 *
052 *
053 * @author Andreas Prlic
054 * @since 1.5
055 * @version %I% %G%
056 *
057 */
058 public class InputStreamProvider {
059
060 /**
061 * The magic number found at the start of a GZIP stream.
062 */
063 public static final int GZIP_MAGIC = 0x1f8b;
064 public static final String CACHE_PROPERTY = "biojava.cache.files";
065 boolean cacheRawFiles ;
066
067 FlatFileCache cache ;
068 public InputStreamProvider() {
069 super();
070 cacheRawFiles = false;
071
072 String prop = System.getProperty(CACHE_PROPERTY);
073 if ( prop != null && prop.equals("true")) {
074 cacheRawFiles = true;
075 cache = FlatFileCache.getInstance();
076 }
077
078 }
079
080 /** get an InputStream for this file
081 *
082 * @param pathToFile the path of the file.
083 * @return an InputStream for the file located at the path.
084 * @throws IOException
085 */
086 public InputStream getInputStream(String pathToFile)
087 throws IOException
088 {
089 File f = new File(pathToFile);
090 return getInputStream(f);
091 }
092
093
094 /** open the file and read the magic number from the beginning
095 * this is used to determine the compression type
096 *
097 * @param in an input stream to read from
098 * @return the magic number
099 * @throws IOException
100 */
101 private int getMagicNumber(InputStream in)
102 throws IOException {
103
104
105 int t = in.read();
106 if (t < 0) throw new EOFException("Failed to read magic number");
107 int magic = (t & 0xff) << 8;
108 t = in.read();
109 if (t < 0) throw new EOFException("Failed to read magic number");
110 magic += t & 0xff;
111
112 return magic;
113 }
114
115
116 public InputStream getInputStream(URL u)
117 throws IOException{
118
119 int magic = 0;
120
121 try {
122 InputStream inStream = u.openStream();
123 magic = getMagicNumber(inStream);
124 inStream.close();
125 } catch (Exception e){
126 e.printStackTrace();
127 };
128
129 if (magic == UncompressInputStream.LZW_MAGIC ) {
130 // a Z compressed file
131 return openCompressedURL(u);
132 } else if (magic == GZIP_MAGIC ) {
133 return openGZIPURL(u);
134 } else if ( u.toString().endsWith(".gz")) {
135 return openGZIPURL(u);
136 } else if ( u.toString().endsWith(".Z")) {
137 // unix compressed
138 return openCompressedURL(u);
139
140 } else {
141 InputStream inStream = u.openStream();
142 return inStream;
143 }
144
145 }
146
147
148 /** get an InputStream for the file
149 *
150 * @param f a File
151 * @return an InputStream for the file
152 * @throws IOException
153 */
154 public InputStream getInputStream(File f)
155 throws IOException
156 {
157
158 // use the magic numbers to determine the compression type,
159 // use file extension only as 2nd choice
160
161 int magic = 0;
162
163 try {
164 InputStream test = getInputStreamFromFile(f);
165 magic = getMagicNumber(test);
166 test.close();
167 } catch (Exception e){
168 System.err.println("Problem while loading: " + f);
169 e.printStackTrace();
170 };
171
172 InputStream inputStream = null;
173
174 String fileName = f.getName();
175
176 if (magic == UncompressInputStream.LZW_MAGIC ) {
177 // a Z compressed file
178 return openCompressedFile(f);
179 }
180
181 else if (magic == GZIP_MAGIC ) {
182 return openGZIPFile(f);
183 }
184
185 else if ( fileName.endsWith(".gz")) {
186 return openGZIPFile(f);
187 }
188
189 else if ( fileName.endsWith(".zip")){
190
191 ZipFile zipfile = new ZipFile(f);
192
193 // stream to first entry is returned ...
194 ZipEntry entry;
195 Enumeration<? extends ZipEntry> e = zipfile.entries();
196 if ( e.hasMoreElements()){
197 entry = e.nextElement();
198 inputStream = zipfile.getInputStream(entry);
199 } else {
200 throw new IOException ("Zip file has no entries");
201 }
202
203 }
204
205 else if ( fileName.endsWith(".jar")) {
206
207 JarFile jarFile = new JarFile(f);
208
209 // stream to first entry is returned
210 JarEntry entry;
211 Enumeration<JarEntry> e = jarFile.entries();
212 if ( e.hasMoreElements()){
213 entry = e.nextElement();
214 inputStream = jarFile.getInputStream(entry);
215 } else {
216 throw new IOException ("Jar file has no entries");
217 }
218 }
219
220 else if ( fileName.endsWith(".Z")) {
221 // unix compressed
222 return openCompressedFile(f);
223
224 }
225
226 else {
227
228 // no particular extension found, assume that it is an uncompressed file
229 inputStream = getInputStreamFromFile(f);
230 }
231
232 return inputStream;
233 }
234
235
236 /** Wrapper for new FileInputStream. if System.property biojava.cache.files is set, will try to load files from memory cache.
237 *
238 * @param f
239 * @return
240 * @throws FileNotFoundException
241 */
242 private InputStream getInputStreamFromFile(File f) throws FileNotFoundException{
243 InputStream stream = null;
244
245
246
247 if ( cacheRawFiles ){
248 stream = FlatFileCache.getInputStream(f.getAbsolutePath());
249
250 if ( stream == null){
251 FlatFileCache.addToCache(f.getAbsolutePath(),f);
252 stream = FlatFileCache.getInputStream(f.getAbsolutePath());
253 }
254 }
255
256 if ( stream == null)
257 stream = new FileInputStream(f);
258
259 return stream;
260 }
261
262
263 private InputStream openCompressedFile(File f)
264 throws IOException{
265
266 InputStream is = getInputStreamFromFile(f);
267 InputStream inputStream = new UncompressInputStream(is);
268 return inputStream;
269 }
270
271 private InputStream openCompressedURL(URL u)
272 throws IOException{
273
274 InputStream is = u.openStream();
275 InputStream inputStream = new UncompressInputStream(is);
276 return inputStream;
277 }
278
279
280 private InputStream openGZIPFile(File f)
281 throws IOException{
282
283 InputStream is = getInputStreamFromFile(f);
284 InputStream inputStream = new GZIPInputStream(is);
285 return inputStream;
286 }
287
288 private InputStream openGZIPURL(URL u)
289 throws IOException{
290
291 InputStream is = u.openStream();
292 InputStream inputStream = new GZIPInputStream(is);
293 return inputStream;
294 }
295 }