001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.OutputStream;
023import java.nio.charset.Charset;
024import java.nio.charset.StandardCharsets;
025import java.nio.file.CopyOption;
026import java.nio.file.Files;
027import java.nio.file.InvalidPathException;
028import java.nio.file.Path;
029import java.nio.file.Paths;
030import java.nio.file.StandardCopyOption;
031import java.util.UUID;
032import java.util.concurrent.atomic.AtomicInteger;
033
034import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
035import org.apache.commons.io.Charsets;
036import org.apache.commons.io.build.AbstractOrigin;
037import org.apache.commons.io.file.PathUtils;
038import org.apache.commons.io.output.DeferredFileOutputStream;
039
040/**
041 * The default implementation of the {@link FileItem FileItem} interface.
042 * <p>
043 * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see
044 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload
045 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
046 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy
047 * with large files.
048 * </p>
049 * <p>
050 * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
051 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
052 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
053 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
054 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
055 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.
056 * </p>
057 */
058public final class DiskFileItem implements FileItem<DiskFileItem> {
059
060    /**
061     * Builds a new {@link DiskFileItem} instance.
062     * <p>
063     * For example:
064     * </p>
065     *
066     * <pre>{@code
067     * final FileItem fileItem = fileItemFactory.fileItemBuilder()
068     *   .setFieldName("FieldName")
069     *   .setContentType("ContentType")
070     *   .setFormField(true)
071     *   .setFileName("FileName")
072     *   .setFileItemHeaders(...)
073     *   .get();
074     * }
075     * </pre>
076     */
077    public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> {
078
079        /**
080         * Constructs a new instance.
081         */
082        public Builder() {
083            setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD);
084            setPath(PathUtils.getTempDirectory());
085            setCharset(DEFAULT_CHARSET);
086            setCharsetDefault(DEFAULT_CHARSET);
087        }
088
089        /**
090         * Constructs a new instance.
091         * <p>
092         * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
093         * {@link UnsupportedOperationException}.
094         * </p>
095         *
096         * @return a new instance.
097         * @throws UnsupportedOperationException if the origin cannot provide a Path.
098         * @see AbstractOrigin#getReader(Charset)
099         */
100        @Override
101        public DiskFileItem get() {
102            final var diskFileItem = new DiskFileItem(getFieldName(), getContentType(), isFormField(), getFileName(), getBufferSize(), getPath(),
103                    getFileItemHeaders(), getCharset());
104            final var tracker = getFileCleaningTracker();
105            if (tracker != null) {
106                tracker.track(diskFileItem.getTempFile().toFile(), diskFileItem);
107            }
108            return diskFileItem;
109        }
110
111    }
112
113    /**
114     * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a
115     * default charset value of "ISO-8859-1" when received via HTTP.
116     */
117    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
118
119    /**
120     * UID used in unique file name generation.
121     */
122    private static final String UID = UUID.randomUUID().toString().replace('-', '_');
123
124    /**
125     * Counter used in unique identifier generation.
126     */
127    private static final AtomicInteger COUNTER = new AtomicInteger();
128
129    /**
130     * Constructs a new {@link Builder}.
131     *
132     * @return a new {@link Builder}.
133     */
134    public static Builder builder() {
135        return new Builder();
136    }
137
138    /**
139     * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any
140     * modifications. Otherwise, throw an {@link InvalidPathException}.
141     *
142     * @param fileName The file name to check
143     * @return Unmodified file name, if valid.
144     * @throws InvalidPathException The file name is invalid.
145     */
146    public static String checkFileName(final String fileName) {
147        if (fileName != null) {
148            // Specific NUL check to build a better exception message.
149            final var indexOf0 = fileName.indexOf(0);
150            if (indexOf0 != -1) {
151                final var sb = new StringBuilder();
152                for (var i = 0; i < fileName.length(); i++) {
153                    final var c = fileName.charAt(i);
154                    switch (c) {
155                    case 0:
156                        sb.append("\\0");
157                        break;
158                    default:
159                        sb.append(c);
160                        break;
161                    }
162                }
163                throw new InvalidPathException(fileName, sb.toString(), indexOf0);
164            }
165            // Throws InvalidPathException on invalid file names
166            Paths.get(fileName);
167        }
168        return fileName;
169    }
170
171    /**
172     * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance.
173     *
174     * @return A String with the non-random looking instance identifier.
175     */
176    private static String getUniqueId() {
177        final var limit = 100_000_000;
178        final var current = COUNTER.getAndIncrement();
179        var id = Integer.toString(current);
180
181        // If you manage to get more than 100 million of ids, you'll
182        // start getting ids longer than 8 characters.
183        if (current < limit) {
184            id = ("00000000" + id).substring(id.length());
185        }
186        return id;
187    }
188
189    /**
190     * The name of the form field as provided by the browser.
191     */
192    private String fieldName;
193
194    /**
195     * The content type passed by the browser, or {@code null} if not defined.
196     */
197    private final String contentType;
198
199    /**
200     * Whether or not this item is a simple form field.
201     */
202    private volatile boolean isFormField;
203
204    /**
205     * The original file name in the user's file system.
206     */
207    private final String fileName;
208
209    /**
210     * The size of the item, in bytes. This is used to cache the size when a file item is moved from its original location.
211     */
212    private volatile long size = -1;
213
214    /**
215     * The threshold above which uploads will be stored on disk.
216     */
217    private final int threshold;
218
219    /**
220     * The directory in which uploaded files will be stored, if stored on disk.
221     */
222    private final Path repository;
223
224    /**
225     * Cached contents of the file.
226     */
227    private byte[] cachedContent;
228
229    /**
230     * Output stream for this item.
231     */
232    private DeferredFileOutputStream dfos;
233
234    /**
235     * The temporary file to use.
236     */
237    private final Path tempFile;
238
239    /**
240     * The file items headers.
241     */
242    private FileItemHeaders fileItemHeaders;
243
244    /**
245     * Default content Charset to be used when no explicit Charset parameter is provided by the sender.
246     */
247    private Charset charsetDefault = DEFAULT_CHARSET;
248
249    /**
250     * Constructs a new {@code DiskFileItem} instance.
251     *
252     * @param fieldName       The name of the form field.
253     * @param contentType     The content type passed by the browser or {@code null} if not specified.
254     * @param isFormField     Whether or not this item is a plain form field, as opposed to a file upload.
255     * @param fileName        The original file name in the user's file system, or {@code null} if not specified.
256     * @param threshold       The threshold, in bytes, below which items will be retained in memory and above which they will be stored as a file.
257     * @param repository      The data repository, which is the directory in which files will be created, should the item size exceed the threshold.
258     * @param fileItemHeaders The file item headers.
259     * @param defaultCharset  The default Charset.
260     */
261    private DiskFileItem(final String fieldName, final String contentType, final boolean isFormField, final String fileName, final int threshold,
262            final Path repository, final FileItemHeaders fileItemHeaders, final Charset defaultCharset) {
263        this.fieldName = fieldName;
264        this.contentType = contentType;
265        this.charsetDefault = defaultCharset;
266        this.isFormField = isFormField;
267        this.fileName = fileName;
268        this.fileItemHeaders = fileItemHeaders;
269        this.threshold = threshold;
270        this.repository = repository != null ? repository : PathUtils.getTempDirectory();
271        this.tempFile = this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId()));
272    }
273
274    /**
275     * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is
276     * done at an earlier time, thus preserving system resources.
277     *
278     * @throws IOException if an error occurs.
279     */
280    @Override
281    public DiskFileItem delete() throws IOException {
282        cachedContent = null;
283        final var outputFile = getPath();
284        if (outputFile != null && !isInMemory() && Files.exists(outputFile)) {
285            Files.delete(outputFile);
286        }
287        return this;
288    }
289
290    /**
291     * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage
292     * and cached.
293     *
294     * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read.
295     * @throws IOException if an I/O error occurs.
296     * @throws OutOfMemoryError     See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger
297     *                              that {@code 2GB}
298     */
299    @Override
300    public byte[] get() throws IOException {
301        if (isInMemory()) {
302            if (cachedContent == null && dfos != null) {
303                cachedContent = dfos.getData();
304            }
305            return cachedContent != null ? cachedContent.clone() : new byte[0];
306        }
307        return Files.readAllBytes(dfos.getFile().toPath());
308    }
309
310    /**
311     * Gets the content charset passed by the agent or {@code null} if not defined.
312     *
313     * @return The content charset passed by the agent or {@code null} if not defined.
314     */
315    public Charset getCharset() {
316        final var parser = new ParameterParser();
317        parser.setLowerCaseNames(true);
318        // Parameter parser can handle null input
319        final var params = parser.parse(getContentType(), ';');
320        return Charsets.toCharset(params.get("charset"), charsetDefault);
321    }
322
323    /**
324     * Gets the default charset for use when no explicit charset parameter is provided by the sender.
325     *
326     * @return the default charset
327     */
328    public Charset getCharsetDefault() {
329        return charsetDefault;
330    }
331
332    /**
333     * Gets the content type passed by the agent or {@code null} if not defined.
334     *
335     * @return The content type passed by the agent or {@code null} if not defined.
336     */
337    @Override
338    public String getContentType() {
339        return contentType;
340    }
341
342    /**
343     * Gets the name of the field in the multipart form corresponding to this file item.
344     *
345     * @return The name of the form field.
346     * @see #setFieldName(String)
347     */
348    @Override
349    public String getFieldName() {
350        return fieldName;
351    }
352
353    /**
354     * Gets the file item headers.
355     *
356     * @return The file items headers.
357     */
358    @Override
359    public FileItemHeaders getHeaders() {
360        return fileItemHeaders;
361    }
362
363    /**
364     * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
365     *
366     * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
367     * @throws IOException if an error occurs.
368     */
369    @Override
370    public InputStream getInputStream() throws IOException {
371        if (!isInMemory()) {
372            return Files.newInputStream(dfos.getFile().toPath());
373        }
374
375        if (cachedContent == null) {
376            cachedContent = dfos.getData();
377        }
378        return new ByteArrayInputStream(cachedContent);
379    }
380
381    /**
382     * Gets the original file name in the client's file system.
383     *
384     * @return The original file name in the client's file system.
385     * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name
386     *                              anyways, catch the exception and use {@link InvalidPathException#getInput()}.
387     */
388    @Override
389    public String getName() {
390        return checkFileName(fileName);
391    }
392
393    /**
394     * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
395     *
396     * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
397     */
398    @Override
399    public OutputStream getOutputStream() {
400        if (dfos == null) {
401            dfos = DeferredFileOutputStream.builder().setThreshold(threshold).setOutputFile(getTempFile().toFile()).get();
402        }
403        return dfos;
404    }
405
406    /**
407     * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in
408     * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to new
409     * location without copying the data, if the source and destination locations reside within the same logical volume.
410     *
411     * @return The data file, or {@code null} if the data is stored in memory.
412     */
413    public Path getPath() {
414        if (dfos == null) {
415            return null;
416        }
417        if (isInMemory()) {
418            return null;
419        }
420        return dfos.getFile().toPath();
421    }
422
423    /**
424     * Gets the size of the file.
425     *
426     * @return The size of the file, in bytes.
427     */
428    @Override
429    public long getSize() {
430        if (size >= 0) {
431            return size;
432        }
433        if (cachedContent != null) {
434            return cachedContent.length;
435        }
436        return dfos != null ? dfos.getByteCount() : 0;
437    }
438
439    /**
440     * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file.
441     * <p>
442     * <strong>TODO</strong> Consider making this method throw UnsupportedEncodingException.
443     * </p>
444     *
445     * @return The contents of the file, as a string.
446     * @throws IOException if an I/O error occurs
447     */
448    @Override
449    public String getString() throws IOException {
450        return new String(get(), getCharset());
451    }
452
453    /**
454     * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file.
455     *
456     * @param charset The charset to use.
457     * @return The contents of the file, as a string.
458     * @throws IOException if an I/O error occurs
459     */
460    @Override
461    public String getString(final Charset charset) throws IOException {
462        return new String(get(), Charsets.toCharset(charset, charsetDefault));
463    }
464
465    /**
466     * Creates and returns a {@link java.io.File File} representing a uniquely named temporary file in the configured repository path. The lifetime of the file
467     * is tied to the lifetime of the {@code FileItem} instance; the file will be deleted when the instance is garbage collected.
468     * <p>
469     * <strong>Note: Subclasses that override this method must ensure that they return the same File each time.</strong>
470     * </p>
471     *
472     * @return The {@link java.io.File File} to be used for temporary storage.
473     */
474    protected Path getTempFile() {
475        return tempFile;
476    }
477
478    /**
479     * Tests whether or not a {@code FileItem} instance represents a simple form field.
480     *
481     * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
482     * @see #setFormField(boolean)
483     */
484    @Override
485    public boolean isFormField() {
486        return isFormField;
487    }
488
489    /**
490     * Provides a hint as to whether or not the file contents will be read from memory.
491     *
492     * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
493     */
494    @Override
495    public boolean isInMemory() {
496        if (cachedContent != null) {
497            return true;
498        }
499        return dfos.isInMemory();
500    }
501
502    /**
503     * Sets the default charset for use when no explicit charset parameter is provided by the sender.
504     *
505     * @param charset the default charset
506     * @return {@code this} instance.
507     */
508    public DiskFileItem setCharsetDefault(final Charset charset) {
509        charsetDefault = charset;
510        return this;
511    }
512
513    /**
514     * Sets the field name used to reference this file item.
515     *
516     * @param fieldName The name of the form field.
517     * @see #getFieldName()
518     */
519    @Override
520    public DiskFileItem setFieldName(final String fieldName) {
521        this.fieldName = fieldName;
522        return this;
523    }
524
525    /**
526     * Specifies whether or not a {@code FileItem} instance represents a simple form field.
527     *
528     * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file.
529     * @see #isFormField()
530     */
531    @Override
532    public DiskFileItem setFormField(final boolean state) {
533        isFormField = state;
534        return this;
535    }
536
537    /**
538     * Sets the file item headers.
539     *
540     * @param headers The file items headers.
541     */
542    @Override
543    public DiskFileItem setHeaders(final FileItemHeaders headers) {
544        this.fileItemHeaders = headers;
545        return this;
546    }
547
548    /**
549     * Returns a string representation of this object.
550     *
551     * @return a string representation of this object.
552     */
553    @Override
554    public String toString() {
555        return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(),
556                getFieldName());
557    }
558
559    /**
560     * Writes an uploaded item to disk.
561     * <p>
562     * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the
563     * uploaded item to a file.
564     * </p>
565     * <p>
566     * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise,
567     * the data will be copied to the specified file.
568     * </p>
569     * <p>
570     * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method
571     * renames a temporary file, that file will no longer be available to copy or rename again at a later time.
572     * </p>
573     *
574     * @param file The {@code File} into which the uploaded item should be stored.
575     * @throws IOException if an error occurs.
576     */
577    @Override
578    public DiskFileItem write(final Path file) throws IOException {
579        if (isInMemory()) {
580            try (var fout = Files.newOutputStream(file)) {
581                fout.write(get());
582            } catch (final IOException e) {
583                throw new IOException("Unexpected output data", e);
584            }
585        } else {
586            final var outputFile = getPath();
587            if (outputFile == null) {
588                /*
589                 * For whatever reason we cannot write the file to disk.
590                 */
591                throw new FileUploadException("Cannot write uploaded file to disk.");
592            }
593            // Save the length of the file
594            size = Files.size(outputFile);
595            //
596            // The uploaded file is being stored on disk in a temporary location so move it to the desired file.
597            //
598            Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING);
599        }
600        return this;
601    }
602}