blob: 34ba69df358c9f00f4ea663e4e78b5ef42169f63 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.Collection;
import java.util.List;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;
import org.apache.beam.sdk.io.fs.CreateOptions;
import org.apache.beam.sdk.io.fs.MatchResult;
import org.apache.beam.sdk.io.fs.ResourceId;
/**
* File system interface in Beam.
*
* <p>It defines APIs for writing file systems agnostic code.
*
* <p>All methods are protected, and they are for file system providers to implement. Clients should
* use the {@link FileSystems} utility.
*/
@Experimental(Kind.FILESYSTEM)
public abstract class FileSystem<ResourceIdT extends ResourceId> {
/**
* This is the entry point to convert user-provided specs to {@link ResourceIdT ResourceIds}.
* Callers should use {@link #match} to resolve users specs ambiguities before calling other
* methods.
*
* <p>Implementation should handle the following ambiguities of a user-provided spec:
*
* <ol>
* <li>{@code spec} could be a glob or a uri. {@link #match} should be able to tell and choose
* efficient implementations.
* <li>The user-provided {@code spec} might refer to files or directories. It is common that
* users that wish to indicate a directory will omit the trailing {@code /}, such as in a
* spec of {@code "/tmp/dir"}. The {@link FileSystem} should be able to recognize a
* directory with the trailing {@code /} omitted, but should always return a correct {@link
* ResourceIdT} (e.g., {@code "/tmp/dir/"} inside the returned {@link MatchResult}.
* </ol>
*
* <p>All {@link FileSystem} implementations should support glob in the final hierarchical path
* component of {@link ResourceIdT}. This allows SDK libraries to construct file system agnostic
* spec. {@link FileSystem FileSystems} can support additional patterns for user-provided specs.
*
* @return {@code List<MatchResult>} in the same order of the input specs.
* @throws IllegalArgumentException if specs are invalid.
* @throws IOException if all specs failed to match due to issues like: network connection,
* authorization. Exception for individual spec need to be deferred until callers retrieve
* metadata with {@link MatchResult#metadata()}.
*/
protected abstract List<MatchResult> match(List<String> specs) throws IOException;
/**
* Returns a write channel for the given {@link ResourceIdT}.
*
* <p>The resource is not expanded; it is used verbatim.
*
* @param resourceId the reference of the file-like resource to create
* @param createOptions the configuration of the create operation
*/
protected abstract WritableByteChannel create(ResourceIdT resourceId, CreateOptions createOptions)
throws IOException;
/**
* Returns a read channel for the given {@link ResourceIdT}.
*
* <p>The resource is not expanded; it is used verbatim.
*
* <p>If seeking is supported, then this returns a {@link java.nio.channels.SeekableByteChannel}.
*
* @param resourceId the reference of the file-like resource to open
*/
protected abstract ReadableByteChannel open(ResourceIdT resourceId) throws IOException;
/**
* Copies a {@link List} of file-like resources from one location to another.
*
* <p>The number of source resources must equal the number of destination resources. Destination
* resources will be created recursively.
*
* @param srcResourceIds the references of the source resources
* @param destResourceIds the references of the destination resources
* @throws FileNotFoundException if the source resources are missing. When copy throws, each
* resource might or might not be copied. In such scenarios, callers can use {@code match()}
* to determine the state of the resources.
*/
protected abstract void copy(List<ResourceIdT> srcResourceIds, List<ResourceIdT> destResourceIds)
throws IOException;
/**
* Renames a {@link List} of file-like resources from one location to another.
*
* <p>The number of source resources must equal the number of destination resources. Destination
* resources will be created recursively.
*
* @param srcResourceIds the references of the source resources
* @param destResourceIds the references of the destination resources
* @throws FileNotFoundException if the source resources are missing. When rename throws, the
* state of the resources is unknown but safe: for every (source, destination) pair of
* resources, the following are possible: a) source exists, b) destination exists, c) source
* and destination both exist. Thus no data is lost, however, duplicated resource are
* possible. In such scenarios, callers can use {@code match()} to determine the state of the
* resource.
*/
protected abstract void rename(
List<ResourceIdT> srcResourceIds, List<ResourceIdT> destResourceIds) throws IOException;
/**
* Deletes a collection of resources.
*
* @param resourceIds the references of the resources to delete.
* @throws FileNotFoundException if resources are missing. When delete throws, each resource might
* or might not be deleted. In such scenarios, callers can use {@code match()} to determine
* the state of the resources.
*/
protected abstract void delete(Collection<ResourceIdT> resourceIds) throws IOException;
/**
* Returns a new {@link ResourceId} for this filesystem that represents the named resource. The
* user supplies both the resource spec and whether it is a directory.
*
* <p>The supplied {@code singleResourceSpec} is expected to be in a proper format, including any
* necessary escaping, for this {@link FileSystem}.
*
* <p>This function may throw an {@link IllegalArgumentException} if given an invalid argument,
* such as when the specified {@code singleResourceSpec} is not a valid resource name.
*/
protected abstract ResourceIdT matchNewResource(String singleResourceSpec, boolean isDirectory);
/**
* Get the URI scheme which defines the namespace of the {@link FileSystem}.
*
* @see <a href="https://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
*/
protected abstract String getScheme();
}