blob: e7b6e15b49abbd0484bbdd62e6cd53dd97544908 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import Foundation
/// Defines a protocol for transforms that know how to read from external file-like systems given an input
/// collection of paths.
///
/// Conforming types supply two static transforms: ``listFiles(matching:)`` maps `(path,pattern)` pairs to
/// `(path,matched_filename)` pairs, and ``readFiles(matching:)`` maps `(path,matched_filename)` pairs to `Data`.
public protocol FileIOSource {
/// A static function definition that takes `(path,matched_filename)` pairs output by ``listFiles(matching:)`` or some other source
/// and reads them as a single data output.
///
/// - Parameter matching: An input ``PCollection`` of pairs representing a `(path,matched_filename)`
/// where `path` and `matched_filename` are dependent on the specific source.
/// - Returns: A ``PCollection`` of binary `Data` elements. Note that the original path and filename are not carried with the data.
static func readFiles(matching: PCollection<KV<String, String>>) -> PCollection<Data>
/// Static function definition for listing the files matching some pattern at a given path. The definition of "path" largely depends on the
/// source type. For example, for remote object stores that might be a bucket or a bucket prefix while for a local filesystem it would be a
/// standard file path. It is best to check the specific transform's documentation to verify how it interprets the path and the pattern.
///
/// - Parameter matching: An input ``PCollection`` of pairs, each representing the path to check and the pattern to check with.
///
/// - Returns: A ``PCollection`` of pairs of the form `(path,matched_filename)`
static func listFiles(matching: PCollection<KV<String, String>>) -> PCollection<KV<String, String>>
}
public extension PCollection<KV<String, String>> {
    /// Reads the files named by this collection's elements using a concrete ``FileIOSource``.
    ///
    /// This is a fluent wrapper that hands the receiver to the source's static
    /// `readFiles(matching:)` requirement. The elements are expected to be
    /// `(path,matched_filename)` pairs in whatever form the chosen source defines.
    ///
    /// - Parameter _: The ``FileIOSource`` type to read with, for example `LocalStorage.self`.
    ///   Only the type is used; it selects which implementation is invoked.
    /// - Returns: The ``PCollection`` of `Data` produced by `Source.readFiles(matching:)`.
    func readFiles<Source: FileIOSource>(in _: Source.Type) -> PCollection<Data> {
        return Source.readFiles(matching: self)
    }

    /// Lists the files matching this collection's elements using a concrete ``FileIOSource``.
    ///
    /// This is a fluent wrapper that hands the receiver to the source's static
    /// `listFiles(matching:)` requirement. Each element is a `(path,pattern)` pair; what counts
    /// as a "path" depends on the source (for an object store it may be a bucket or bucket prefix).
    ///
    /// - Parameter _: The ``FileIOSource`` type to list with, for example `LocalStorage.self`.
    ///   Only the type is used; it selects which implementation is invoked.
    /// - Returns: The ``PCollection`` of `(path,matched_filename)` pairs produced by
    ///   `Source.listFiles(matching:)`.
    func listFiles<Source: FileIOSource>(in _: Source.Type) -> PCollection<KV<String, String>> {
        return Source.listFiles(matching: self)
    }
}