/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.fs.s3.common.writer;

import org.apache.flink.annotation.Internal;
import org.apache.flink.core.fs.Path;
import org.apache.flink.fs.s3.common.utils.BackPressuringExecutor;
import org.apache.flink.fs.s3.common.utils.RefCountedFile;
import org.apache.flink.runtime.fs.hdfs.HadoopFileSystem;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.function.FunctionWithException;

import org.apache.hadoop.fs.FileSystem;

import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.Executor;

/**
 * A factory for creating or recovering {@link RecoverableMultiPartUpload multipart uploads}.
 */
@Internal
final class S3RecoverableMultipartUploadFactory {

	private final org.apache.hadoop.fs.FileSystem fs;

	private final S3AccessHelper s3AccessHelper;

	private final FunctionWithException<File, RefCountedFile, IOException> tmpFileSupplier;

	private final int maxConcurrentUploadsPerStream;

	private final Executor executor;

	S3RecoverableMultipartUploadFactory(
			final FileSystem fs,
			final S3AccessHelper s3AccessHelper,
			final int maxConcurrentUploadsPerStream,
			final Executor executor,
			final FunctionWithException<File, RefCountedFile, IOException> tmpFileSupplier) {

		this.fs = Preconditions.checkNotNull(fs);
		this.maxConcurrentUploadsPerStream = maxConcurrentUploadsPerStream;
		this.executor = executor;
		this.s3AccessHelper = s3AccessHelper;
		this.tmpFileSupplier = tmpFileSupplier;
	}
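
	/**
	 * Creates a new {@link RecoverableMultiPartUpload} for the object addressed by the given path.
	 */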
	RecoverableMultiPartUpload getNewRecoverableUpload(Path path) throws IOException {
		return RecoverableMultiPartUploadImpl.newUpload(
				s3AccessHelper,
				limitedExecutor(),
				pathToObjectName(path));
	}
	RecoverableMultiPartUpload recoverRecoverableUpload(S3Recoverable recoverable) throws IOException {
		final Optional<File> incompletePart = recoverInProgressPart(recoverable);

		return RecoverableMultiPartUploadImpl.recoverUpload(
				s3AccessHelper,
				limitedExecutor(),
				recoverable.uploadId(),
				recoverable.getObjectName(),
				recoverable.parts(),
				recoverable.numBytesInParts(),
				incompletePart);
	}
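
	/**
	 * Downloads the in-progress (incomplete) part referenced by the recoverable, if any,
	 * into a new temporary file and checks that its length matches the recovery metadata.
	 */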
	private Optional<File> recoverInProgressPart(S3Recoverable recoverable) throws IOException {
		final String objectKey = recoverable.incompleteObjectName();
		if (objectKey == null) {
			return Optional.empty();
		}

		// download the file (simple way)
		final RefCountedFile refCountedFile = tmpFileSupplier.apply(null);
		final File file = refCountedFile.getFile();
		final long numBytes = s3AccessHelper.getObject(objectKey, file);

		if (numBytes != recoverable.incompleteObjectLength()) {
			throw new IOException(String.format("Error recovering writer: " +
					"Downloading the last data chunk file gives incorrect length. " +
					"File length is %d bytes, RecoveryData indicates %d bytes",
					numBytes, recoverable.incompleteObjectLength()));
		}

		return Optional.of(file);
	}
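
	/**
	 * Converts a Flink {@link Path} into the corresponding S3 object key: the path is made
	 * absolute against the file system's working directory and the leading slash is removed.
	 */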
	private String pathToObjectName(final Path path) {
		org.apache.hadoop.fs.Path hadoopPath = HadoopFileSystem.toHadoopPath(path);
		if (!hadoopPath.isAbsolute()) {
			hadoopPath = new org.apache.hadoop.fs.Path(fs.getWorkingDirectory(), hadoopPath);
		}

		return hadoopPath.toUri().getScheme() != null && hadoopPath.toUri().getPath().isEmpty()
				? ""
				: hadoopPath.toUri().getPath().substring(1);
	}
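
	/**
	 * Returns the executor used for part uploads, wrapped in a {@link BackPressuringExecutor}
	 * when {@code maxConcurrentUploadsPerStream} is positive, to limit concurrent uploads per stream.
	 */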
	private Executor limitedExecutor() {
		return maxConcurrentUploadsPerStream <= 0 ?
				executor :
				new BackPressuringExecutor(executor, maxConcurrentUploadsPerStream);
	}
}