src/main/java/org/apache/cassandra/sidecar/restore/RestoreJobUtil.java - cassandra-sidecar - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.cassandra.sidecar.restore;

 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import com.datastax.driver.core.utils.UUIDs;
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import com.google.inject.name.Named;
 import org.apache.cassandra.sidecar.exceptions.RestoreJobException;
 import org.apache.cassandra.sidecar.exceptions.RestoreJobFatalException;
 import org.apache.cassandra.sidecar.utils.DigestAlgorithm;
 import org.apache.cassandra.sidecar.utils.DigestAlgorithmProvider;

 /**
  * Utilities that only make sense in the context of restore jobs.
  *
  * <p>Note: Avoid using it in other scenarios.
  */
 @Singleton
 public class RestoreJobUtil
 {
     private static final Logger LOGGER = LoggerFactory.getLogger(RestoreJobUtil.class);
     private static final int KB_512 = 512 * 1024;
     // it is part of upload id and get validated by
     // org.apache.cassandra.sidecar.utils.SSTableUploadsPathBuilder.UPLOAD_ID_PATTERN
     private static final String RESTORE_JOB_PREFIX = "c0ffee-";
     private static final int RESTORE_JOB_PREFIX_LEN = RESTORE_JOB_PREFIX.length();
     private static final int RESTORE_JOB_DEFAULT_HASH_SEED = 0;
     // Zip entry names separate path components with '/', regardless of the platform separator
     private static final char ZIP_ENTRY_SEPARATOR = '/';

     private final DigestAlgorithmProvider digestAlgorithmProvider;

     /**
      * @param digestAlgorithmProvider provider of the digest algorithm used by {@link #checksum(File, int)}
      */
     @Inject
     public RestoreJobUtil(@Named("xxhash32") DigestAlgorithmProvider digestAlgorithmProvider)
     {
         this.digestAlgorithmProvider = digestAlgorithmProvider;
     }

     /**
      * Unzip a restore slice zip. Every entry is extracted directly into {@code targetDir};
      * an existing file with the same name is replaced.
      *
      * @param zipFile source zip file
      * @param targetDir directory to keep the unzipped files
      * @throws IOException I/O exceptions during unzip
      * @throws RestoreJobException the zip file is malicious, or it contains a nested directory
      */
     public static void unzip(File zipFile, File targetDir) throws IOException, RestoreJobException
     {
         try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipFile.toPath())))
         {
             ZipEntry zipEntry;
             while ((zipEntry = zis.getNextEntry()) != null)
             {
                 // Encounters a directory inside the zip file
                 // It is not expected. The zip file should have the directory depth of 1.
                 // The reason of not using isDirectory(): in test, it gets 'dir/file' directly, instead of 'dir/'
                 if (containsPathSeparator(zipEntry.getName()))
                 {
                     throw new RestoreJobFatalException("Unexpected directory in slice zip file. File: " + zipFile);
                 }

                 File targetFile = newProtectedTargetFile(zipEntry, targetDir);
                 Files.copy(zis, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
             }
         }
     }

     /**
      * Resolve a prefixed job id for path
      * @param jobId restore job id
      * @return prefixed job id string
      */
     public static String prefixedJobId(UUID jobId)
     {
         return RESTORE_JOB_PREFIX + jobId;
     }

     /**
      * Generate an upload id for a slice of a restore job.
      * The id is composed of the prefixed job id, the slice id and a random integer in [0, 10000),
      * joined by '-'.
      *
      * @param jobId restore job id
      * @param sliceId slice id
      * @return upload id string
      */
     public static String generateUniqueUploadId(UUID jobId, String sliceId)
     {
         int randomSuffix = ThreadLocalRandom.current().nextInt(10000);
         return prefixedJobId(jobId) + '-' + sliceId + '-' + randomSuffix;
     }

     /**
      * Extract the timestamp from the restore job directory name.
      * @param fileName directory file name
      * @return unix timestamp epoch; otherwise, return -1 if no timestamp can be extracted
      */
     public static long timestampFromRestoreJobDir(String fileName)
     {
         if (!fileName.startsWith(RESTORE_JOB_PREFIX))
             return -1;

         try
         {
             UUID id = UUID.fromString(fileName.substring(RESTORE_JOB_PREFIX_LEN));
             return UUIDs.unixTimestamp(id);
         }
         catch (IllegalArgumentException e)
         {
             // the remainder of the name is not a UUID that carries a timestamp
             return -1;
         }
     }

     /**
      * Cleans given directory path without deleting it.
      * When the path is not an existing directory, a warning is logged and nothing is deleted.
      * A failure to clean or delete an entry is logged and rethrown wrapped in a {@link RuntimeException}.
      *
      * @param path directory path to be cleaned
      * @throws IOException when I/O error occurs on listing the directory
      */
     public static void cleanDirectory(Path path) throws IOException
     {
         // Files.isDirectory already returns false for a path that does not exist
         if (!Files.isDirectory(path))
         {
             LOGGER.warn("Clean directory API called for non existent directory: {}", path);
             return;
         }

         try (Stream<Path> contents = Files.list(path))
         {
             contents.forEach(filePath -> {
                 try
                 {
                     // NOTE(review): Files.isDirectory follows symbolic links by default,
                     // so the target of a linked directory is cleaned too - confirm that is intended
                     if (Files.isDirectory(filePath))
                     {
                         // empty the sub-directory first; Files.delete cannot remove a non-empty directory
                         cleanDirectory(filePath);
                     }
                     Files.delete(filePath);
                 }
                 catch (IOException e)
                 {
                     LOGGER.error("Unexpected error occurred while cleaning directory {}, ", path, e);
                     throw new RuntimeException(e);
                 }
             });
         }
     }

     /**
      * Tell whether the zip entry name refers to a location inside a directory.
      * Both the zip separator ('/') and the platform separator are considered.
      *
      * @param entryName name of the zip entry
      * @return true if the name contains a path separator
      */
     private static boolean containsPathSeparator(String entryName)
     {
         return entryName.indexOf(ZIP_ENTRY_SEPARATOR) >= 0 || entryName.contains(File.separator);
     }

     /**
      * Create a file that is protected from zip slip attack (https://security.snyk.io/research/zip-slip-vulnerability)
      * @param zipEntry zip entry to be extracted
      * @param targetDir directory to keep the unzipped files
      * @return a new file
      * @throws IOException failed to resolving path
      * @throws RestoreJobException if the zip file is malicious
      */
     private static File newProtectedTargetFile(ZipEntry zipEntry, File targetDir)
     throws IOException, RestoreJobException
     {
         File targetFile = new File(targetDir, zipEntry.getName());

         // Normalize the paths of both target dir and file
         Path canonicalDir = targetDir.getCanonicalFile().toPath();
         Path canonicalFile = targetFile.getCanonicalFile().toPath();

         // Path.startsWith compares whole name elements. A textual prefix check would accept
         // a sibling such as '/data/foobar' for the target dir '/data/foo'.
         // The entry must also not resolve to the target dir itself, e.g. an empty or '.' name.
         boolean insideTargetDir = canonicalFile.startsWith(canonicalDir) && !canonicalFile.equals(canonicalDir);
         if (!insideTargetDir)
         {
             throw new RestoreJobException("Bad zip entry: " + zipEntry.getName());
         }

         return targetFile;
     }

     /**
      * Checksum the file with the default seed.
      *
      * @param file the file to use to perform the checksum
      * @return the checksum hex string of the file's content. XXHash32 is employed as the hash algorithm.
      * @throws IOException when reading the file fails
      */
     public String checksum(File file) throws IOException
     {
         return checksum(file, RESTORE_JOB_DEFAULT_HASH_SEED);
     }

     /**
      * @param file the file to use to perform the checksum
      * @param seed the seed to use for the hasher
      * @return the checksum hex string of the file's content. XXHash32 is employed as the hash algorithm.
      * @throws IOException when reading the file fails
      */
     public String checksum(File file, int seed) throws IOException
     {
         // resources are closed in reverse order: the digest algorithm first, then the stream
         try (InputStream fis = Files.newInputStream(file.toPath());
              DigestAlgorithm digestAlgorithm = digestAlgorithmProvider.get(seed))
         {
             // feed the file to the digest in 512 KiB chunks
             byte[] buffer = new byte[KB_512];
             int len;
             while ((len = fis.read(buffer)) != -1)
             {
                 digestAlgorithm.update(buffer, 0, len);
             }
             return digestAlgorithm.digest();
         }
     }

     /**
      * @return the current time in nanoseconds
      */
     public long currentTimeNanos()
     {
         return System.nanoTime();
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.cassandra.sidecar.restore;

	import java.io.File;
	import java.io.IOException;
	import java.io.InputStream;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.StandardCopyOption;
	import java.util.UUID;
	import java.util.concurrent.ThreadLocalRandom;
	import java.util.stream.Stream;
	import java.util.zip.ZipEntry;
	import java.util.zip.ZipInputStream;

	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import com.datastax.driver.core.utils.UUIDs;
	import com.google.inject.Inject;
	import com.google.inject.Singleton;
	import com.google.inject.name.Named;
	import org.apache.cassandra.sidecar.exceptions.RestoreJobException;
	import org.apache.cassandra.sidecar.exceptions.RestoreJobFatalException;
	import org.apache.cassandra.sidecar.utils.DigestAlgorithm;
	import org.apache.cassandra.sidecar.utils.DigestAlgorithmProvider;

	/**
	* Utilities that only makes sense in the context of restore jobs.
	*
	* Note: Avoid using it in the other scenarios.
	*
	*/
	@Singleton
	public class RestoreJobUtil
	{
	private static final Logger LOGGER = LoggerFactory.getLogger(RestoreJobUtil.class);
	private static final int KB_512 = 512 * 1024;
	// it is part of upload id and get validated by
	// org.apache.cassandra.sidecar.utils.SSTableUploadsPathBuilder.UPLOAD_ID_PATTERN
	private static final String RESTORE_JOB_PREFIX = "c0ffee-";
	private static final int RESTORE_JOB_PREFIX_LEN = RESTORE_JOB_PREFIX.length();
	private static final int RESTORE_JOB_DEFAULT_HASH_SEED = 0;

	private final DigestAlgorithmProvider digestAlgorithmProvider;

	@Inject
	public RestoreJobUtil(@Named("xxhash32") DigestAlgorithmProvider digestAlgorithmProvider)
	{
	this.digestAlgorithmProvider = digestAlgorithmProvider;
	}

	/**
	* Unzip a restore slice zip
	* @param zipFile source zip file
	* @param targetDir directory to keep the unzipped files
	* @throws IOException I/O exceptions during unzip
	* @throws RestoreJobException the zip file is malicious
	*/
	public static void unzip(File zipFile, File targetDir) throws IOException, RestoreJobException
	{
	try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipFile.toPath())))
	{
	ZipEntry zipEntry = zis.getNextEntry();

	while (zipEntry != null)
	{
	// Encounters a directory inside the zip file
	// It is not expected. The zip file should have the directory depth of 1.
	// The reason of not using isDirectory(): in test, it gets 'dir/file' directly, instead of 'dir/'
	if (zipEntry.getName().contains(File.separator))
	{
	throw new RestoreJobFatalException("Unexpected directory in slice zip file. File: " + zipFile);
	}

	File targetFile = newProtectedTargetFile(zipEntry, targetDir);
	Files.copy(zis, targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);

	zipEntry = zis.getNextEntry();
	}
	zis.closeEntry();
	}
	}

	/**
	* Resolve a prefixed job id for path
	* @param jobId restore job id
	* @return prefixed job id string
	*/
	public static String prefixedJobId(UUID jobId)
	{
	return RESTORE_JOB_PREFIX + jobId;
	}

	public static String generateUniqueUploadId(UUID jobId, String sliceId)
	{
	return prefixedJobId(jobId) + '-' + sliceId + '-' + ThreadLocalRandom.current().nextInt(10000);
	}

	/**
	* Extract the timestamp from the restore job directory name.
	* @param fileName directory file name
	* @return unix timestamp epoch; otherwise, return -1 if no timestamp can be extracted
	*/
	public static long timestampFromRestoreJobDir(String fileName)
	{
	if (!fileName.startsWith(RESTORE_JOB_PREFIX))
	return -1;

	try
	{
	UUID id = UUID.fromString(fileName.substring(RESTORE_JOB_PREFIX_LEN));
	return UUIDs.unixTimestamp(id);
	}
	catch (IllegalArgumentException e)
	{
	return -1;
	}
	}

	/**
	* Cleans given directory path without deleting it
	* @param path directory path to be cleaned
	* @throws IOException when I/O error occurs
	*/
	public static void cleanDirectory(Path path) throws IOException
	{
	if (!Files.isDirectory(path) \|\| !Files.exists(path))
	{
	LOGGER.warn("Clean directory API called for non existent directory: {}", path);
	return;
	}

	try (Stream<Path> contents = Files.list(path))
	{
	contents.forEach(filePath -> {
	try
	{
	if (Files.isDirectory(filePath))
	{
	cleanDirectory(filePath);
	}
	Files.delete(filePath);
	}
	catch (IOException e)
	{
	LOGGER.error("Unexpected error occurred while cleaning directory {}, ", path, e);
	throw new RuntimeException(e);
	}
	});
	}
	}

	/**
	* Create a file that is protected from zip slip attack (https://security.snyk.io/research/zip-slip-vulnerability)
	* @param zipEntry zip entry to be extracted
	* @param targetDir directory to keep the unzipped files
	* @return a new file
	* @throws IOException failed to resolving path
	* @throws RestoreJobException if the zip file is malicious
	*/
	private static File newProtectedTargetFile(ZipEntry zipEntry, File targetDir)
	throws IOException, RestoreJobException
	{
	File targetFile = new File(targetDir, zipEntry.getName());

	// Normalize the paths of both target dir and file
	String targetDirPath = targetDir.getCanonicalPath();
	String targetFilePath = targetFile.getCanonicalPath();

	if (!targetFilePath.startsWith(targetDirPath))
	{
	throw new RestoreJobException("Bad zip entry: " + zipEntry.getName());
	}

	return targetFile;
	}

	/**
	* @param file the file to use to perform the checksum
	* @return the checksum hex string of the file's content. XXHash32 is employed as the hash algorithm.
	*/
	public String checksum(File file) throws IOException
	{
	return checksum(file, RESTORE_JOB_DEFAULT_HASH_SEED);
	}

	/**
	* @param file the file to use to perform the checksum
	* @param seed the seed to use for the hasher
	* @return the checksum hex string of the file's content. XXHash32 is employed as the hash algorithm.
	*/
	public String checksum(File file, int seed) throws IOException
	{
	try (InputStream fis = Files.newInputStream(file.toPath()))
	{
	try (DigestAlgorithm digestAlgorithm = digestAlgorithmProvider.get(seed))
	{
	byte[] buffer = new byte[KB_512];
	int len;
	while ((len = fis.read(buffer)) != -1)
	{
	digestAlgorithm.update(buffer, 0, len);
	}
	return digestAlgorithm.digest();
	}
	}
	}

	/**
	* @return the current time in nanoseconds
	*/
	public long currentTimeNanos()
	{
	return System.nanoTime();
	}
	}