#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import logging
import re
from pathlib import Path
from typing import List
from enum import Enum
from cmd_utils import Sh, REPO_ROOT, init_log
RETRY_SCRIPT = REPO_ROOT / "ci" / "scripts" / "jenkins" / "retry.sh"
S3_DOWNLOAD_REGEX = re.compile(r"download: s3://.* to (.*)")
SH = Sh()


class Action(Enum):
    UPLOAD = 1
    DOWNLOAD = 2


def show_md5(item: str) -> None:
    # Only regular files can be hashed, so skip directories
    if not Path(item).is_dir():
        SH.run(f"md5sum {item}")


def parse_output_files(stdout: str) -> List[str]:
    """
    Grab the list of downloaded files from the output of 'aws s3 cp'. Lines look
    like:

        download: s3://some/prefix/a_file.txt to a_file.txt
    """
    files = []
    for line in stdout.split("\n"):
        line = line.strip()
        if line == "":
            continue
        m = S3_DOWNLOAD_REGEX.match(line)
        if m:
            files.append(m.groups()[0])
    return files
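
# Example (illustrative, mirroring the line format in the docstring above):
#   parse_output_files("download: s3://some/prefix/a_file.txt to a_file.txt\n")
#   returns ["a_file.txt"]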


def chmod(files: List[str]) -> None:
    """
    S3 has no concept of file permissions, so restore them here on every downloaded file
    """
    # Add the execute bit for downloads
    to_chmod = [str(f) for f in files]
    logging.info(f"Adding execute bit for files: {to_chmod}")
    if len(to_chmod) > 0:
        SH.run(f"chmod +x {' '.join(to_chmod)}")


def s3(source: str, destination: str, recursive: bool) -> str:
    """
    Upload or download 'source' to 'destination' via 'aws s3 cp', retrying on failure
    """
    cmd = f". {RETRY_SCRIPT.relative_to(REPO_ROOT)} && retry 3 aws s3 cp --no-progress"
    if recursive:
        cmd += " --recursive"
    cmd += f" {source} {destination}"
    _, stdout = SH.tee(cmd)
    return stdout
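
# For a recursive download of an entire prefix the constructed command looks roughly
# like (bucket and prefix are placeholders):
#   . ci/scripts/jenkins/retry.sh && retry 3 aws s3 cp --no-progress --recursive s3://some-bucket/some-prefix .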


if __name__ == "__main__":
    init_log()
    help = "Uploads or downloads files from S3"
    parser = argparse.ArgumentParser(description=help)
    parser.add_argument("--action", help="either 'upload' or 'download'", required=True)
    parser.add_argument("--bucket", help="s3 bucket", required=True)
    parser.add_argument(
        "--prefix",
        help="s3 prefix under the bucket (e.g. PR-1234/cpu for s3://tvm-ci-prod/PR-1234/cpu)",
        required=True,
    )
    parser.add_argument("--items", help="files and folders to upload or download", nargs="+")
    args = parser.parse_args()
    logging.info(args)

    if Path.cwd() != REPO_ROOT:
        logging.error(f"s3.py can only be executed from the repo root, but was run from {Path.cwd()}")
        exit(1)

    prefix = args.prefix.strip("/")
    s3_path = f"s3://{args.bucket}/{prefix}"
    logging.info(f"Using s3 path: {s3_path}")

    if args.action == "upload":
        action = Action.UPLOAD
    elif args.action == "download":
        action = Action.DOWNLOAD
    else:
        logging.error(f"Unsupported action: {args.action}")
        exit(1)

    if args.items is None:
        if args.action == "upload":
            logging.error("Cannot upload without --items")
            exit(1)
        else:
            # Download the whole prefix
            items = ["."]
    else:
        items = args.items

    for item in items:
        if action == Action.DOWNLOAD:
            source = s3_path
            recursive = True
            if item != ".":
                source = s3_path + "/" + item
                recursive = False
            stdout = s3(source=source, destination=item, recursive=recursive)
            files = parse_output_files(stdout)
            chmod(files)
            for file in files:
                # Show md5 after downloading
                show_md5(file)
        elif action == Action.UPLOAD:
            if not Path(item).exists():
                logging.warning(f"The path doesn't exist: {item}")
                continue
            show_md5(item)
            if Path(item).is_dir():
                if len(list(Path(item).glob("**/*"))) == 0:
                    raise RuntimeError(f"Cannot upload empty folder with name: {item}")
            s3(item, s3_path + "/" + item, recursive=Path(item).is_dir())
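
# Illustrative invocations (bucket, prefix, and item names are made up; the script path
# is assumed to be ci/scripts/jenkins/s3.py, alongside retry.sh):
#   python3 ci/scripts/jenkins/s3.py --action upload --bucket my-ci-bucket \
#       --prefix PR-1234/cpu --items build/libtvm.so docs.tgz
#   python3 ci/scripts/jenkins/s3.py --action download --bucket my-ci-bucket \
#       --prefix PR-1234/cpu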