blob: 021497ea2607a6a300c11c5f4e70a922f22f26ae [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.epam.dlab.module.aws;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.epam.dlab.core.ModuleData;
import com.epam.dlab.exceptions.AdapterException;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;
/**
* Creates a listing of billing report files found in an AWS S3 bucket.
* See details in
* <a href="http://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports.html#detailed-report-with-resources-tags">
* Detailed billing report with resources and tags</a>.
*/
public class S3FileList {

    /**
     * Report file suffix (extension), without date.
     */
    private static final String REPORT_SUFFIX = ".csv";

    /**
     * Date regex for YYYYMMDD
     */
    private static final String DATE_REGEX = "\\d{4}(0?[1-9]|1[012])(0?[1-9]|[12][0-9]|3[01])";

    /**
     * Regex for report keys used when the AWS job mode is enabled. Capturing group 1 is the key prefix up to and
     * including the {@code <date>-<date>} segment; it is used as the grouping key for a billing period.
     */
    private static final String REGEX = String.format("(^.*/.*/%s-%s)/.*/*.\\%s", DATE_REGEX, DATE_REGEX,
            REPORT_SUFFIX);

    /**
     * Bucket name.
     */
    private final String bucket;

    /**
     * Module data, queried to find out whether a report file has already been processed.
     */
    private final ModuleData moduleData;

    /**
     * Pattern that an object key must match to be treated as a report file.
     */
    private final Pattern reportPattern;

    /**
     * Selects the key pattern and the way the resulting file list is built (see {@link #getFiles(AmazonS3)}).
     */
    private final boolean awsJobEnabled;

    /**
     * Instantiate file find class.
     *
     * @param awsJobEnabled if <b>true</b>, keys are matched against the billing-period regex and only the files
     *                      of the most recently modified report per billing period are returned; otherwise every
     *                      key ending with the report suffix is considered.
     * @param bucket        the name of bucket.
     * @param moduleData    data for working module
     */
    public S3FileList(boolean awsJobEnabled, String bucket, ModuleData moduleData) {
        this.bucket = bucket;
        this.moduleData = moduleData;
        this.awsJobEnabled = awsJobEnabled;
        // The suffix is quoted so that its leading '.' is matched literally and not as "any character".
        this.reportPattern = awsJobEnabled
                ? Pattern.compile(REGEX)
                : Pattern.compile(".*" + Pattern.quote(REPORT_SUFFIX) + "$");
    }

    /**
     * Return the list of files for new reports, sorted by key.
     *
     * @param s3Client the S3 client.
     * @return the sorted list of object keys.
     * @throws AdapterException if the bucket listing cannot be retrieved.
     */
    public List<String> getFiles(AmazonS3 s3Client) throws AdapterException {
        final List<S3ObjectSummary> objectSummaries = reportFilesInBillingBucket(s3Client);
        if (awsJobEnabled) {
            return lastFilesPerBillingPeriod(objectSummaries);
        }
        return objectSummaries.stream()
                .map(S3ObjectSummary::getKey)
                .sorted()
                .collect(toList());
    }

    /**
     * Lists the bucket page by page and collects the report files that have not been processed yet.
     *
     * @param s3Client the S3 client.
     * @return summaries of matching, not yet processed objects.
     * @throws AdapterException if any error occurs while listing the bucket.
     */
    private List<S3ObjectSummary> reportFilesInBillingBucket(AmazonS3 s3Client) throws AdapterException {
        final ListObjectsV2Request request = new ListObjectsV2Request()
                .withBucketName(bucket);
        final List<S3ObjectSummary> objectSummaries = new ArrayList<>();
        try {
            ListObjectsV2Result result;
            do {
                result = s3Client.listObjectsV2(request);
                objectSummaries.addAll(notProcessedFiles(result));
                // Without the continuation token the same (first) page would be requested again and again
                // whenever the listing is truncated.
                request.setContinuationToken(result.getNextContinuationToken());
            } while (result.isTruncated());
        } catch (Exception e) {
            throw new AdapterException("Cannot get the file listing of bucket \"" + bucket + "*\". " +
                    e.getLocalizedMessage(), e);
        }
        return objectSummaries;
    }

    /**
     * Filters one page of the listing down to report files which were not processed.
     *
     * @param result one page of the bucket listing.
     * @return matching object summaries of that page.
     */
    private List<S3ObjectSummary> notProcessedFiles(ListObjectsV2Result result) {
        return result.getObjectSummaries()
                .stream()
                .filter(this::matchBillingRegexAndWasNotProcessed)
                .collect(toList());
    }

    /**
     * Checks that the object key matches the report pattern and that module data does not report the object
     * (key, modification date and date prefix) as processed.
     *
     * @param o amazon s3 object
     * @return <b>true</b> if the object is a report file which still has to be processed.
     */
    private boolean matchBillingRegexAndWasNotProcessed(S3ObjectSummary o) {
        if (!reportPattern.matcher(o.getKey()).matches()) {
            return false;
        }
        return !moduleData.wasProcessed(o.getKey(), o.getLastModified(), extractDatePrefix(reportPattern, o));
    }

    /**
     * Returns list of files per billing period.
     * For a particular billing period the files located in the same folder as the file with the biggest
     * modification date will be returned.
     *
     * @param objectSummaries amazon s3 objects
     * @return sorted list of file names
     */
    protected List<String> lastFilesPerBillingPeriod(List<S3ObjectSummary> objectSummaries) {
        final Map<String, List<S3ObjectSummary>> months = objectSummaries.stream()
                .collect(Collectors.groupingBy(o -> extractDatePrefix(reportPattern, o)));
        return months.entrySet()
                .stream()
                .flatMap(this::lastFileForBillingPeriod)
                .sorted()
                .collect(toList());
    }

    /**
     * Selects, for one billing period, the keys which start with the folder of the most recently modified file.
     *
     * @param entry billing period prefix mapped to the objects found for it.
     * @return stream of the selected object keys.
     */
    private Stream<String> lastFileForBillingPeriod(Map.Entry<String, List<S3ObjectSummary>> entry) {
        final List<S3ObjectSummary> assemblyIds = entry.getValue();
        final S3ObjectSummary lastBillingFile = assemblyIds.stream()
                .max(Comparator.comparing(S3ObjectSummary::getLastModified))
                .orElseThrow(() -> new IllegalStateException("AssemblyId does not contains any file"));
        // Everything before the last '/' of the newest key; computed once instead of once per element.
        final String lastReportFolder = StringUtils.substringBeforeLast(lastBillingFile.getKey(), "/");
        return assemblyIds.stream()
                .map(S3ObjectSummary::getKey)
                .filter(key -> key.startsWith(lastReportFolder));
    }

    /**
     * Extracts the billing period prefix (capturing group 1 of the pattern) from the object key.
     *
     * @param pattern pattern to apply to the key.
     * @param o       amazon s3 object
     * @return the captured prefix if the AWS job mode is enabled and the pattern is found in the key,
     * otherwise the key itself.
     */
    private String extractDatePrefix(Pattern pattern, S3ObjectSummary o) {
        final String key = o.getKey();
        // Group 1 is only read in the AWS job mode, so the key is not scanned at all otherwise.
        if (awsJobEnabled) {
            final Matcher matcher = pattern.matcher(key);
            if (matcher.find()) {
                return matcher.group(1);
            }
        }
        return key;
    }
}