| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.fs.s3a.audit; |
| |
| import java.net.URISyntaxException; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| |
| import com.amazonaws.services.s3.model.GetObjectMetadataRequest; |
| import org.junit.Before; |
| import org.junit.Test; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor; |
| import org.apache.hadoop.fs.store.audit.AuditSpan; |
| import org.apache.hadoop.fs.audit.CommonAuditContext; |
| import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; |
| import org.apache.hadoop.security.UserGroupInformation; |
| |
| import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; |
| import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER; |
| import static org.apache.hadoop.fs.s3a.audit.S3LogParser.*; |
| import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.HEADER_REFERRER; |
| import static org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader.maybeStripWrappedQuotes; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_FILESYSTEM_ID; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_ID; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_OP; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PATH2; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PRINCIPAL; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD0; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD1; |
| import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_TIMESTAMP; |
| import static org.assertj.core.api.Assertions.assertThat; |
| |
| /** |
|  * Tests for referrer audit header generation/parsing. |
|  * <p> |
|  * The tests cover three areas: the header attached to outgoing requests |
|  * by the logging auditor, the parsing of a sample S3 server access log |
|  * entry, and the stripping of wrapped quotes from header fields. |
|  */ |
| public class TestHttpReferrerAuditHeader extends AbstractAuditingTest { |
| |
|   /** |
|    * Logging. |
|    */ |
|   private static final Logger LOG = |
|       LoggerFactory.getLogger(TestHttpReferrerAuditHeader.class); |
| |
|   /** |
|    * The auditor of the manager, cast to the logging implementation |
|    * so that tests can query its ID, add attributes and read the |
|    * last header it generated. |
|    */ |
|   private LoggingAuditor auditor; |
| |
|   /** |
|    * Run the base class setup, then retrieve the auditor from the manager. |
|    * @throws Exception failure of the base class setup. |
|    */ |
|   @Before |
|   @Override |
|   public void setup() throws Exception { |
|     super.setup(); |
| |
|     auditor = (LoggingAuditor) getManager().getAuditor(); |
|   } |
| |
|   /** |
|    * Create the config from {@link AuditTestSupport#loggingAuditConfig()} |
|    * and patch in filtering for fields x1, x2, x3. |
|    * NOTE(review): presumably a hook invoked by the base class during |
|    * setup; confirm before adding an override annotation. |
|    * @return a logging configuration. |
|    */ |
|   protected Configuration createConfig() { |
|     final Configuration conf = loggingAuditConfig(); |
|     conf.set(REFERRER_HEADER_FILTER, "x1, x2, x3"); |
|     return conf; |
|   } |
| |
|   /** |
|    * This verifies that passing a request through the audit manager |
|    * causes the http referrer header to be added, that it can |
|    * be split to query parameters, and that those parameters match |
|    * those of the active wrapped span. |
|    */ |
|   @Test |
|   public void testHttpReferrerPatchesTheRequest() throws Throwable { |
|     AuditSpan span = span(); |
|     // capture the timestamp before the request is issued so that |
|     // it can be compared with the value placed in the header. |
|     long ts = span.getTimestamp(); |
|     GetObjectMetadataRequest request = head(); |
|     Map<String, String> headers |
|         = request.getCustomRequestHeaders(); |
|     assertThat(headers) |
|         .describedAs("Custom headers") |
|         .containsKey(HEADER_REFERRER); |
|     String header = headers.get(HEADER_REFERRER); |
|     LOG.info("Header is {}", header); |
|     Map<String, String> params |
|         = HttpReferrerAuditHeader.extractQueryParameters(header); |
|     assertParamsMatchSpan(params, span, ts, PATH_1, PATH_2); |
|   } |
| |
|   /** |
|    * Test that a header with complex paths including spaces |
|    * and colons can be converted to a URI and back again |
|    * without the path getting corrupted. |
|    */ |
|   @Test |
|   public void testHeaderComplexPaths() throws Throwable { |
|     String p1 = "s3a://dotted.bucket/path: value/subdir"; |
|     String p2 = "s3a://key/"; |
|     AuditSpan span = getManager().createSpan(OPERATION, p1, p2); |
|     long ts = span.getTimestamp(); |
|     Map<String, String> params = issueRequestAndExtractParameters(); |
|     assertParamsMatchSpan(params, span, ts, p1, p2); |
|   } |
| |
|   /** |
|    * Assert that the parameters parsed from a referrer header contain |
|    * the principal, filesystem ID, operation, paths, thread IDs, |
|    * span ID and timestamp expected for the given span. |
|    * Also asserts that the timestamp of the span has not changed |
|    * since {@code ts} was read. |
|    * @param params parameters extracted from the referrer header. |
|    * @param span span under which the request was issued. |
|    * @param ts timestamp of the span, read before the request was issued. |
|    * @param path1 expected value of the first path parameter. |
|    * @param path2 expected value of the second path parameter. |
|    * @throws Exception failure to look up the current user. |
|    */ |
|   private void assertParamsMatchSpan( |
|       final Map<String, String> params, |
|       final AuditSpan span, |
|       final long ts, |
|       final String path1, |
|       final String path2) throws Exception { |
|     assertMapContains(params, PARAM_PRINCIPAL, |
|         UserGroupInformation.getCurrentUser().getUserName()); |
|     assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId()); |
|     assertMapContains(params, PARAM_OP, OPERATION); |
|     assertMapContains(params, PARAM_PATH, path1); |
|     assertMapContains(params, PARAM_PATH2, path2); |
|     // span creation and the request both happen in the test thread, |
|     // so both thread parameters are expected to carry the same ID. |
|     String threadID = CommonAuditContext.currentThreadID(); |
|     assertMapContains(params, PARAM_THREAD0, threadID); |
|     assertMapContains(params, PARAM_THREAD1, threadID); |
|     assertMapContains(params, PARAM_ID, span.getSpanId()); |
|     assertThat(span.getTimestamp()) |
|         .describedAs("Timestamp of " + span) |
|         .isEqualTo(ts); |
| |
|     assertMapContains(params, PARAM_TIMESTAMP, |
|         Long.toString(ts)); |
|   } |
| |
|   /** |
|    * Issue a request, then get the header field and parse it to the parameter. |
|    * @return map of query params on the referrer header. |
|    * @throws URISyntaxException failure to parse the header as a URI. |
|    */ |
|   private Map<String, String> issueRequestAndExtractParameters() |
|       throws URISyntaxException { |
|     head(); |
|     return HttpReferrerAuditHeader.extractQueryParameters( |
|         auditor.getLastHeader()); |
|   } |
| |
| |
|   /** |
|    * Test that headers are filtered out if configured. |
|    * NOTE(review): only the absence of x2 is asserted; the attributes |
|    * are added after the span has been created, so confirm that they |
|    * are picked up by that span's header, otherwise the check is vacuous. |
|    */ |
|   @Test |
|   public void testHeaderFiltering() throws Throwable { |
|     // create the span for the request; the returned reference |
|     // is not needed by this test. |
|     getManager().createSpan(OPERATION, null, null); |
|     // add two attributes, x2 will be filtered. |
|     auditor.addAttribute("x0", "x0"); |
|     auditor.addAttribute("x2", "x2"); |
|     final Map<String, String> params |
|         = issueRequestAndExtractParameters(); |
|     assertThat(params) |
|         .describedAs("Parameters of the referrer header") |
|         .doesNotContainKey("x2"); |
| |
|   } |
| |
|   /** |
|    * A real log entry. |
|    * This is derived from a real log entry on a test run. |
|    * If this needs to be updated, please do it from a real log. |
|    * Splitting this up across lines has a tendency to break things, so |
|    * be careful making changes. |
|    */ |
|   public static final String SAMPLE_LOG_ENTRY = |
|       "183c9826b45486e485693808f38e2c4071004bf5dfd4c3ab210f0a21a4000000" |
|           + " bucket-london" |
|           + " [13/May/2021:11:26:06 +0000]" |
|           + " 109.157.171.174" |
|           + " arn:aws:iam::152813717700:user/dev" |
|           + " M7ZB7C4RTKXJKTM9" |
|           + " REST.PUT.OBJECT" |
|           + " fork-0001/test/testParseBrokenCSVFile" |
|           + " \"PUT /fork-0001/test/testParseBrokenCSVFile HTTP/1.1\"" |
|           + " 200" |
|           + " -" |
|           + " -" |
|           + " 794" |
|           + " 55" |
|           + " 17" |
|           + " \"https://audit.example.org/hadoop/1/op_create/" |
|           + "e8ede3c7-8506-4a43-8268-fe8fcbb510a4-00000278/" |
|           + "?op=op_create" |
|           + "&p1=fork-0001/test/testParseBrokenCSVFile" |
|           + "&pr=alice" |
|           + "&ps=2eac5a04-2153-48db-896a-09bc9a2fd132" |
|           + "&id=e8ede3c7-8506-4a43-8268-fe8fcbb510a4-00000278&t0=154" |
|           + "&fs=e8ede3c7-8506-4a43-8268-fe8fcbb510a4&t1=156&" |
|           + "ts=1620905165700\"" |
|           + " \"Hadoop 3.4.0-SNAPSHOT, java/1.8.0_282 vendor/AdoptOpenJDK\"" |
|           + " -" |
|           + " TrIqtEYGWAwvu0h1N9WJKyoqM0TyHUaY+ZZBwP2yNf2qQp1Z/0=" |
|           + " SigV4" |
|           + " ECDHE-RSA-AES128-GCM-SHA256" |
|           + " AuthHeader" |
|           + " bucket-london.s3.eu-west-2.amazonaws.com" |
|           + " TLSv1.2"; |
| |
|   /** |
|    * Text used in assertion descriptions: the sample entry and the |
|    * pattern it is matched against. |
|    */ |
|   private static final String DESCRIPTION = String.format( |
|       "log entry %s split by %s", SAMPLE_LOG_ENTRY, |
|       LOG_ENTRY_PATTERN); |
| |
|   /** |
|    * Match the log entry and validate the results. |
|    * The whole entry must match the pattern, every named group must be |
|    * retrievable, the verb must be PUT and the referrer must parse to |
|    * the query parameters embedded in the sample entry. |
|    */ |
|   @Test |
|   public void testMatchAWSLogEntry() throws Throwable { |
| |
|     LOG.info("Matcher pattern is\n'{}'", LOG_ENTRY_PATTERN); |
|     LOG.info("Log entry is\n'{}'", SAMPLE_LOG_ENTRY); |
|     final Matcher matcher = LOG_ENTRY_PATTERN.matcher(SAMPLE_LOG_ENTRY); |
| |
|     // match the pattern against the entire log entry. |
|     assertThat(matcher.matches()) |
|         .describedAs("matches() " + DESCRIPTION) |
|         .isTrue(); |
|     final int groupCount = matcher.groupCount(); |
|     assertThat(groupCount) |
|         .describedAs("Group count of " + DESCRIPTION) |
|         .isGreaterThanOrEqualTo(AWS_LOG_REGEXP_GROUPS.size()); |
| |
|     // now go through the groups |
| |
|     for (String name : AWS_LOG_REGEXP_GROUPS) { |
|       try { |
|         final String group = matcher.group(name); |
|         LOG.info("[{}]: '{}'", name, group); |
|       } catch (IllegalStateException | IllegalArgumentException e) { |
|         // group failure. |
|         // IllegalArgumentException is what Matcher.group(String) raises |
|         // when the pattern has no group of that name; after a successful |
|         // matches() call that is the failure most likely to occur here. |
|         throw new AssertionError("No match for group <" + name + ">: " |
|             + e, e); |
|       } |
|     } |
|     // Named capturing groups are also numbered, so iterating by index |
|     // revisits every named group printed above. Any further parenthesised |
|     // sub-groups in the pattern (not visible from this class) would be |
|     // listed too, and can capture the same text as the group around them. |
|     for (int i = 1; i <= groupCount; i++) { |
|       try { |
|         final String group = matcher.group(i); |
|         LOG.info("[{}]: '{}'", i, group); |
|       } catch (IllegalStateException | IndexOutOfBoundsException e) { |
|         // group failure |
|         throw new AssertionError("No match for group " + i |
|             +": "+ e, e); |
|       } |
|     } |
| |
|     // verb |
|     assertThat(nonBlankGroup(matcher, VERB_GROUP)) |
|         .describedAs("HTTP Verb") |
|         .isEqualTo(S3LogVerbs.PUT); |
| |
|     // referrer |
|     final String referrer = nonBlankGroup(matcher, REFERRER_GROUP); |
|     Map<String, String> params |
|         = HttpReferrerAuditHeader.extractQueryParameters(referrer); |
|     LOG.info("Parsed referrer"); |
|     for (Map.Entry<String, String> entry : params.entrySet()) { |
|       LOG.info("{} = \"{}\"", entry.getKey(), entry.getValue()); |
|     } |
|     // validate the parse rather than only logging it; the keys and values |
|     // are copied verbatim from the referrer inside SAMPLE_LOG_ENTRY. |
|     assertThat(params) |
|         .describedAs("Query parameters parsed from referrer %s", referrer) |
|         .containsEntry("op", "op_create") |
|         .containsEntry("pr", "alice"); |
|   } |
| |
|   /** |
|    * Get a group entry which must be non-blank. |
|    * @param matcher matcher |
|    * @param group group name |
|    * @return value |
|    */ |
|   private String nonBlankGroup(final Matcher matcher, |
|       final String group) { |
|     final String g = matcher.group(group); |
|     assertThat(g) |
|         .describedAs("Value of group %s", group) |
|         .isNotBlank(); |
|     return g; |
|   } |
| |
|   /** |
|    * Verify the header quote stripping works. |
|    */ |
|   @Test |
|   public void testStripWrappedQuotes() throws Throwable { |
|     expectStrippedField("", ""); |
|     expectStrippedField("\"UA\"", "UA"); |
|     expectStrippedField("\"\"\"\"", ""); |
|     expectStrippedField("\"\"\"b\"", "b"); |
|   } |
| |
|   /** |
|    * Expect a field with quote stripping to match the expected value. |
|    * @param str string to strip |
|    * @param ex expected value. |
|    */ |
|   private void expectStrippedField(final String str, |
|       final String ex) { |
|     assertThat(maybeStripWrappedQuotes(str)) |
|         .describedAs("Stripped <%s>", str) |
|         .isEqualTo(ex); |
|   } |
| } |