| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * <p> |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * <p> |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.fs.viewfs; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| |
| import org.apache.hadoop.classification.InterfaceAudience; |
| import org.apache.hadoop.classification.InterfaceStability; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.util.StringUtils; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import static org.apache.hadoop.fs.viewfs.InodeTree.SlashPath; |
| |
| /** |
| * Regex mount point is built to implement regex-based mount points. |
| */ |
| @InterfaceAudience.Private |
| @InterfaceStability.Unstable |
| class RegexMountPoint<T> { |
| private static final Logger LOGGER = |
| LoggerFactory.getLogger(RegexMountPoint.class.getName()); |
| |
| private InodeTree inodeTree; |
| private String srcPathRegex; |
| private Pattern srcPattern; |
| private String dstPath; |
| private String interceptorSettingsString; |
| private List<RegexMountPointInterceptor> interceptorList; |
| |
| public static final String SETTING_SRCREGEX_SEP = "#."; |
| public static final char INTERCEPTOR_SEP = ';'; |
| public static final char INTERCEPTOR_INTERNAL_SEP = ':'; |
| // ${var},$var |
| public static final Pattern VAR_PATTERN_IN_DEST = |
| Pattern.compile("\\$((\\{\\w+\\})|(\\w+))"); |
| |
| // Same var might have different representations. |
| // e.g. |
| // key => $key or key = > ${key} |
| private Map<String, Set<String>> varInDestPathMap; |
| |
| public Map<String, Set<String>> getVarInDestPathMap() { |
| return varInDestPathMap; |
| } |
| |
| RegexMountPoint(InodeTree inodeTree, String sourcePathRegex, |
| String destPath, String settingsStr) { |
| this.inodeTree = inodeTree; |
| this.srcPathRegex = sourcePathRegex; |
| this.dstPath = destPath; |
| this.interceptorSettingsString = settingsStr; |
| this.interceptorList = new ArrayList<>(); |
| } |
| |
| /** |
| * Initialize regex mount point. |
| * |
| * @throws IOException |
| */ |
| public void initialize() throws IOException { |
| try { |
| srcPattern = Pattern.compile(srcPathRegex); |
| } catch (PatternSyntaxException ex) { |
| throw new IOException( |
| "Failed to initialized mount point due to bad src path regex:" |
| + srcPathRegex + ", dstPath:" + dstPath, ex); |
| } |
| varInDestPathMap = getVarListInString(dstPath); |
| initializeInterceptors(); |
| } |
| |
| private void initializeInterceptors() throws IOException { |
| if (interceptorSettingsString == null |
| || interceptorSettingsString.isEmpty()) { |
| return; |
| } |
| String[] interceptorStrArray = |
| StringUtils.split(interceptorSettingsString, INTERCEPTOR_SEP); |
| for (String interceptorStr : interceptorStrArray) { |
| RegexMountPointInterceptor interceptor = |
| RegexMountPointInterceptorFactory.create(interceptorStr); |
| if (interceptor == null) { |
| throw new IOException( |
| "Illegal settings String " + interceptorSettingsString); |
| } |
| interceptor.initialize(); |
| interceptorList.add(interceptor); |
| } |
| } |
| |
| /** |
| * Get $var1 and $var2 style variables in string. |
| * |
| * @param input - the string to be process. |
| * @return |
| */ |
| public static Map<String, Set<String>> getVarListInString(String input) { |
| Map<String, Set<String>> varMap = new HashMap<>(); |
| Matcher matcher = VAR_PATTERN_IN_DEST.matcher(input); |
| while (matcher.find()) { |
| // $var or ${var} |
| String varName = matcher.group(0); |
| // var or {var} |
| String strippedVarName = matcher.group(1); |
| if (strippedVarName.startsWith("{")) { |
| // {varName} = > varName |
| strippedVarName = |
| strippedVarName.substring(1, strippedVarName.length() - 1); |
| } |
| varMap.putIfAbsent(strippedVarName, new HashSet<>()); |
| varMap.get(strippedVarName).add(varName); |
| } |
| return varMap; |
| } |
| |
| public String getSrcPathRegex() { |
| return srcPathRegex; |
| } |
| |
| public Pattern getSrcPattern() { |
| return srcPattern; |
| } |
| |
| public String getDstPath() { |
| return dstPath; |
| } |
| |
| public static Pattern getVarPatternInDest() { |
| return VAR_PATTERN_IN_DEST; |
| } |
| |
| /** |
| * Get resolved path from regex mount points. |
| * E.g. link: ^/user/(?<username>\\w+) => s3://$user.apache.com/_${user} |
| * srcPath: is /user/hadoop/dir1 |
| * resolveLastComponent: true |
| * then return value is s3://hadoop.apache.com/_hadoop |
| * @param srcPath - the src path to resolve |
| * @param resolveLastComponent - whether resolve the path after last `/` |
| * @return mapped path of the mount point. |
| */ |
| public InodeTree.ResolveResult<T> resolve(final String srcPath, |
| final boolean resolveLastComponent) { |
| String pathStrToResolve = getPathToResolve(srcPath, resolveLastComponent); |
| for (RegexMountPointInterceptor interceptor : interceptorList) { |
| pathStrToResolve = interceptor.interceptSource(pathStrToResolve); |
| } |
| LOGGER.debug("Path to resolve:" + pathStrToResolve + ", srcPattern:" |
| + getSrcPathRegex()); |
| Matcher srcMatcher = getSrcPattern().matcher(pathStrToResolve); |
| String parsedDestPath = getDstPath(); |
| int mappedCount = 0; |
| String resolvedPathStr = ""; |
| while (srcMatcher.find()) { |
| resolvedPathStr = pathStrToResolve.substring(0, srcMatcher.end()); |
| Map<String, Set<String>> varMap = getVarInDestPathMap(); |
| for (Map.Entry<String, Set<String>> entry : varMap.entrySet()) { |
| String regexGroupNameOrIndexStr = entry.getKey(); |
| Set<String> groupRepresentationStrSetInDest = entry.getValue(); |
| parsedDestPath = replaceRegexCaptureGroupInPath( |
| parsedDestPath, srcMatcher, |
| regexGroupNameOrIndexStr, groupRepresentationStrSetInDest); |
| } |
| ++mappedCount; |
| } |
| if (0 == mappedCount) { |
| return null; |
| } |
| Path remainingPath = getRemainingPathStr(srcPath, resolvedPathStr); |
| for (RegexMountPointInterceptor interceptor : interceptorList) { |
| parsedDestPath = interceptor.interceptResolvedDestPathStr(parsedDestPath); |
| remainingPath = |
| interceptor.interceptRemainingPath(remainingPath); |
| } |
| InodeTree.ResolveResult resolveResult = inodeTree |
| .buildResolveResultForRegexMountPoint(InodeTree.ResultKind.EXTERNAL_DIR, |
| resolvedPathStr, parsedDestPath, remainingPath); |
| return resolveResult; |
| } |
| |
| private Path getRemainingPathStr( |
| String srcPath, |
| String resolvedPathStr) { |
| String remainingPathStr = srcPath.substring(resolvedPathStr.length()); |
| if (!remainingPathStr.startsWith("/")) { |
| remainingPathStr = "/" + remainingPathStr; |
| } |
| return new Path(remainingPathStr); |
| } |
| |
| private String getPathToResolve( |
| String srcPath, boolean resolveLastComponent) { |
| if (resolveLastComponent) { |
| return srcPath; |
| } |
| int lastSlashIndex = srcPath.lastIndexOf(SlashPath.toString()); |
| if (lastSlashIndex == -1) { |
| return null; |
| } |
| return srcPath.substring(0, lastSlashIndex); |
| } |
| |
| /** |
| * Use capture group named regexGroupNameOrIndexStr in mather to replace |
| * parsedDestPath. |
| * E.g. link: ^/user/(?<username>\\w+) => s3://$user.apache.com/_${user} |
| * srcMatcher is from /user/hadoop. |
| * Then the params will be like following. |
| * parsedDestPath: s3://$user.apache.com/_${user}, |
| * regexGroupNameOrIndexStr: user |
| * groupRepresentationStrSetInDest: {user:$user; user:${user}} |
| * return value will be s3://hadoop.apache.com/_hadoop |
| * @param parsedDestPath |
| * @param srcMatcher |
| * @param regexGroupNameOrIndexStr |
| * @param groupRepresentationStrSetInDest |
| * @return return parsedDestPath while ${var},$var replaced or |
| * parsedDestPath nothing found. |
| */ |
| private String replaceRegexCaptureGroupInPath( |
| String parsedDestPath, |
| Matcher srcMatcher, |
| String regexGroupNameOrIndexStr, |
| Set<String> groupRepresentationStrSetInDest) { |
| String groupValue = getRegexGroupValueFromMather( |
| srcMatcher, regexGroupNameOrIndexStr); |
| if (groupValue == null) { |
| return parsedDestPath; |
| } |
| for (String varName : groupRepresentationStrSetInDest) { |
| parsedDestPath = parsedDestPath.replace(varName, groupValue); |
| LOGGER.debug("parsedDestPath value is:" + parsedDestPath); |
| } |
| return parsedDestPath; |
| } |
| |
| /** |
| * Get matched capture group value from regex matched string. E.g. |
| * Regex: ^/user/(?<username>\\w+), regexGroupNameOrIndexStr: userName |
| * then /user/hadoop should return hadoop while call |
| * getRegexGroupValueFromMather(matcher, usersName) |
| * or getRegexGroupValueFromMather(matcher, 1) |
| * |
| * @param srcMatcher - the matcher to be use |
| * @param regexGroupNameOrIndexStr - the regex group name or index |
| * @return - Null if no matched group named regexGroupNameOrIndexStr found. |
| */ |
| private String getRegexGroupValueFromMather( |
| Matcher srcMatcher, String regexGroupNameOrIndexStr) { |
| if (regexGroupNameOrIndexStr.matches("\\d+")) { |
| // group index |
| int groupIndex = Integer.parseUnsignedInt(regexGroupNameOrIndexStr); |
| if (groupIndex >= 0 && groupIndex <= srcMatcher.groupCount()) { |
| return srcMatcher.group(groupIndex); |
| } |
| } else { |
| // named group in regex |
| return srcMatcher.group(regexGroupNameOrIndexStr); |
| } |
| return null; |
| } |
| |
| } |