blob: e9c631867d9ed546debf00ace96228256b066876 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oodt.cas.pushpull.filerestrictions.parsers;
//OODT imports
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.pushpull.filerestrictions.Parser;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFile;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFileStructure;
import org.apache.oodt.cas.pushpull.exceptions.ParserException;
//Google imports
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
//JDK imports
import java.io.FileInputStream;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A generic email parser which generates file paths to be downloaded by using a defined java
* Pattern. The pattern should specify pattern groups for file paths in the matching pattern.
* These groups will then be extracted and added to the file structure.
*
* @author bfoster@apache.org (Brian Foster)
*/
public class GenericEmailParser implements Parser {
private static final Logger log = Logger.getLogger(GenericEmailParser.class.getCanonicalName());
public static final String FILE_PATTERNS_PROPERTY_NAME =
"org.apache.oodt.cas.pushpull.generic.email.parser.file.pattern";
public static final String CHECK_FOR_PATTERN_PROPERTY_NAME =
"org.apache.oodt.cas.pushpull.generic.email.parser.check.for.pattern";
public static final String PATH_TO_ROOT_PROPERTY_NAME =
"org.apache.oodt.cas.pushpull.generic.email.parser.path.to.root";
public static final String METADATA_KEYS =
"org.apache.oodt.cas.pushpull.generic.email.parser.metadata.keys";
public static final String METADATA_KEY_PREFIX =
"org.apache.oodt.cas.pushpull.generic.email.parser.metadata.";
private final String filePattern;
private final String checkForPattern;
private final String pathToRoot;
public GenericEmailParser() {
filePattern = loadFilePattern();
checkForPattern = loadCheckForPattern();
pathToRoot = loadPathToRoot();
}
public GenericEmailParser(String filePattern, String checkForPattern, String pathToRoot) {
this.filePattern = filePattern;
this.checkForPattern = checkForPattern;
this.pathToRoot = Strings.nullToEmpty(pathToRoot);
}
@Override
public VirtualFileStructure parse(FileInputStream emailFile, Metadata metadata)
throws ParserException {
log.info("GenericEmailParser is parsing email: " + emailFile);
VirtualFile root = VirtualFile.createRootDir();
String emailText = readEmail(emailFile);
if (!isValidEmail(emailText)) {
throw new ParserException("Failed to find check for pattern in email: " + checkForPattern);
}
List<String> filePaths = generateFilePaths(emailText);
readMetadata(emailText, metadata);
for (String filePath : filePaths) {
new VirtualFile(root, pathToRoot + filePath, false);
}
return new VirtualFileStructure("/", root);
}
private String readEmail(FileInputStream emailFile) {
StringBuilder emailText = new StringBuilder("");
Scanner scanner = new Scanner(emailFile);
while (scanner.hasNextLine()) {
emailText.append(scanner.nextLine()).append("\n");
}
scanner.close();
return emailText.toString();
}
private List<String> generateFilePaths(String emailText) throws ParserException {
List<String> filePaths = Lists.newArrayList();
Pattern pattern = Pattern.compile(filePattern);
Matcher m = pattern.matcher(emailText);
if (m.find()) {
// Ignore index 0, as that is the matching string for pattern.
for (int i = 1; i <= m.groupCount(); i++) {
filePaths.add(m.group(i));
}
}
return filePaths;
}
private void readMetadata(String emailText, Metadata metadata) {
Set<String> metadataKeys = loadMetadataKeys();
for (String metadataKey : metadataKeys) {
String metadataPattern = loadMetadataKey(metadataKey);
if (metadataPattern == null) {
log.log(Level.SEVERE, "Failed to load metadata pattern for key: " + metadataKey);
} else {
Pattern pattern = Pattern.compile(metadataPattern);
Matcher m = pattern.matcher(emailText);
if (m.find()) {
// Ignore index 0, as that is the matching string for pattern.
String metadatValue = m.group(1);
metadata.replaceMetadata(metadataKey, metadatValue);
}
}
}
}
private boolean isValidEmail(String emailText) {
Pattern pattern = Pattern.compile(checkForPattern);
Matcher m = pattern.matcher(emailText.replaceAll("\n", " "));
return m.find();
}
private String loadFilePattern() {
return System.getProperty(FILE_PATTERNS_PROPERTY_NAME);
}
private String loadCheckForPattern() {
return System.getProperty(CHECK_FOR_PATTERN_PROPERTY_NAME);
}
private String loadPathToRoot() {
return Strings.nullToEmpty(System.getProperty(PATH_TO_ROOT_PROPERTY_NAME));
}
private Set<String> loadMetadataKeys() {
return Sets.newHashSet(Splitter.on(",").omitEmptyStrings().split(
Strings.nullToEmpty(System.getProperty(METADATA_KEYS))));
}
private String loadMetadataKey(String key) {
return System.getProperty(METADATA_KEY_PREFIX + key);
}
}