| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.camel.component.hdfs2; |
| |
| import java.io.IOException; |
| import java.util.concurrent.locks.ReadWriteLock; |
| import java.util.concurrent.locks.ReentrantReadWriteLock; |
| |
| import javax.security.auth.login.Configuration; |
| |
| import org.apache.camel.Exchange; |
| import org.apache.camel.Message; |
| import org.apache.camel.Processor; |
| import org.apache.camel.support.DefaultMessage; |
| import org.apache.camel.support.ScheduledPollConsumer; |
| import org.apache.camel.util.IOHelper; |
| import org.apache.commons.lang.StringUtils; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.fs.PathFilter; |
| |
| public final class HdfsConsumer extends ScheduledPollConsumer { |
| |
| public static final long DEFAULT_CONSUMER_INITIAL_DELAY = 10 * 1000L; |
| |
| private final HdfsConfiguration config; |
| private final StringBuilder hdfsPath; |
| private final Processor processor; |
| private final ReadWriteLock rwlock = new ReentrantReadWriteLock(); |
| private volatile HdfsInputStream istream; |
| |
| public HdfsConsumer(HdfsEndpoint endpoint, Processor processor, HdfsConfiguration config) { |
| super(endpoint, processor); |
| this.config = config; |
| this.hdfsPath = config.getFileSystemType().getHdfsPath(config); |
| this.processor = processor; |
| setUseFixedDelay(true); |
| } |
| |
| @Override |
| public HdfsEndpoint getEndpoint() { |
| return (HdfsEndpoint) super.getEndpoint(); |
| } |
| |
| @Override |
| protected void doStart() throws Exception { |
| super.doStart(); |
| |
| if (config.isConnectOnStartup()) { |
| // setup hdfs if configured to do on startup |
| setupHdfs(true); |
| } |
| } |
| |
| private HdfsInfo setupHdfs(boolean onStartup) throws Exception { |
| // if we are starting up then log at info level, and if runtime then log at debug level to not flood the log |
| if (onStartup) { |
| log.info("Connecting to hdfs file-system {}:{}/{} (may take a while if connection is not available)", config.getHostName(), config.getPort(), hdfsPath); |
| } else { |
| if (log.isDebugEnabled()) { |
| log.debug("Connecting to hdfs file-system {}:{}/{} (may take a while if connection is not available)", config.getHostName(), config.getPort(), hdfsPath); |
| } |
| } |
| |
| // hadoop will cache the connection by default so its faster to get in the poll method |
| HdfsInfo answer = HdfsInfoFactory.newHdfsInfo(this.hdfsPath.toString()); |
| |
| if (onStartup) { |
| log.info("Connected to hdfs file-system {}:{}/{}", config.getHostName(), config.getPort(), hdfsPath); |
| } else { |
| if (log.isDebugEnabled()) { |
| log.debug("Connected to hdfs file-system {}:{}/{}", config.getHostName(), config.getPort(), hdfsPath); |
| } |
| } |
| return answer; |
| } |
| |
| @Override |
| protected int poll() throws Exception { |
| // need to remember auth as Hadoop will override that, which otherwise means the Auth is broken afterwards |
| Configuration auth = HdfsComponent.getJAASConfiguration(); |
| try { |
| return doPoll(); |
| } finally { |
| HdfsComponent.setJAASConfiguration(auth); |
| } |
| } |
| |
| protected int doPoll() throws Exception { |
| class ExcludePathFilter implements PathFilter { |
| public boolean accept(Path path) { |
| return !(path.toString().endsWith(config.getOpenedSuffix()) || path.toString().endsWith(config.getReadSuffix())); |
| } |
| } |
| |
| int numMessages = 0; |
| |
| HdfsInfo info = setupHdfs(false); |
| FileStatus fileStatuses[]; |
| if (info.getFileSystem().isFile(info.getPath())) { |
| fileStatuses = info.getFileSystem().globStatus(info.getPath()); |
| } else { |
| Path pattern = info.getPath().suffix("/" + this.config.getPattern()); |
| fileStatuses = info.getFileSystem().globStatus(pattern, new ExcludePathFilter()); |
| } |
| |
| for (FileStatus status : fileStatuses) { |
| |
| if (normalFileIsDirectoryNoSuccessFile(status, info)) { |
| continue; |
| } |
| |
| if (config.getOwner() != null) { |
| // must match owner |
| if (!config.getOwner().equals(status.getOwner())) { |
| if (log.isDebugEnabled()) { |
| log.debug("Skipping file: {} as not matching owner: {}", status.getPath(), config.getOwner()); |
| } |
| continue; |
| } |
| } |
| |
| try { |
| this.rwlock.writeLock().lock(); |
| this.istream = HdfsInputStream.createInputStream(status.getPath().toString(), this.config); |
| if (!this.istream.isOpened()) { |
| if (log.isDebugEnabled()) { |
| log.debug("Skipping file: {} because it doesn't exist anymore", status.getPath()); |
| } |
| continue; |
| } |
| } finally { |
| this.rwlock.writeLock().unlock(); |
| } |
| |
| try { |
| Holder<Object> key = new Holder<>(); |
| Holder<Object> value = new Holder<>(); |
| while (this.istream.next(key, value) >= 0) { |
| Exchange exchange = this.getEndpoint().createExchange(); |
| Message message = new DefaultMessage(this.getEndpoint().getCamelContext()); |
| String fileName = StringUtils.substringAfterLast(status.getPath().toString(), "/"); |
| message.setHeader(Exchange.FILE_NAME, fileName); |
| if (key.value != null) { |
| message.setHeader(HdfsHeader.KEY.name(), key.value); |
| } |
| message.setBody(value.value); |
| exchange.setIn(message); |
| |
| log.debug("Processing file {}", fileName); |
| try { |
| processor.process(exchange); |
| } catch (Exception e) { |
| exchange.setException(e); |
| } |
| |
| // in case of unhandled exceptions then let the exception handler handle them |
| if (exchange.getException() != null) { |
| getExceptionHandler().handleException(exchange.getException()); |
| } |
| |
| numMessages++; |
| } |
| } finally { |
| IOHelper.close(istream, "input stream", log); |
| } |
| } |
| |
| return numMessages; |
| } |
| |
| private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException { |
| if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDirectory()) { |
| Path successPath = new Path(status.getPath().toString() + "/_SUCCESS"); |
| if (!info.getFileSystem().exists(successPath)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| } |