| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.samoa.streams.fs; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.file.FileSystems; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.IOUtils; |
| |
| /** |
| * Source for FileStream for HDFS files |
| * |
| * @author Casey |
| */ |
| public class HDFSFileStreamSource implements FileStreamSource { |
| |
| /** |
| * |
| */ |
| private static final long serialVersionUID = -3887354805787167400L; |
| |
| private transient InputStream fileStream; |
| private transient Configuration config; |
| private List<String> filePaths; |
| private int currentIndex; |
| |
| public HDFSFileStreamSource() { |
| this.currentIndex = -1; |
| } |
| |
| public void init(String path, String ext) { |
| this.init(this.getDefaultConfig(), path, ext); |
| } |
| |
| public void init(Configuration config, String path, String ext) { |
| this.config = config; |
| config.set("fs.hdfs.impl", |
| org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); |
| config.set("fs.file.impl", |
| org.apache.hadoop.fs.LocalFileSystem.class.getName()); |
| this.filePaths = new ArrayList<>(); |
| Path hdfsPath = new Path(path); |
| FileSystem fs; |
| try { |
| fs = FileSystem.get(config); |
| FileStatus fileStat = fs.getFileStatus(hdfsPath); |
| if (fileStat.isDirectory()) { |
| Path filterPath = hdfsPath; |
| if (ext != null) { |
| filterPath = new Path(path, "*." + ext); |
| } else { |
| filterPath = new Path(path, "*"); |
| } |
| FileStatus[] filesInDir = fs.globStatus(filterPath); |
| for (FileStatus aFilesInDir : filesInDir) { |
| if (aFilesInDir.isFile()) { |
| filePaths.add(aFilesInDir.getPath().toString()); |
| } |
| } |
| } else { |
| this.filePaths.add(path); |
| } |
| } catch (IOException ioe) { |
| throw new RuntimeException("Failed getting list of files at:" + path, ioe); |
| } |
| |
| this.currentIndex = -1; |
| } |
| |
| private Configuration getDefaultConfig() { |
| String hadoopHome = System.getenv("HADOOP_HOME"); |
| Configuration conf = new Configuration(); |
| if (hadoopHome != null) { |
| java.nio.file.Path coreSitePath = FileSystems.getDefault().getPath(hadoopHome, "etc/hadoop/core-site.xml"); |
| java.nio.file.Path hdfsSitePath = FileSystems.getDefault().getPath(hadoopHome, "etc/hadoop/hdfs-site.xml"); |
| conf.addResource(new Path(coreSitePath.toAbsolutePath().toString())); |
| conf.addResource(new Path(hdfsSitePath.toAbsolutePath().toString())); |
| } |
| return conf; |
| } |
| |
| public void reset() throws IOException { |
| this.currentIndex = -1; |
| this.closeFileStream(); |
| } |
| |
| private void closeFileStream() { |
| IOUtils.closeStream(fileStream); |
| } |
| |
| public InputStream getNextInputStream() { |
| this.closeFileStream(); |
| if (this.currentIndex >= (this.filePaths.size() - 1)) |
| return null; |
| |
| this.currentIndex++; |
| String filePath = this.filePaths.get(currentIndex); |
| |
| Path hdfsPath = new Path(filePath); |
| FileSystem fs; |
| try { |
| fs = FileSystem.get(config); |
| fileStream = fs.open(hdfsPath); |
| } catch (IOException ioe) { |
| this.closeFileStream(); |
| throw new RuntimeException("Failed opening file:" + filePath, ioe); |
| } |
| |
| return fileStream; |
| } |
| |
| public InputStream getCurrentInputStream() { |
| return fileStream; |
| } |
| |
| protected int getFilePathListSize() { |
| if (filePaths != null) |
| return filePaths.size(); |
| return 0; |
| } |
| |
| protected String getFilePathAt(int index) { |
| if (filePaths != null && filePaths.size() > index) |
| return filePaths.get(index); |
| return null; |
| } |
| } |