// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.impala.testutil;

import java.io.File;
import java.io.FileWriter;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.impala.catalog.Catalog;
import org.apache.impala.catalog.FeCatalogUtils;
import org.apache.impala.catalog.FeDb;
import org.apache.impala.catalog.FeFsPartition;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.util.PatternMatcher;

/**
 * Utility to generate an output file with all the block ids for each table
 * currently in the metastore. Having the block ids allows us to map HDFS
 * files to the block files on the local filesystem. This is mostly a hack,
 * since HDFS does not willingly expose this information.
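 *
 * Usage: BlockIdGenerator <output_file>
 * Each line of the output file has the form:
 *   <tablename>: <blockid1> <blockid2> ...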
 */
public class BlockIdGenerator {
  @SuppressWarnings("deprecation")
  public static void main(String[] args)
      throws Exception {
    if (args.length != 1) {
      throw new Exception("Invalid args: BlockIdGenerator <output_file>");
    }
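    // HdfsConfiguration layers hdfs-default.xml and hdfs-site.xml from the
    // classpath on top of the core Hadoop configuration.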
    HdfsConfiguration hdfsConfig = new HdfsConfiguration();
    File output = new File(args[0]);
    FileWriter writer = null;
    try {
      writer = new FileWriter(output);
      // Load all tables in the catalog
      try (Catalog catalog = CatalogServiceTestCatalog.create()) {
        for (FeDb database : catalog.getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
          for (String tableName : database.getAllTableNames()) {
            FeTable table = database.getTable(tableName);
            // Only process HDFS tables (getTable() may return null;
            // instanceof handles that case).
            if (!(table instanceof HdfsTable)) {
              continue;
            }
            HdfsTable hdfsTable = (HdfsTable) table;
            // Write the output as <tablename>: <blockid1> <blockid2> <etc>
            writer.write(tableName + ":");
            Collection<? extends FeFsPartition> parts =
                FeCatalogUtils.loadAllPartitions(hdfsTable);
            for (FeFsPartition partition : parts) {
              List<FileDescriptor> fileDescriptors = partition.getFileDescriptors();
              for (FileDescriptor fd : fileDescriptors) {
                Path p = new Path(partition.getLocation(), fd.getRelativePath());
                // Use a deprecated API to get block ids
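                // (DFSClient and ClientProtocol are private HDFS interfaces;
                // the public FileSystem API does not expose block ids.)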
                DistributedFileSystem dfs =
                    (DistributedFileSystem) p.getFileSystem(hdfsConfig);
                LocatedBlocks locations = dfs.getClient().getNamenode().getBlockLocations(
                    p.toUri().getPath(), 0, fd.getFileLength());
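                // Requesting the range [0, fileLength) covers every block
                // of the file.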
                for (LocatedBlock lb : locations.getLocatedBlocks()) {
                  long id = lb.getBlock().getBlockId();
                  writer.write(" " + id);
                }
              }
            }
            writer.write("\n");
          }
        }
      }
    } finally {
      if (writer != null) writer.close();
    }
  }
}