blob: 0c69b6de8136346127cd8fb2c1343a22026ac490 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* This class provides rudimentary checking of DFS volumes for errors and
* sub-optimal conditions.
* <p>The tool scans all files and directories, starting from an indicated
* root path. The following abnormal conditions are detected and handled:</p>
* <ul>
* <li>files with blocks that are completely missing from all datanodes.<br/>
* In this case the tool can perform one of the following actions:
* <ul>
* <li>none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})</li>
* <li>move corrupted files to the /lost+found directory on DFS
* ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as
* block chains, representing the longest consecutive series of valid blocks.</li>
* <li>delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})</li>
* </ul>
* </li>
* <li>detect files with under-replicated or over-replicated blocks</li>
* </ul>
* Additionally, the tool collects detailed overall DFS statistics, and
* can optionally print detailed statistics on block locations and replication
* factors of each file.
* The tool also provides an option to filter open files during the scan.
*
*/
@InterfaceAudience.Private
public class DFSck extends Configured implements Tool {
// Runs once when DFSck is loaded.
// NOTE(review): presumably this makes sure the HDFS default configuration
// resources are registered before any Configuration is built - confirm
// against HdfsConfiguration.init().
static{
HdfsConfiguration.init();
}
/**
 * Command-line help for {@code hdfs fsck}: the synopsis, one line per option,
 * and two closing notes. Printed by {@link #printUsage(PrintStream)} and
 * passed to {@code DFSUtil.parseHelpArgument} in {@link #main(String[])}.
 */
private static final String USAGE = "Usage: hdfs fsck <path> "
+ "[-list-corruptfileblocks | "
+ "[-move | -delete | -openforwrite] "
+ "[-files [-blocks [-locations | -racks | -replicaDetails | " +
"-upgradedomains]]]] "
+ "[-includeSnapshots] "
+ "[-storagepolicies] [-maintenance] [-blockId <blk_Id>]\n"
+ "\t<path>\tstart checking from this path\n"
+ "\t-move\tmove corrupted files to /lost+found\n"
+ "\t-delete\tdelete corrupted files\n"
+ "\t-files\tprint out files being checked\n"
+ "\t-openforwrite\tprint out files opened for write\n"
+ "\t-includeSnapshots\tinclude snapshot data if the given path"
+ " indicates a snapshottable directory or there are "
+ "snapshottable directories under it\n"
+ "\t-list-corruptfileblocks\tprint out list of missing "
+ "blocks and files they belong to\n"
+ "\t-files -blocks\tprint out block report\n"
+ "\t-files -blocks -locations\tprint out locations for every block\n"
+ "\t-files -blocks -racks"
+ "\tprint out network topology for data-node locations\n"
+ "\t-files -blocks -replicaDetails\tprint out each replica details \n"
+ "\t-files -blocks -upgradedomains\tprint out upgrade domains for " +
"every block\n"
+ "\t-storagepolicies\tprint out storage policy summary for the blocks\n"
+ "\t-maintenance\tprint out maintenance state node details\n"
+ "\t-blockId\tprint out which file this blockId belongs to, locations"
+ " (nodes, racks) of this block, and other diagnostics info"
+ " (under replicated, corrupted or not, etc)\n\n"
+ "Please Note:\n"
+ "\t1. By default fsck ignores files opened for write, "
+ "use -openforwrite to report such files. They are usually "
+ " tagged CORRUPT or HEALTHY depending on their block "
+ "allocation status\n"
+ "\t2. Option -includeSnapshots should not be used for comparing stats,"
+ " should be used only for HEALTH check, as this may contain duplicates"
+ " if the same file present in both original fs tree "
+ "and inside snapshots.";
/**
 * User captured when this object was constructed; its short name is sent to
 * the namenode as the {@code ugi} query parameter.
 */
private final UserGroupInformation ugi;
/** Stream that receives the fsck report lines and the corrupt-file listing. */
private final PrintStream out;
/** Factory used to open the HTTP connection(s) to the fsck servlet. */
private final URLConnectionFactory connectionFactory;
/**
 * Value of {@code UserGroupInformation.isSecurityEnabled()} at construction
 * time; passed to the connection factory whenever a connection is opened.
 */
private final boolean isSpnegoEnabled;
/**
 * Creates a filesystem checker that writes its report to {@link System#out}.
 *
 * @param conf current Configuration
 * @throws IOException if the two-argument constructor fails
 */
public DFSck(Configuration conf) throws IOException {
  this(conf, System.out);
}

/**
 * Creates a filesystem checker that writes its report to the given stream.
 * The current user, the URL connection factory and the security-enabled
 * flag are all captured here, once, at construction time.
 *
 * @param conf current Configuration
 * @param out stream that receives the fsck output
 * @throws IOException if the current user cannot be obtained
 */
public DFSck(Configuration conf, PrintStream out) throws IOException {
  super(conf);
  this.ugi = UserGroupInformation.getCurrentUser();
  this.out = out;
  this.connectionFactory =
      URLConnectionFactory.newDefaultURLConnectionFactory(conf);
  this.isSpnegoEnabled = UserGroupInformation.isSecurityEnabled();
}
/**
 * Writes the fsck usage text, a blank line, and then the generic command
 * options to the given stream.
 *
 * @param out stream to print the usage information to
 */
static void printUsage(PrintStream out) {
  final String usageWithBlankLine = USAGE + "\n";
  out.println(usageWithBlankLine);
  ToolRunner.printGenericCommandUsage(out);
}
/**
 * Tool entry point: runs the check as the current user.
 *
 * @param args fsck command-line arguments (generic options already removed)
 * @return -1 after printing the usage when no arguments are given,
 *         otherwise the exit code computed by {@code doWork}
 * @throws IOException if the check fails, or if the calling thread is
 *         interrupted while the privileged action is running
 */
@Override
public int run(final String[] args) throws IOException {
  if (args.length == 0) {
    printUsage(System.err);
    return -1;
  }
  try {
    return UserGroupInformation.getCurrentUser().doAs(
        new PrivilegedExceptionAction<Integer>() {
          @Override
          public Integer run() throws Exception {
            return doWork(args);
          }
        });
  } catch (InterruptedException e) {
    // Converting to IOException would otherwise lose the interruption;
    // restore the flag so callers further up can still observe it.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
/**
 * Prints the corrupt files under {@code dir} as reported by the fsck servlet.
 *
 * <p>To get the whole list we need to call the server iteratively until it
 * says there is nothing left: each response carries a {@code Cookie:} line,
 * and the next request passes that value as {@code startblockafter}.
 *
 * @param dir path used in the printed messages
 * @param baseUrl fsck URL, including its query string, to which the paging
 *        parameter is appended
 * @return 0 if no corrupt file was listed, -1 otherwise
 * @throws IOException if a connection cannot be opened or read; an
 *         {@code AuthenticationException} is wrapped in an IOException
 */
private Integer listCorruptFileBlocks(String dir, String baseUrl)
    throws IOException {
  final String noCorruptLine = "has no CORRUPT files";
  final String noMoreCorruptLine = "has no more CORRUPT files";
  final String cookiePrefix = "Cookie:";
  int numCorrupt = 0;
  int cookie = 0;
  boolean allDone = false;
  while (!allDone) {
    final int requestedCookie = cookie;
    final StringBuilder url = new StringBuilder(baseUrl);
    if (cookie > 0) {
      url.append("&startblockafter=").append(cookie);
    }
    URL path = new URL(url.toString());
    URLConnection connection;
    try {
      connection = connectionFactory.openConnection(path, isSpnegoEnabled);
    } catch (AuthenticationException e) {
      throw new IOException(e);
    }
    // The stream is its own resource so it is closed even if wrapping it
    // in a reader fails.
    try (InputStream stream = connection.getInputStream();
         BufferedReader input = new BufferedReader(
             new InputStreamReader(stream, "UTF-8"))) {
      String line;
      while ((line = input.readLine()) != null) {
        if (line.startsWith(cookiePrefix)) {
          try {
            cookie = Integer.parseInt(line.split("\t")[1]);
          } catch (NumberFormatException
              | ArrayIndexOutOfBoundsException e) {
            // An unparsable cookie line means paging cannot continue.
            allDone = true;
            break;
          }
          continue;
        }
        if (line.endsWith(noCorruptLine)
            || line.endsWith(noMoreCorruptLine)
            || line.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
          allDone = true;
          break;
        }
        // Skip blank lines and the header/summary lines of the response.
        if (line.isEmpty()
            || line.startsWith("FSCK started by")
            || line.startsWith("The filesystem under path")) {
          continue;
        }
        numCorrupt++;
        if (numCorrupt == 1) {
          out.println("The list of corrupt files under path '"
              + dir + "' are:");
        }
        out.println(line);
      }
    }
    // A response that neither ended the listing nor moved the cookie would
    // make the next request identical to this one; stop instead of looping
    // forever.
    if (!allDone && cookie == requestedCookie) {
      allDone = true;
    }
  }
  out.println("The filesystem under path '" + dir + "' has "
      + numCorrupt + " CORRUPT files");
  return numCorrupt == 0 ? 0 : -1;
}
/**
 * Resolves {@code dir} through the file system that owns it, using this
 * tool's configuration.
 *
 * @param dir path given on the command line
 * @return the path as returned by {@code FileSystem.resolvePath}
 * @throws IOException if the file system cannot be obtained or the path
 *         cannot be resolved
 */
private Path getResolvedPath(String dir) throws IOException {
  final Path requested = new Path(dir);
  final FileSystem owner = requested.getFileSystem(getConf());
  return owner.resolvePath(requested);
}
/**
 * Derives the namenode HTTP address from the file system that owns
 * {@code target}, either the default one or the one set by "-fs" in the
 * generic options.
 *
 * @param target path whose file system is inspected
 * @return the info-server address of the active namenode, or {@code null}
 *         (after printing the file system URI to stderr) when the file
 *         system is not a {@code DistributedFileSystem}
 * @throws IOException if we can't determine the active NN address
 */
private URI getCurrentNamenodeAddress(Path target) throws IOException {
  final Configuration conf = getConf();
  // fsck only works against HDFS, so check the file system type first.
  final FileSystem fs = target.getFileSystem(conf);
  if (fs instanceof DistributedFileSystem) {
    return DFSUtil.getInfoServer(HAUtil.getAddressOfActive(fs), conf,
        DFSUtil.getHttpClientScheme(conf));
  }
  System.err.println("FileSystem is " + fs.getUri());
  return null;
}
/**
 * Translates the fsck command line into a request to the namenode's fsck
 * servlet, prints the response, and derives the exit code from it.
 *
 * @param args fsck command-line arguments
 * @return -1 for an illegal option or more than one path; 0 when the
 *         namenode address cannot be determined; the result of
 *         {@code listCorruptFileBlocks} when -list-corruptfileblocks is
 *         given; otherwise the code derived from the last line of the
 *         report (see {@code exitCodeFor})
 * @throws IOException if the connection cannot be opened or read; an
 *         {@code AuthenticationException} is wrapped in an IOException
 */
private int doWork(final String[] args) throws IOException {
  final StringBuilder url = new StringBuilder();
  url.append("/fsck?ugi=").append(ugi.getShortUserName());
  String dir = null;
  boolean doListCorruptFileBlocks = false;
  for (int idx = 0; idx < args.length; idx++) {
    final String arg = args[idx];
    if (arg.equals("-move")) {
      url.append("&move=1");
    } else if (arg.equals("-delete")) {
      url.append("&delete=1");
    } else if (arg.equals("-files")) {
      url.append("&files=1");
    } else if (arg.equals("-openforwrite")) {
      url.append("&openforwrite=1");
    } else if (arg.equals("-blocks")) {
      url.append("&blocks=1");
    } else if (arg.equals("-locations")) {
      url.append("&locations=1");
    } else if (arg.equals("-racks")) {
      url.append("&racks=1");
    } else if (arg.equals("-replicaDetails")) {
      url.append("&replicadetails=1");
    } else if (arg.equals("-upgradedomains")) {
      url.append("&upgradedomains=1");
    } else if (arg.equals("-storagepolicies")) {
      url.append("&storagepolicies=1");
    } else if (arg.equals("-list-corruptfileblocks")) {
      url.append("&listcorruptfileblocks=1");
      doListCorruptFileBlocks = true;
    } else if (arg.equals("-includeSnapshots")) {
      url.append("&includeSnapshots=1");
    } else if (arg.equals("-maintenance")) {
      url.append("&maintenance=1");
    } else if (arg.equals("-blockId")) {
      // Every following argument up to the next option is a block id.
      // Look ahead instead of stepping onto the next option: the for
      // loop's own idx++ must land on that option so it is still parsed
      // (previously it was silently skipped).
      final StringBuilder sb = new StringBuilder();
      while (idx + 1 < args.length && !args[idx + 1].startsWith("-")) {
        idx++;
        sb.append(args[idx]);
        sb.append(" ");
      }
      url.append("&blockId=")
          .append(URLEncoder.encode(sb.toString(), "UTF-8"));
    } else if (!arg.startsWith("-")) {
      if (null == dir) {
        dir = arg;
      } else {
        System.err.println("fsck: can only operate on one path at a time '"
            + arg + "'");
        printUsage(System.err);
        return -1;
      }
    } else {
      System.err.println("fsck: Illegal option '" + arg + "'");
      printUsage(System.err);
      return -1;
    }
  }
  if (null == dir) {
    dir = "/";
  }

  Path dirpath = null;
  URI namenodeAddress = null;
  try {
    dirpath = getResolvedPath(dir);
    namenodeAddress = getCurrentNamenodeAddress(dirpath);
  } catch (IOException ioe) {
    System.err.println("FileSystem is inaccessible due to:\n"
        + ioe.toString());
  }
  if (namenodeAddress == null) {
    // An error message has already been printed, either by the catch
    // block above or by getCurrentNamenodeAddress.
    // NOTE(review): this failure path returns 0; kept as-is because
    // existing callers may rely on it.
    System.err.println("DFSck exiting.");
    return 0;
  }

  url.insert(0, namenodeAddress.toString());
  url.append("&path=").append(URLEncoder.encode(
      Path.getPathWithoutSchemeAndAuthority(dirpath).toString(), "UTF-8"));
  System.err.println("Connecting to namenode via " + url.toString());

  if (doListCorruptFileBlocks) {
    return listCorruptFileBlocks(dir, url.toString());
  }

  URL path = new URL(url.toString());
  URLConnection connection;
  try {
    connection = connectionFactory.openConnection(path, isSpnegoEnabled);
  } catch (AuthenticationException e) {
    throw new IOException(e);
  }
  // If the server sends nothing at all, the result is treated as CORRUPT.
  String lastLine = NamenodeFsck.CORRUPT_STATUS;
  // The stream is its own resource so it is closed even if wrapping it in
  // a reader fails.
  try (InputStream stream = connection.getInputStream();
       BufferedReader input = new BufferedReader(
           new InputStreamReader(stream, "UTF-8"))) {
    String line;
    while ((line = input.readLine()) != null) {
      out.println(line);
      lastLine = line;
    }
  }
  return exitCodeFor(lastLine);
}

/**
 * Maps the last line of the fsck report to the tool's exit code. The checks
 * run in a fixed order and the first match wins; -1 means the line matched
 * none of the known endings.
 */
private static int exitCodeFor(String lastLine) {
  if (lastLine.endsWith(NamenodeFsck.HEALTHY_STATUS)) {
    return 0;
  } else if (lastLine.endsWith(NamenodeFsck.CORRUPT_STATUS)) {
    return 1;
  } else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
    return 0;
  } else if (lastLine.contains("Incorrect blockId format:")) {
    return 0;
  } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONED_STATUS)) {
    return 2;
  } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
    return 3;
  } else if (lastLine.endsWith(NamenodeFsck.IN_MAINTENANCE_STATUS)) {
    return 4;
  } else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
    return 5;
  }
  return -1;
}
/**
 * Command-line entry point. Exits with -1 after printing the usage when no
 * arguments are given or the first argument is {@code -files}, with 0 when
 * the help argument was handled, and otherwise with the tool's own result.
 *
 * @param args command-line arguments
 * @throws Exception if running the tool fails
 */
public static void main(String[] args) throws Exception {
  final int exitCode;
  // -files is also a GenericOptionsParser option, so it must not be the
  // first argument given to fsck.
  if (args.length == 0 || "-files".equals(args[0])) {
    printUsage(System.err);
    exitCode = -1;
  } else if (DFSUtil.parseHelpArgument(args, USAGE, System.out, true)) {
    exitCode = 0;
  } else {
    exitCode = ToolRunner.run(new DFSck(new HdfsConfiguration()), args);
  }
  System.exit(exitCode);
}
}