blob: 7dcc29998f3359c927606d03ff9e5664199563c4 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.hadoop.util.StringUtils;
/**
* File size distribution visitor.
*
* <h3>Description.</h3>
* This is the tool for analyzing file sizes in the namespace image.
* In order to run the tool one should define a range of integers
* <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
* The range of integers is divided into segments of size <tt>step</tt>:
* <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
* and the visitor calculates how many files in the system fall into
* each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
* Note that files larger than <tt>maxSize</tt> always fall into
* the very last segment.
*
* <h3>Input.</h3>
* <ul>
* <li><tt>filename</tt> specifies the location of the image file;</li>
* <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
* sizes considered by the visitor;</li>
* <li><tt>step</tt> the range is divided into segments of size step.</li>
* </ul>
*
* <h3>Output.</h3>
* The output file is formatted as a tab separated two column table:
* Size and NumFiles. Where Size represents the start of the segment,
* and numFiles is the number of files form the image which size falls in
* this segment.
*/
class FileDistributionVisitor extends TextWriterImageVisitor {
final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
private int[] distribution;
private long maxSize;
private int step;
private int totalFiles;
private int totalDirectories;
private int totalBlocks;
private long totalSpace;
private long maxFileSize;
private FileContext current;
private boolean inInode = false;
private boolean formatOutput = false;
/**
* File or directory information.
*/
private static class FileContext {
String path;
long fileSize;
int numBlocks;
int replication;
}
public FileDistributionVisitor(String filename, long maxSize, int step,
boolean formatOutput) throws IOException {
super(filename, false);
this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
this.step = (step == 0 ? INTERVAL_DEFAULT : step);
this.formatOutput = formatOutput;
long numIntervals = this.maxSize / this.step;
if(numIntervals >= Integer.MAX_VALUE)
throw new IOException("Too many distribution intervals " + numIntervals);
this.distribution = new int[1 + (int)(numIntervals)];
this.totalFiles = 0;
this.totalDirectories = 0;
this.totalBlocks = 0;
this.totalSpace = 0;
this.maxFileSize = 0;
}
@Override
void start() throws IOException {}
@Override
void finish() throws IOException {
output();
super.finish();
}
@Override
void finishAbnormally() throws IOException {
System.out.println("*** Image processing finished abnormally. Ending ***");
output();
super.finishAbnormally();
}
private void output() throws IOException {
// write the distribution into the output file
write((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
for (int i = 0; i < distribution.length; i++) {
if (distribution[i] > 0) {
if (formatOutput) {
write((i == 0 ? "[" : "(")
+ StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * step))
+ ", "
+ StringUtils.byteDesc((long)
(i == distribution.length - 1 ? maxFileSize : i * step))
+ "]\t"
+ distribution[i] + "\n");
} else {
write(((long) i * step) + "\t" + distribution[i] + "\n");
}
}
}
System.out.println("totalFiles = " + totalFiles);
System.out.println("totalDirectories = " + totalDirectories);
System.out.println("totalBlocks = " + totalBlocks);
System.out.println("totalSpace = " + totalSpace);
System.out.println("maxFileSize = " + maxFileSize);
}
@Override
void leaveEnclosingElement() throws IOException {
ImageElement elem = elemS.pop();
if(elem != ImageElement.INODE &&
elem != ImageElement.INODE_UNDER_CONSTRUCTION)
return;
inInode = false;
if(current.numBlocks < 0) {
totalDirectories ++;
return;
}
totalFiles++;
totalBlocks += current.numBlocks;
totalSpace += current.fileSize * current.replication;
if(maxFileSize < current.fileSize)
maxFileSize = current.fileSize;
int high;
if(current.fileSize > maxSize)
high = distribution.length-1;
else
high = (int)Math.ceil((double)current.fileSize / step);
if (high >= distribution.length) {
high = distribution.length - 1;
}
distribution[high]++;
if(totalFiles % 1000000 == 1)
System.out.println("Files processed: " + totalFiles
+ " Current: " + current.path);
}
@Override
void visit(ImageElement element, String value) throws IOException {
if(inInode) {
switch(element) {
case INODE_PATH:
current.path = (value.equals("") ? "/" : value);
break;
case REPLICATION:
current.replication = Integer.parseInt(value);
break;
case NUM_BYTES:
current.fileSize += Long.parseLong(value);
break;
default:
break;
}
}
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION) {
current = new FileContext();
inInode = true;
}
}
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION)
inInode = true;
else if(element == ImageElement.BLOCKS)
current.numBlocks = Integer.parseInt(value);
}
}