blob: 965d053c053b985acbd68626e4357bf4c2993af0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.LinkedList;
/**
* File size distribution visitor.
*
* <h3>Description.</h3>
* This is the tool for analyzing file sizes in the namespace image.
* In order to run the tool one should define a range of integers
* <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
* The range of integers is divided into segments of size <tt>step</tt>:
* <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
* and the visitor calculates how many files in the system fall into
* each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
* Note that files larger than <tt>maxSize</tt> always fall into
* the very last segment.
*
* <h3>Input.</h3>
* <ul>
* <li><tt>filename</tt> specifies the location of the image file;</li>
* <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
* sizes considered by the visitor;</li>
* <li><tt>step</tt> the range is divided into segments of size step.</li>
* </ul>
*
* <h3>Output.</h3>
* The output file is formatted as a tab separated two column table:
* Size and NumFiles. Where Size represents the start of the segment,
* and numFiles is the number of files form the image which size falls in
* this segment.
*/
class FileDistributionVisitor extends TextWriterImageVisitor {
final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
private int[] distribution;
private long maxSize;
private int step;
private int totalFiles;
private int totalDirectories;
private int totalBlocks;
private long totalSpace;
private long maxFileSize;
private FileContext current;
private boolean inInode = false;
/**
* File or directory information.
*/
private static class FileContext {
String path;
long fileSize;
int numBlocks;
int replication;
}
public FileDistributionVisitor(String filename,
long maxSize,
int step) throws IOException {
super(filename, false);
this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
this.step = (step == 0 ? INTERVAL_DEFAULT : step);
long numIntervals = this.maxSize / this.step;
if(numIntervals >= Integer.MAX_VALUE)
throw new IOException("Too many distribution intervals " + numIntervals);
this.distribution = new int[1 + (int)(numIntervals)];
this.totalFiles = 0;
this.totalDirectories = 0;
this.totalBlocks = 0;
this.totalSpace = 0;
this.maxFileSize = 0;
}
@Override
void start() throws IOException {}
@Override
void finish() throws IOException {
// write the distribution into the output file
write("Size\tNumFiles\n");
for(int i = 0; i < distribution.length; i++)
write(((long)i * step) + "\t" + distribution[i] + "\n");
System.out.println("totalFiles = " + totalFiles);
System.out.println("totalDirectories = " + totalDirectories);
System.out.println("totalBlocks = " + totalBlocks);
System.out.println("totalSpace = " + totalSpace);
System.out.println("maxFileSize = " + maxFileSize);
super.finish();
}
@Override
void leaveEnclosingElement() throws IOException {
ImageElement elem = elemS.pop();
if(elem != ImageElement.INODE &&
elem != ImageElement.INODE_UNDER_CONSTRUCTION)
return;
inInode = false;
if(current.numBlocks < 0) {
totalDirectories ++;
return;
}
totalFiles++;
totalBlocks += current.numBlocks;
totalSpace += current.fileSize * current.replication;
if(maxFileSize < current.fileSize)
maxFileSize = current.fileSize;
int high;
if(current.fileSize > maxSize)
high = distribution.length-1;
else
high = (int)Math.ceil((double)current.fileSize / step);
distribution[high]++;
if(totalFiles % 1000000 == 1)
System.out.println("Files processed: " + totalFiles
+ " Current: " + current.path);
}
@Override
void visit(ImageElement element, String value) throws IOException {
if(inInode) {
switch(element) {
case INODE_PATH:
current.path = (value.equals("") ? "/" : value);
break;
case REPLICATION:
current.replication = Integer.valueOf(value);
break;
case NUM_BYTES:
current.fileSize += Long.valueOf(value);
break;
default:
break;
}
}
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION) {
current = new FileContext();
inInode = true;
}
}
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION)
inInode = true;
else if(element == ImageElement.BLOCKS)
current.numBlocks = Integer.parseInt(value);
}
}