// Source blob: 9433066b6895176edb65c9ba69ba42f341223daf
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.segment;
import java.io.IOException;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
/**
 * Small value holder identifying one part of a segment by a pair of names:
 * the segment name and the name of the part inside that segment. Also offers
 * static factories that derive the pair from a path or from its
 * "segmentName/partName" text form.
 *
 * @author Andrzej Bialecki
 */
public class SegmentPart {
  /** Segment name, i.e. only the last path component of the segment. */
  public String segmentName;
  /** Segment part name, i.e. one of the subdirectories inside a segment. */
  public String partName;

  /** Creates an instance with both names left unset ({@code null}). */
  public SegmentPart() {
  }

  /**
   * Creates an instance from an already known pair of names.
   *
   * @param segmentName
   *          name of the segment
   * @param partName
   *          name of the part inside the segment
   */
  public SegmentPart(String segmentName, String partName) {
    this.segmentName = segmentName;
    this.partName = partName;
  }

  /**
   * Renders this instance as "segmentName/partName".
   *
   * @return both names joined with a single slash
   */
  @Override
  public String toString() {
    // String.join renders a null element as "null", same as concatenation.
    return String.join("/", segmentName, partName);
  }

  /**
   * Builds a SegmentPart from the path of a {@link FileSplit}.
   *
   * @param split
   *          split whose path string is analysed
   * @return A {@link SegmentPart} derived from the path of the split.
   * @throws IOException
   *           if the path string cannot be broken down, see
   *           {@link #get(String)}
   */
  public static SegmentPart get(FileSplit split) throws IOException {
    final String location = split.getPath().toString();
    return get(location);
  }

  /**
   * Builds a SegmentPart from a full path pointing somewhere inside a segment
   * part. The path is expected to look like
   * ".../segmentName/partName/part-xxxxx[/...]".
   *
   * @param path
   *          full path into a segment part (must contain a "part-xxxxx"
   *          component); backslashes are treated as path separators
   * @return SegmentPart instance describing this part.
   * @throws IOException
   *           if any required path components are missing.
   */
  public static SegmentPart get(String path) throws IOException {
    final String normalized = path.replace('\\', '/');

    // Drop the last "/part-xxxxx" component and everything after it.
    final int partFileStart = normalized.lastIndexOf("/part-");
    if (partFileStart < 0) {
      throw new IOException("Cannot determine segment part: " + normalized);
    }
    final String partDir = normalized.substring(0, partFileStart);

    // The last component of what remains is the part name.
    final int partSlash = partDir.lastIndexOf('/');
    if (partSlash < 0) {
      throw new IOException("Cannot determine segment part: " + partDir);
    }
    final String segmentDir = partDir.substring(0, partSlash);

    // One level further up, the last component is the segment name.
    final int segmentSlash = segmentDir.lastIndexOf('/');
    if (segmentSlash < 0) {
      throw new IOException("Cannot determine segment name: " + segmentDir);
    }

    return new SegmentPart(segmentDir.substring(segmentSlash + 1),
        partDir.substring(partSlash + 1));
  }

  /**
   * Parses the "segmentName/partName" text form. The input is split at the
   * first slash; anything after it, further slashes included, becomes the
   * part name.
   *
   * @param string
   *          input String
   * @return parsed instance of SegmentPart
   * @throws IOException
   *           if "/" is missing.
   */
  public static SegmentPart parse(String string) throws IOException {
    final int slash = string.indexOf('/');
    if (slash < 0) {
      throw new IOException("Invalid SegmentPart: '" + string + "'");
    }
    return new SegmentPart(string.substring(0, slash),
        string.substring(slash + 1));
  }
}