// blob: 1414d153c2d95bbd38107018faafc0a28458b99e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.util;
import org.apache.commons.lang.StringUtils;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
/**
 * Static helpers for converting urls and host names to and from their
 * "reversed" form, in which the host labels are written in reverse order and
 * followed by the protocol, the optional port and the path, e.g.
 * "com.foo.bar:http:8983/to/index.html?a=b".
 * <p>
 * ':' is the field separator of the reversed form, so hosts that themselves
 * contain ':' (such as IPv6 literals) do not round-trip through
 * {@link #unreverseUrl(String)} or {@link #getReversedHost(String)}.
 */
public class TableUtil {

  /** A one-byte buffer wrapping the single character 'y'. */
  public static final ByteBuffer YES_VAL = ByteBuffer.wrap(new byte[] { 'y' });

  /**
   * Reverses a url's domain. This form is better for storing in hbase. Because
   * scans within the same domain are faster.
   * <p>
   * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
   * "com.foo.bar:http:8983/to/index.html?a=b".
   *
   * @param urlString
   *          url to be reversed
   * @return Reversed url
   * @throws MalformedURLException
   *           if urlString cannot be parsed by {@link URL#URL(String)}
   */
  public static String reverseUrl(String urlString)
      throws MalformedURLException {
    return reverseUrl(new URL(urlString));
  }

  /**
   * Reverses a url's domain. This form is better for storing in hbase. Because
   * scans within the same domain are faster.
   * <p>
   * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
   * "com.foo.bar:http:8983/to/index.html?a=b". The port is only written when
   * the url carries an explicit one.
   *
   * @param url
   *          url to be reversed
   * @return Reversed url
   */
  public static String reverseUrl(URL url) {
    String host = url.getHost();
    String file = url.getFile();
    String protocol = url.getProtocol();
    int port = url.getPort();

    StringBuilder buf = new StringBuilder();

    /* reverse host */
    reverseAppendSplits(host, buf);

    /* add protocol */
    buf.append(':');
    buf.append(protocol);

    /* add port if necessary (getPort() is -1 when the url has none) */
    if (port != -1) {
      buf.append(':');
      buf.append(port);
    }

    /* add path, making sure it is separated from the prefix by a '/' */
    if (file.length() > 0 && file.charAt(0) != '/') {
      buf.append('/');
    }
    buf.append(file);

    return buf.toString();
  }

  /**
   * Converts a reversed url back to its regular form. E.g.
   * "com.foo.bar:http:8983/to/index.html?a=b" becomes
   * "http://bar.foo.com:8983/to/index.html?a=b".
   *
   * @param reversedUrl
   *          Reversed url, as produced by {@link #reverseUrl(URL)}
   * @return Unreversed url
   * @throws IllegalArgumentException
   *           if the part of reversedUrl before the first '/' does not contain
   *           at least a reversed host and a protocol separated by ':'
   */
  public static String unreverseUrl(String reversedUrl) {
    // "://" replaces the single ':' between host and protocol, hence + 2
    StringBuilder buf = new StringBuilder(reversedUrl.length() + 2);

    // everything from the first '/' on is the path and is copied verbatim
    int pathBegin = reversedUrl.indexOf('/');
    if (pathBegin == -1) {
      pathBegin = reversedUrl.length();
    }
    String sub = reversedUrl.substring(0, pathBegin);

    // {<reversed host>, <protocol>[, <port>]}
    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':');
    if (splits.length < 2) {
      throw new IllegalArgumentException("Malformed reversed url, expected "
          + "'<reversed host>:<protocol>[:<port>][/<path>]': " + reversedUrl);
    }

    buf.append(splits[1]); // add protocol
    buf.append("://");
    reverseAppendSplits(splits[0], buf); // splits[0] is reversed host
    if (splits.length == 3) { // has a port
      buf.append(':');
      buf.append(splits[2]);
    }
    buf.append(reversedUrl.substring(pathBegin));
    return buf.toString();
  }

  /**
   * Given a reversed url, returns the reversed host E.g
   * "com.foo.bar:http:8983/to/index.html?a=b" -&gt; "com.foo.bar"
   *
   * @param reversedUrl
   *          Reversed url
   * @return Reversed host
   * @throws IllegalArgumentException
   *           if reversedUrl contains no ':' and therefore has no host field
   */
  public static String getReversedHost(String reversedUrl) {
    int hostEnd = reversedUrl.indexOf(':');
    if (hostEnd == -1) {
      throw new IllegalArgumentException("Malformed reversed url, expected "
          + "'<reversed host>:<protocol>[:<port>][/<path>]': " + reversedUrl);
    }
    return reversedUrl.substring(0, hostEnd);
  }

  /**
   * Splits the given string on '.' and appends the pieces to buf in reverse
   * order, joined by '.'. Empty pieces are dropped by the split, so leading,
   * trailing and repeated dots do not survive. If the split yields no pieces
   * at all, the string is appended unchanged.
   *
   * @param string
   *          dot-separated string, e.g. a host name
   * @param buf
   *          buffer the reversed form is appended to
   */
  private static void reverseAppendSplits(String string, StringBuilder buf) {
    String[] splits = StringUtils.split(string, '.');
    if (splits.length > 0) {
      for (int i = splits.length - 1; i > 0; i--) {
        buf.append(splits[i]);
        buf.append('.');
      }
      buf.append(splits[0]);
    } else {
      buf.append(string);
    }
  }

  /**
   * Reverses the order of the dot-separated labels of a host name. E.g.
   * "bar.foo.com" becomes "com.foo.bar".
   *
   * @param hostName
   *          host name to be reversed
   * @return Reversed host name
   */
  public static String reverseHost(String hostName) {
    StringBuilder buf = new StringBuilder();
    reverseAppendSplits(hostName, buf);
    return buf.toString();
  }

  /**
   * Restores a host name from its reversed form. E.g. "com.foo.bar" becomes
   * "bar.foo.com".
   *
   * @param reversedHostName
   *          Reversed host name
   * @return Host name with its labels back in the regular order
   */
  public static String unreverseHost(String reversedHostName) {
    return reverseHost(reversedHostName); // Reversible
  }

  /**
   * Converts the given Utf8 instance to a String and passes it through
   * StringUtil.cleanField, which cleans out any offending "�" from the String.
   *
   * @param utf8
   *          Utf8 object
   * @return string-ifed Utf8 object or null if Utf8 instance is null
   */
  public static String toString(CharSequence utf8) {
    return (utf8 == null ? null : StringUtil.cleanField(utf8.toString()));
  }
}