| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.util; |
| |
| import org.apache.commons.lang.StringUtils; |
| |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.nio.ByteBuffer; |
| |
| public class TableUtil { |
| |
| public static final ByteBuffer YES_VAL = ByteBuffer.wrap(new byte[] { 'y' }); |
| |
| /** |
| * Reverses a url's domain. This form is better for storing in hbase. Because |
| * scans within the same domain are faster. |
| * <p> |
| * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes |
| * "com.foo.bar:8983:http/to/index.html?a=b". |
| * |
| * @param urlString |
| * url to be reversed |
| * @return Reversed url |
| * @throws MalformedURLException if the input urlString is malformed |
| */ |
| public static String reverseUrl(String urlString) |
| throws MalformedURLException { |
| return reverseUrl(new URL(urlString)); |
| } |
| |
| /** |
| * Reverses a url's domain. This form is better for storing in hbase. Because |
| * scans within the same domain are faster. |
| * <p> |
| * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes |
| * "com.foo.bar:http:8983/to/index.html?a=b". |
| * |
| * @param url |
| * url to be reversed |
| * @return Reversed url |
| */ |
| public static String reverseUrl(URL url) { |
| String host = url.getHost(); |
| String file = url.getFile(); |
| String protocol = url.getProtocol(); |
| int port = url.getPort(); |
| |
| StringBuilder buf = new StringBuilder(); |
| |
| /* reverse host */ |
| reverseAppendSplits(host, buf); |
| |
| /* add protocol */ |
| buf.append(':'); |
| buf.append(protocol); |
| |
| /* add port if necessary */ |
| if (port != -1) { |
| buf.append(':'); |
| buf.append(port); |
| } |
| |
| /* add path */ |
| if (file.length() > 0 && '/' != file.charAt(0)) { |
| buf.append('/'); |
| } |
| buf.append(file); |
| |
| return buf.toString(); |
| } |
| |
| public static String unreverseUrl(String reversedUrl) { |
| StringBuilder buf = new StringBuilder(reversedUrl.length() + 2); |
| |
| int pathBegin = reversedUrl.indexOf('/'); |
| if (pathBegin == -1) |
| pathBegin = reversedUrl.length(); |
| String sub = reversedUrl.substring(0, pathBegin); |
| |
| String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed |
| // host>, |
| // <port>, |
| // <protocol>} |
| |
| buf.append(splits[1]); // add protocol |
| buf.append("://"); |
| reverseAppendSplits(splits[0], buf); // splits[0] is reversed |
| // host |
| if (splits.length == 3) { // has a port |
| buf.append(':'); |
| buf.append(splits[2]); |
| } |
| buf.append(reversedUrl.substring(pathBegin)); |
| return buf.toString(); |
| } |
| |
| /** |
| * Given a reversed url, returns the reversed host E.g |
| * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar" |
| * |
| * @param reversedUrl |
| * Reversed url |
| * @return Reversed host |
| */ |
| public static String getReversedHost(String reversedUrl) { |
| return reversedUrl.substring(0, reversedUrl.indexOf(':')); |
| } |
| |
| private static void reverseAppendSplits(String string, StringBuilder buf) { |
| String[] splits = StringUtils.split(string, '.'); |
| if (splits.length > 0) { |
| for (int i = splits.length - 1; i > 0; i--) { |
| buf.append(splits[i]); |
| buf.append('.'); |
| } |
| buf.append(splits[0]); |
| } else { |
| buf.append(string); |
| } |
| } |
| |
| public static String reverseHost(String hostName) { |
| StringBuilder buf = new StringBuilder(); |
| reverseAppendSplits(hostName, buf); |
| return buf.toString(); |
| |
| } |
| |
| public static String unreverseHost(String reversedHostName) { |
| return reverseHost(reversedHostName); // Reversible |
| } |
| |
| /** |
| * Convert given Utf8 instance to String and and cleans out any offending "�" |
| * from the String. |
| * |
| * |
| * @param utf8 |
| * Utf8 object |
| * @return string-ifed Utf8 object or null if Utf8 instance is null |
| */ |
| public static String toString(CharSequence utf8) { |
| return (utf8 == null ? null : StringUtil.cleanField(utf8.toString())); |
| } |
| |
| } |