// blob: 64ac4a52562752b5f3e5fe6370fbed37915d234e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.examples.dirlist;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
import org.apache.hadoop.io.Text;
import com.beust.jcommander.Parameter;
/**
* Provides utility methods for getting the info for a file, listing the contents of a directory,
* and performing single wild card searches on file or directory names.
*/
public class QueryUtil {
/** Client used to create the scanners that back every query in this class. */
private final AccumuloClient client;
/** Name of the table that is scanned; taken from the command line options. */
private final String tableName;
/** Authorizations passed to every scanner that is created. */
private final Authorizations auths;
/**
 * Column family that {@link #getType(Text)} returns as-is instead of decoding; presumably marks
 * directory entries -- confirm against the ingest code.
 */
public static final Text DIR_COLF = new Text("dir");
/** Row prefix of the rows built by {@link #getForwardIndex(String)}. */
public static final Text FORWARD_PREFIX = new Text("f");
/** Row prefix of the rows built by {@link #getReverseIndex(String)}. */
public static final Text REVERSE_PREFIX = new Text("r");
/** Not referenced in this class; presumably the column family of index entries -- confirm. */
public static final Text INDEX_COLF = new Text("i");
/** Not referenced in this class; presumably a column qualifier holding counts -- confirm. */
public static final Text COUNTS_COLQ = new Text("counts");
/**
 * Creates a query helper that scans a single table through the given client.
 *
 * @param client
 *          the client used to create scanners
 * @param opts
 *          the parsed options supplying the table name and the scan authorizations
 */
public QueryUtil(AccumuloClient client, Opts opts) {
  this.auths = opts.auths;
  this.tableName = opts.getTableName();
  this.client = client;
}
/**
 * Computes how deep a path is by counting the forward slashes it contains.
 *
 * @param path
 *          the full path of a file or directory
 * @return the number of '/' characters in the path
 */
public static int getDepth(String path) {
  int depth = 0;
  // Hop from one slash to the next until indexOf reports that none is left.
  for (int pos = path.indexOf('/'); pos >= 0; pos = path.indexOf('/', pos + 1)) {
    depth++;
  }
  return depth;
}
/**
 * Given a path, construct an accumulo row prepended with the path's depth for the directory
 * table. The depth is written as a zero-padded decimal number of at least three digits and is
 * followed by the UTF-8 bytes of the path.
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path
 */
public static Text getRow(String path) {
  // Locale.ROOT keeps the depth prefix in ASCII digits whatever the default locale is.
  Text row = new Text(String.format(Locale.ROOT, "%03d", getDepth(path)));
  // Encode explicitly and append the encoded length: the byte count differs from
  // path.length() as soon as the path contains a non-ASCII character.
  byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8);
  row.append(pathBytes, 0, pathBytes.length);
  return row;
}
/**
 * Given a path, construct an accumulo row for the index table: the {@link #FORWARD_PREFIX}
 * followed by the UTF-8 bytes of the last component of the path (everything after the final
 * '/').
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path, or {@code null} if the last component is
 *         empty (the path is empty or ends with '/')
 */
public static Text getForwardIndex(String path) {
  String part = path.substring(path.lastIndexOf("/") + 1);
  if (part.isEmpty()) {
    return null;
  }
  Text row = new Text(FORWARD_PREFIX);
  // Append the encoded byte count, not part.length(): the two differ for non-ASCII names.
  byte[] partBytes = part.getBytes(StandardCharsets.UTF_8);
  row.append(partBytes, 0, partBytes.length);
  return row;
}
/**
 * Given a path, construct an accumulo row for the index table: the {@link #REVERSE_PREFIX}
 * followed by the UTF-8 bytes of the last component of the path in reverse byte order. The
 * reversal works on bytes, so a suffix of the name becomes a byte prefix of the row.
 *
 * @param path
 *          the full path of a file or directory
 * @return the accumulo row associated with this path, or {@code null} if the last component is
 *         empty (the path is empty or ends with '/')
 */
public static Text getReverseIndex(String path) {
  String part = path.substring(path.lastIndexOf("/") + 1);
  if (part.isEmpty()) {
    return null;
  }
  byte[] forward = part.getBytes(StandardCharsets.UTF_8);
  // Size the target from the encoded bytes; sizing it from part.length() overflows the array
  // for names that contain multi-byte characters.
  byte[] rev = new byte[forward.length];
  for (int i = 0; i < forward.length; i++) {
    rev[forward.length - 1 - i] = forward[i];
  }
  Text row = new Text(REVERSE_PREFIX);
  row.append(rev, 0, rev.length);
  return row;
}
/**
 * Returns a label for a column family, always terminated by a ':'. The {@link #DIR_COLF} is
 * used as-is; any other column family is decoded with {@code Ingest.encoder} first.
 *
 * @param colf
 *          the column family
 * @return the column family label followed by ':'
 */
public static String getType(Text colf) {
  if (colf.equals(DIR_COLF)) {
    return colf + ":";
  }
  // copyBytes() yields exactly the valid bytes; getBytes() exposes the backing array, which is
  // only guaranteed to be meaningful up to getLength().
  return Ingest.encoder.decode(colf.copyBytes()) + ":";
}
/**
 * Scans over the directory table and pulls out stat information about a path.
 *
 * @param path
 *          the full path of a file or directory; a trailing '/' is ignored
 * @return a sorted map holding the path under the key "fullname" plus one entry per scanned
 *         column, keyed by "type:qualifier:visibility"; empty if the row has no entries
 * @throws TableNotFoundException
 *           if the scanner cannot be created because the table does not exist
 */
public Map<String,String> getData(String path) throws TableNotFoundException {
  if (path.endsWith("/")) {
    path = path.substring(0, path.length() - 1);
  }
  Map<String,String> data = new TreeMap<>();
  try (Scanner scanner = client.createScanner(tableName, auths)) {
    scanner.setRange(new Range(getRow(path)));
    for (Entry<Key,Value> e : scanner) {
      Key key = e.getKey();
      String type = getType(key.getColumnFamily());
      // Skip the depth prefix of the row; this assumes the prefix is three characters wide,
      // which holds for the "%03d" format used by getRow while the depth stays below 1000.
      data.put("fullname", key.getRow().toString().substring(3));
      // Decode the value explicitly as UTF-8 instead of relying on the platform charset.
      data.put(
          type + key.getColumnQualifier().toString() + ":" + key.getColumnVisibility().toString(),
          new String(e.getValue().get(), StandardCharsets.UTF_8));
    }
  }
  return data;
}
/**
 * Uses the directory table to list the contents of a directory.
 *
 * @param path
 *          the full path of a directory; a trailing '/' is added if missing
 * @return a sorted map from entry name (the part of the row after its last '/') to that entry's
 *         data: the path under the key "fullname" plus one entry per scanned column, keyed by
 *         "type:qualifier:visibility"
 * @throws TableNotFoundException
 *           if the scanner cannot be created because the table does not exist
 */
public Map<String,Map<String,String>> getDirList(String path) throws TableNotFoundException {
  if (!path.endsWith("/")) {
    path = path + "/";
  }
  Map<String,Map<String,String>> fim = new TreeMap<>();
  try (Scanner scanner = client.createScanner(tableName, auths)) {
    scanner.setRange(Range.prefix(getRow(path)));
    for (Entry<Key,Value> e : scanner) {
      Key key = e.getKey();
      String row = key.getRow().toString();
      String name = row.substring(row.lastIndexOf("/") + 1);
      String type = getType(key.getColumnFamily());
      // The first time a name shows up, create its map and record the full path; substring(3)
      // skips the depth prefix, assumed three characters wide as produced by getRow.
      Map<String,String> info = fim.computeIfAbsent(name, unused -> {
        Map<String,String> created = new TreeMap<>();
        created.put("fullname", row.substring(3));
        return created;
      });
      // Decode the value explicitly as UTF-8 instead of relying on the platform charset.
      info.put(
          type + key.getColumnQualifier().toString() + ":" + key.getColumnVisibility().toString(),
          new String(e.getValue().get(), StandardCharsets.UTF_8));
    }
  }
  return fim;
}
/**
 * Scans over the index table for files or directories with a given name.
 *
 * <p>
 * The returned object is the scanner itself and is not closed by this method, so the caller is
 * responsible for it.
 *
 * @param term
 *          the name of a file or directory to search for
 * @return the entries of the forward index row for the term
 * @throws Exception
 *           if the scanner cannot be created
 */
public Iterable<Entry<Key,Value>> exactTermSearch(String term) throws Exception {
  System.out.println("executing exactTermSearch for " + term);
  // NOTE(review): getForwardIndex returns null when the term is empty or ends with '/'; the
  // range is then built from a null row -- confirm that this is the intended behavior.
  // Build the range before opening the scanner so that a failure here cannot leak a scanner.
  Range range = new Range(getForwardIndex(term));
  Scanner scanner = client.createScanner(tableName, auths);
  scanner.setRange(range);
  return scanner;
}
/**
 * Scans over the index table for files or directories with a given name, prefix, or suffix
 * (indicated by a wildcard '*' at the beginning or end of the term).
 *
 * <p>
 * A leading '*' is served from the reverse index, a trailing '*' from the forward index, and a
 * term without any '*' is delegated to {@link #exactTermSearch(String)}. Only the single leading
 * or trailing '*' is stripped; the rest of the term is used literally. The returned object is
 * the scanner itself and is not closed by this method, so the caller is responsible for it.
 *
 * @param exp
 *          the name of a file or directory to search for with an optional wildcard '*' at the
 *          beginning or end
 * @return the matching index entries
 * @throws Exception
 *           if the term contains a '/', has its wildcard in the middle, consists of nothing but
 *           the wildcard, or if the scanner cannot be created
 */
public Iterable<Entry<Key,Value>> singleRestrictedWildCardSearch(String exp) throws Exception {
  if (exp.contains("/")) {
    throw new Exception("this method only works with unqualified names");
  }
  // Work out the index row first and open the scanner last, so that neither the rejected
  // cases nor the delegation to exactTermSearch leave an unused scanner behind.
  Text indexRow;
  if (exp.startsWith("*")) {
    System.out.println("executing beginning wildcard search for " + exp);
    indexRow = getReverseIndex(exp.substring(1));
  } else if (exp.endsWith("*")) {
    System.out.println("executing ending wildcard search for " + exp);
    indexRow = getForwardIndex(exp.substring(0, exp.length() - 1));
  } else if (exp.contains("*")) {
    throw new Exception("this method only works for beginning or ending wild cards");
  } else {
    return exactTermSearch(exp);
  }
  if (indexRow == null) {
    // Nothing is left once the wildcard is removed (the term was just "*"), so there is no
    // row to build a prefix range from.
    throw new Exception("wild card search needs at least one character besides the wild card");
  }
  Scanner scanner = client.createScanner(tableName, auths);
  scanner.setRange(Range.prefix(indexRow));
  return scanner;
}
/**
 * Scans over the index table for files or directories with a given name that can contain a single
 * wildcard '*' anywhere in the term.
 *
 * <p>
 * Terms without a wildcard go to {@link #exactTermSearch(String)} and terms with a leading or
 * trailing wildcard go to {@link #singleRestrictedWildCardSearch(String)}. For a wildcard in the
 * middle, the longer of the two literal parts selects the index range to scan (forward index for
 * the part before the '*', reverse index for the part after it) and a regular expression filter
 * on the column qualifier checks the whole name. The returned object is the scanner itself and
 * is not closed by this method, so the caller is responsible for it.
 *
 * @param exp
 *          the name of a file or directory to search for with one optional wildcard '*'
 * @return the matching index entries
 * @throws Exception
 *           if the term contains more than one wildcard, if no index row can be derived from the
 *           literal part next to the wildcard, or if the scanner cannot be created
 */
public Iterable<Entry<Key,Value>> singleWildCardSearch(String exp) throws Exception {
  int starIndex = exp.indexOf("*");
  if (exp.indexOf("*", starIndex + 1) >= 0) {
    throw new Exception("only one wild card for search");
  }
  if (starIndex < 0) {
    return exactTermSearch(exp);
  } else if (starIndex == 0 || starIndex == exp.length() - 1) {
    return singleRestrictedWildCardSearch(exp);
  }
  String firstPart = exp.substring(0, starIndex);
  String lastPart = exp.substring(starIndex + 1);
  // Quote the literal parts so that characters such as '.' or '+' in a file name match
  // themselves instead of being interpreted as regular expression syntax.
  String regexString = ".*/" + Pattern.quote(firstPart) + "[^/]*" + Pattern.quote(lastPart);
  // Choose the index row before opening the scanner so that a rejected term cannot leak one.
  Text indexRow;
  if (firstPart.length() >= lastPart.length()) {
    System.out.println("executing middle wildcard search for " + regexString
        + " from entries starting with " + firstPart);
    indexRow = getForwardIndex(firstPart);
  } else {
    System.out.println("executing middle wildcard search for " + regexString
        + " from entries ending with " + lastPart);
    indexRow = getReverseIndex(lastPart);
  }
  if (indexRow == null) {
    // The index helpers return null when the text after the last '/' of the part is empty.
    throw new Exception("no file or directory name next to the wild card in " + exp);
  }
  Scanner scanner = client.createScanner(tableName, auths);
  scanner.setRange(Range.prefix(indexRow));
  IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
  RegExFilter.setRegexs(regex, null, null, regexString, null, false);
  scanner.addScanIterator(regex);
  return scanner;
}
/**
 * Command line options for {@link #main(String[])}, in addition to those inherited from
 * {@code ClientOnRequiredTable}.
 */
public static class Opts extends ClientOnRequiredTable {
// Directory to list; when --search is given, main passes this value to the wildcard search
// as the search term instead.
@Parameter(names = "--path", description = "the directory to list")
String path = "/";
// Switches main from listing a directory to searching the index for a name.
@Parameter(names = "--search", description = "find a file or directory with the given name")
boolean search = false;
}
/**
 * Lists the contents of a directory using the directory table, or searches for file or directory
 * names (if the --search flag is included). In search mode the value of --path is used as the
 * search term and the column qualifier of every match is printed; otherwise every entry of the
 * directory listing is printed.
 *
 * @param args
 *          the command line arguments, parsed into {@link Opts}
 * @throws Exception
 *           if the arguments cannot be parsed or a query fails
 */
public static void main(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(QueryUtil.class.getName(), args);
  try (AccumuloClient client = opts.createAccumuloClient()) {
    QueryUtil q = new QueryUtil(client, opts);
    if (opts.search) {
      Iterable<Entry<Key,Value>> matches = q.singleWildCardSearch(opts.path);
      try {
        for (Entry<Key,Value> e : matches) {
          System.out.println(e.getKey().getColumnQualifier());
        }
      } finally {
        // The search methods hand back the open scanner itself; release it once done.
        if (matches instanceof Scanner) {
          ((Scanner) matches).close();
        }
      }
    } else {
      for (Entry<String,Map<String,String>> e : q.getDirList(opts.path).entrySet()) {
        System.out.println(e);
      }
    }
  }
}
}