| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.accumulo.core.client; |
| |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.concurrent.TimeUnit; |
| |
| import org.apache.accumulo.core.client.IteratorSetting.Column; |
| import org.apache.accumulo.core.client.sample.SamplerConfiguration; |
| import org.apache.accumulo.core.data.Key; |
| import org.apache.accumulo.core.data.Value; |
| import org.apache.accumulo.core.security.Authorizations; |
| import org.apache.accumulo.core.spi.scan.HintScanPrioritizer; |
| import org.apache.accumulo.core.spi.scan.ScanDispatcher; |
| import org.apache.accumulo.core.spi.scan.ScanInfo; |
| import org.apache.accumulo.core.spi.scan.ScanPrioritizer; |
| import org.apache.accumulo.core.spi.scan.SimpleScanDispatcher; |
| import org.apache.hadoop.io.Text; |
| |
| /** |
| * This interface hosts configuration methods that are shared between different types of scanners. |
| */ |
| public interface ScannerBase extends Iterable<Entry<Key,Value>>, AutoCloseable { |
| |
| /** |
| * Add a server-side scan iterator. |
| * |
| * @param cfg |
| * fully specified scan-time iterator, including all options for the iterator. Any |
| * changes to the iterator setting after this call are not propagated to the stored |
| * iterator. |
| * @throws IllegalArgumentException |
| * if the setting conflicts with existing iterators |
| */ |
| void addScanIterator(IteratorSetting cfg); |
| |
| /** |
| * Remove an iterator from the list of iterators. |
| * |
| * @param iteratorName |
| * nickname used for the iterator |
| */ |
| void removeScanIterator(String iteratorName); |
| |
| /** |
| * Update the options for an iterator. Note that this does <b>not</b> change the iterator options |
| * during a scan, it just replaces the given option on a configured iterator before a scan is |
| * started. |
| * |
| * @param iteratorName |
| * the name of the iterator to change |
| * @param key |
| * the name of the option |
| * @param value |
| * the new value for the named option |
| */ |
| void updateScanIteratorOption(String iteratorName, String key, String value); |
| |
| /** |
| * Adds a column family to the list of columns that will be fetched by this scanner. By default |
| * when no columns have been added the scanner fetches all columns. To fetch multiple column |
| * families call this function multiple times. |
| * |
| * <p> |
| * This can help limit which locality groups are read on the server side. |
| * |
| * <p> |
| * When used in conjunction with custom iterators, the set of column families fetched is passed to |
| * the top iterator's seek method. Custom iterators may change this set of column families when |
| * calling seek on their source. |
| * |
| * @param col |
| * the column family to be fetched |
| */ |
| void fetchColumnFamily(Text col); |
| |
| /** |
| * Adds a column to the list of columns that will be fetched by this scanner. The column is |
| * identified by family and qualifier. By default when no columns have been added the scanner |
| * fetches all columns. |
| * |
| * <p> |
| * <b>WARNING</b>. Using this method with custom iterators may have unexpected results. Iterators |
| * have control over which column families are fetched. However iterators have no control over |
| * which column qualifiers are fetched. When this method is called it activates a system iterator |
| * that only allows the requested family/qualifier pairs through. This low level filtering |
| * prevents custom iterators from requesting additional column families when calling seek. |
| * |
| * <p> |
| * For an example, assume fetchColumn(A, Q1) and fetchColumn(B, Q1) are called on a scanner and a |
| * custom iterator is configured. The families (A,B) will be passed to the seek method of the |
| * custom iterator. If the custom iterator seeks its source iterator using the families (A,B,C), |
| * it will never see any data from C because the system iterator filtering A:Q1 and B:Q1 will |
| * prevent the C family from getting through. ACCUMULO-3905 also has an example of the type of |
| * problem this method can cause. |
| * |
| * <p> |
| * tl;dr If using a custom iterator with a seek method that adds column families, then you may |
| * want to avoid using this method. |
| * |
| * @param colFam |
| * the column family of the column to be fetched |
| * @param colQual |
| * the column qualifier of the column to be fetched |
| */ |
| void fetchColumn(Text colFam, Text colQual); |
| |
| /** |
| * Adds a column to the list of columns that will be fetched by this scanner. |
| * |
| * @param column |
| * the {@link Column} to fetch |
| * @since 1.7.0 |
| */ |
| void fetchColumn(Column column); |
| |
| /** |
| * Clears the columns to be fetched (useful for resetting the scanner for reuse). Once cleared, |
| * the scanner will fetch all columns. |
| */ |
| void clearColumns(); |
| |
| /** |
| * Clears scan iterators prior to returning a scanner to the pool. |
| */ |
| void clearScanIterators(); |
| |
| /** |
| * Returns an iterator over an accumulo table. This iterator uses the options that are currently |
| * set for its lifetime, so setting options will have no effect on existing iterators. |
| * |
| * <p> |
| * Keys returned by the iterator are not guaranteed to be in sorted order. |
| * |
| * @return an iterator over Key,Value pairs which meet the restrictions set on the scanner |
| */ |
| @Override |
| Iterator<Entry<Key,Value>> iterator(); |
| |
| /** |
| * This setting determines how long a scanner will automatically retry when a failure occurs. By |
| * default, a scanner will retry forever. |
| * |
| * <p> |
| * Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with |
| * {@link TimeUnit#MILLISECONDS}) means no timeout. |
| * |
| * @param timeOut |
| * the length of the timeout |
| * @param timeUnit |
| * the units of the timeout |
| * @since 1.5.0 |
| */ |
| void setTimeout(long timeOut, TimeUnit timeUnit); |
| |
| /** |
| * Returns the setting for how long a scanner will automatically retry when a failure occurs. |
| * |
| * @param timeUnit |
| * the time unit of the returned value (presumably the unit the configured timeout is |
| * converted to; confirm against implementations) |
| * @return the timeout configured for this scanner |
| * @since 1.5.0 |
| */ |
| long getTimeout(TimeUnit timeUnit); |
| |
| /** |
| * Closes any underlying connections on the scanner. This may invalidate any iterators derived |
| * from the Scanner, causing them to throw exceptions. |
| * |
| * @since 1.5.0 |
| */ |
| @Override |
| void close(); |
| |
| /** |
| * Returns the authorizations that have been set on the scanner. |
| * |
| * @return The authorizations set on the scanner instance |
| * @since 1.7.0 |
| */ |
| Authorizations getAuthorizations(); |
| |
| /** |
| * Setting this will cause the scanner to read sample data, as long as that sample data was |
| * generated with the given configuration. By default this is not set and all data is read. |
| * |
| * <p> |
| * One way to use this method is as follows, where the sampler configuration is obtained from the |
| * table configuration. Sample data can be generated in many different ways, so it's important to |
| * verify the sample data configuration meets expectations. |
| * |
| * <pre> |
| * <code> |
| * // could cache this if creating many scanners to avoid RPCs. |
| * SamplerConfiguration samplerConfig = |
| * client.tableOperations().getSamplerConfiguration(table); |
| * // verify table's sample data is generated in an expected way before using |
| * userCode.verifySamplerConfig(samplerConfig); |
| * scanner.setSamplerConfiguration(samplerConfig); |
| * </code> |
| * </pre> |
| * |
| * <p> |
| * Of course this is not the only way to obtain a {@link SamplerConfiguration}, it could be a |
| * constant, configuration, etc. |
| * |
| * <p> |
| * If sample data is not present or sample data was generated with a different configuration, then |
| * the scanner iterator will throw a {@link SampleNotPresentException}. Also if a table's sampler |
| * configuration is changed while a scanner is iterating over a table, a |
| * {@link SampleNotPresentException} may be thrown. |
| * |
| * @param samplerConfig |
| * the configuration that the sample data to be read must have been generated with |
| * @since 1.8.0 |
| */ |
| void setSamplerConfiguration(SamplerConfiguration samplerConfig); |
| |
| /** |
| * Returns the sampler configuration currently set on this scanner. |
| * |
| * @return currently set sampler configuration. Returns null if no sampler configuration is set. |
| * @since 1.8.0 |
| */ |
| SamplerConfiguration getSamplerConfiguration(); |
| |
| /** |
| * Clears sampler configuration making a scanner read all data. After calling this, |
| * {@link #getSamplerConfiguration()} should return null. |
| * |
| * @since 1.8.0 |
| */ |
| void clearSamplerConfiguration(); |
| |
| /** |
| * This setting determines how long a scanner will wait to fill the returned batch. By default, a |
| * scanner waits until the batch is full. |
| * |
| * <p> |
| * Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with |
| * {@link TimeUnit#MILLISECONDS}) means no timeout. |
| * |
| * @param timeOut |
| * the length of the timeout |
| * @param timeUnit |
| * the units of the timeout |
| * @since 1.8.0 |
| */ |
| void setBatchTimeout(long timeOut, TimeUnit timeUnit); |
| |
| /** |
| * Returns the timeout to fill a batch in the given TimeUnit. |
| * |
| * @param timeUnit |
| * the units in which the batch timeout is returned |
| * @return the batch timeout configured for this scanner |
| * @since 1.8.0 |
| */ |
| long getBatchTimeout(TimeUnit timeUnit); |
| |
| /** |
| * Sets the name of the classloader context on this scanner. See the administration chapter of the |
| * user manual for details on how to configure and use classloader contexts. |
| * |
| * @param classLoaderContext |
| * name of the classloader context |
| * @throws NullPointerException |
| * if classLoaderContext is null |
| * @since 1.8.0 |
| */ |
| void setClassLoaderContext(String classLoaderContext); |
| |
| /** |
| * Clears the current classloader context set on this scanner. |
| * |
| * @since 1.8.0 |
| */ |
| void clearClassLoaderContext(); |
| |
| /** |
| * Returns the name of the current classloader context set on this scanner. |
| * |
| * @return name of the current context |
| * @since 1.8.0 |
| */ |
| String getClassLoaderContext(); |
| |
| /** |
| * Set hints for the configured {@link ScanPrioritizer} and {@link ScanDispatcher}. These hints |
| * are available on the server side via {@link ScanInfo#getExecutionHints()}. Depending on the |
| * configuration, these hints may be ignored. Hints will never impact what data is returned by a |
| * scan, only how quickly it is returned. |
| * |
| * <p> |
| * The default configuration for Accumulo will ignore hints. See {@link HintScanPrioritizer} and |
| * {@link SimpleScanDispatcher} for examples of classes that can react to hints. |
| * |
| * @param hints |
| * the hints to set for the configured prioritizer and dispatcher |
| * @since 2.0.0 |
| */ |
| void setExecutionHints(Map<String,String> hints); |
| } |